diff options
author | Ryan Bloom <rbb@apache.org> | 2000-08-12 17:22:34 +0000 |
---|---|---|
committer | Ryan Bloom <rbb@apache.org> | 2000-08-12 17:22:34 +0000 |
commit | e3d662d9887a4e92fd8aeed999c3d94e5f92de68 (patch) | |
tree | 24664ccae2516b5258ea8ff630bd3a13e602f2bc /buckets | |
parent | 51b903e161dfa172ab0cf2a84ecceb28016653f7 (diff) | |
download | apr-e3d662d9887a4e92fd8aeed999c3d94e5f92de68.tar.gz |
Remove all files from the buckets directory. This is in preparation
for committing a patch that actually implements filtering.
git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@60498 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'buckets')
-rw-r--r-- | buckets/README.txt | 44 | ||||
-rw-r--r-- | buckets/ap_buf.c | 292 | ||||
-rw-r--r-- | buckets/ap_eos_buf.c | 88 | ||||
-rw-r--r-- | buckets/ap_mmap_buf.c | 131 | ||||
-rw-r--r-- | buckets/ap_rmem_buf.c | 151 | ||||
-rw-r--r-- | buckets/ap_rwmem_buf.c | 171 | ||||
-rw-r--r-- | buckets/apr_buf.h | 397 | ||||
-rw-r--r-- | buckets/doc_SFmtg.txt | 172 | ||||
-rw-r--r-- | buckets/doc_bucket_brigades.txt | 381 | ||||
-rw-r--r-- | buckets/doc_dean_iol.txt | 496 | ||||
-rw-r--r-- | buckets/doc_greg_filters.txt | 102 | ||||
-rw-r--r-- | buckets/doc_page_io.txt | 166 | ||||
-rw-r--r-- | buckets/doc_stacked_io.txt | 1312 | ||||
-rw-r--r-- | buckets/doc_wishes.txt | 269 | ||||
-rw-r--r-- | buckets/greg_patch.txt | 631 | ||||
-rw-r--r-- | buckets/ryan.patch | 651 |
16 files changed, 0 insertions, 5454 deletions
diff --git a/buckets/README.txt b/buckets/README.txt deleted file mode 100644 index 393798775..000000000 --- a/buckets/README.txt +++ /dev/null @@ -1,44 +0,0 @@ - -This directory contains several prototype implementations of -layered IO with filtering. None of these will be distributed -as part of the server until we agree on a solution. - -Design Rationale ----------------- - - doc_SFmtg.txt - -- notes from the 1998 design meeting in SF - - doc_stacked_io.txt - -- Ed and Alexei's vision of stacked-io with layer caching - - doc_page_io.txt - -- Dean's comments on performance considerations - - doc_dean_iol.txt - -- Rationale behind the IOL stuff that is now in APR - - doc_bucket_brigades.txt - -- Roy's ramblings about the bucket brigades design - - doc_wishes.txt - -- Everyone's requirements for layered-IO and filters - - doc_greg_filters.txt - -- Greg's initial filter design rationale - -Bachelor #1 ------------ - - apr_buf.h - ap_buf.c - ap_mmap_buf.c - ap_rwmem_buf.c - ap_rmem_buf.c - ap_eos_buf.c - ryan.patch - -Bachelor #2 ------------ - - greg_patch.txt diff --git a/buckets/ap_buf.c b/buckets/ap_buf.c deleted file mode 100644 index 9b3e2b2cd..000000000 --- a/buckets/ap_buf.c +++ /dev/null @@ -1,292 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1996-1999 The Apache Group. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * 4. The names "Apache Server" and "Apache Group" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Group and was originally based - * on public domain software written at the National Center for - * Supercomputing Applications, University of Illinois, Urbana-Champaign. - * For more information on the Apache Group and the Apache HTTP server - * project, please see <http://www.apache.org/>. - * - */ - -#include "apr_private.h" -#include "apr_pools.h" -#include "apr_lib.h" -#include "apr_errno.h" -#include <stdlib.h> -#include <sys/uio.h> -#include "apr_buf.h" - -APR_EXPORT(apr_status_t) ap_bucket_destroy(ap_bucket *e) -{ - if (e->free) { - e->free(e); - } - free(e); - return APR_SUCCESS; -} - -static apr_status_t ap_bucket_list_destroy(ap_bucket *e) -{ - ap_bucket *cur = e; - ap_bucket *next; - - while (cur) { - next = cur->next; - ap_bucket_destroy(cur); - cur = next; - } - return APR_SUCCESS; -} - -APR_EXPORT(apr_status_t) ap_brigade_destroy(void *data) -{ - apr_bucket_brigade *b = data; - - ap_bucket_list_destroy(b->head); - /* The brigade itself is allocated out of a pool, so we don't actually - * want to free it. If we did, we would do that free() here. 
- */ - - return APR_SUCCESS; -} - -APR_EXPORT(apr_bucket_brigade *) ap_brigade_create(apr_pool_t *p) -{ - apr_bucket_brigade *b; - - b = apr_palloc(p, sizeof(*b)); - b->p = p; - b->head = b->tail = NULL; - - apr_register_cleanup(b->p, b, ap_brigade_destroy, - ap_brigade_destroy); - return b; -} - -APR_EXPORT(void) ap_brigade_append_buckets(apr_bucket_brigade *b, - ap_bucket *e) -{ - ap_bucket *cur = e; - - if (b->tail) { - b->tail->next = e; - e->prev = b->tail; - while (cur->next) { - cur = cur->next; - } - b->tail = cur; - } - else { - b->head = b->tail = e; - } -} - -APR_EXPORT(int) ap_brigade_to_iovec(apr_bucket_brigade *b, - struct iovec *vec, int nvec) -{ - ap_bucket *e; - struct iovec *orig; - - orig = vec; - e = b->head; - while (e && nvec) { - vec->iov_len = ap_get_bucket_len(e); - vec->iov_base = (void *)e->read(e); - e = e->next; - --nvec; - ++vec; - } - return vec - orig; -} - -APR_EXPORT(void) ap_brigade_catenate(apr_bucket_brigade *a, - apr_bucket_brigade *b) -{ - if (b->head) { - if (a->tail) { - a->tail->next = b->head; - } - b->head->prev = a->tail; - a->tail = b->tail; - if (!a->head) { - a->head = b->head; - } - b->head = NULL; - b->tail = b->head; - } -} - -APR_EXPORT(void) ap_consume_buckets(apr_bucket_brigade *b, int nvec) -{ - int i; - - for (i=0; i < nvec; i++) { - if (b->head == b->tail) { - ap_bucket_destroy(b->head); - b->head = b->tail = NULL; - break; - } - b->head = b->head->next; - ap_bucket_destroy(b->head->prev); - b->head->prev = NULL; - } -} - -APR_EXPORT(apr_status_t) ap_brigade_to_iol(apr_ssize_t *total_bytes, - apr_bucket_brigade *b, - ap_iol *iol) -{ - apr_status_t status; - int iov_used; - struct iovec vec[16]; /* seems like a reasonable number to me */ - apr_ssize_t bytes = 0; - - *total_bytes = 0; - do { - iov_used = ap_brigade_to_iovec(b, vec, 16); - status = iol_writev(iol, vec, iov_used, &bytes); - - ap_consume_buckets(b, 16); - - if (status != APR_SUCCESS) { - return status; - } - *total_bytes += bytes; - } while 
(iov_used == 16); - return APR_SUCCESS; -} - -APR_EXPORT(int) ap_get_bucket_len(ap_bucket *b) -{ - if (b) { - return b->getlen(b); - } - return 0; -} - -APR_EXPORT(int) ap_brigade_vputstrs(apr_bucket_brigade *b, va_list va) -{ - ap_bucket *r; - const char *x; - int j, k, rv; - apr_ssize_t i; - - if (b->tail && b->tail->color == AP_BUCKET_rwmem) { - ap_bucket *rw; - rw = b->tail; - /* I have no idea if this is a good idea or not. Probably not. - * Basically, if the last bucket in the list is a rwmem bucket, - * then we just add to it instead of allocating a new read only - * bucket. This is incredibly easy to take out if it is a bad - * idea. RBB - */ - for (k = 0;;) { - x = va_arg(va, const char *); - if (x == NULL) - break; - j = strlen(x); - - rv = rw->write(rw, x, j, &i); - if (i != j) { - /* Do we need better error reporting? */ - return -1; - } - k += i; - - ap_brigade_append_buckets(b, rw); - } - } - - for (k = 0;;) { - x = va_arg(va, const char *); - if (x == NULL) - break; - j = strlen(x); - - r = ap_bucket_rwmem_create(x, j, &i); - if (i != j) { - /* Do we need better error reporting? */ - return -1; - } - k += i; - - ap_brigade_append_buckets(b, r); - } - - return k; -} - -APR_EXPORT(int) ap_brigade_printf(apr_bucket_brigade *b, const char *fmt, ...) -{ - va_list ap; - int res; - - va_start(ap, fmt); - res = ap_brigade_vprintf(b, fmt, ap); - va_end(ap); - return res; -} - -APR_EXPORT(int) ap_brigade_vprintf(apr_bucket_brigade *b, const char *fmt, va_list va) -{ - /* THIS IS A HACK. This needs to be replaced with a function to printf - * directly into a bucket. I'm being lazy right now. 
RBB - */ - char buf[4096]; - ap_bucket *r; - int res, i; - - res = apr_vsnprintf(buf, 4096, fmt, va); - - r = ap_bucket_rwmem_create(buf, strlen(buf), &i); - ap_brigade_append_buckets(b, r); - - return res; -} diff --git a/buckets/ap_eos_buf.c b/buckets/ap_eos_buf.c deleted file mode 100644 index 44a4e6551..000000000 --- a/buckets/ap_eos_buf.c +++ /dev/null @@ -1,88 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1996-1999 The Apache Group. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * 4. The names "Apache Server" and "Apache Group" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." 
- * - * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Group and was originally based - * on public domain software written at the National Center for - * Supercomputing Applications, University of Illinois, Urbana-Champaign. - * For more information on the Apache Group and the Apache HTTP server - * project, please see <http://www.apache.org/>. 
- * - */ - -#include "apr_private.h" -#include "apr_buf.h" -#include <stdlib.h> - -static const char * eos_get_str(ap_bucket *e) -{ - return NULL; -} - -static int eos_get_len(ap_bucket *e) -{ - return 0; -} - -APR_EXPORT(ap_bucket *) ap_bucket_eos_create(void) -{ - ap_bucket *newbuf; - - newbuf = calloc(1, sizeof(*newbuf)); - - newbuf->color = AP_BUCKET_eos; - newbuf->read = eos_get_str; - newbuf->getlen = eos_get_len; - newbuf->write = NULL; - newbuf->split = NULL; - newbuf->free = NULL; - newbuf->data = NULL; - - return newbuf; -} - diff --git a/buckets/ap_mmap_buf.c b/buckets/ap_mmap_buf.c deleted file mode 100644 index c98a49881..000000000 --- a/buckets/ap_mmap_buf.c +++ /dev/null @@ -1,131 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1996-1999 The Apache Group. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * 4. The names "Apache Server" and "Apache Group" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * apache@apache.org. - * - * 5. 
Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Group and was originally based - * on public domain software written at the National Center for - * Supercomputing Applications, University of Illinois, Urbana-Champaign. - * For more information on the Apache Group and the Apache HTTP server - * project, please see <http://www.apache.org/>. 
- * - */ - -#include "apr_private.h" -#include "apr_buf.h" -#include <stdlib.h> - -static const char * mmap_get_str(ap_bucket *e) -{ - apr_bucket_mmap *b = (apr_bucket_mmap *)e->data; - return b->alloc_addr; -} - -static int mmap_get_len(ap_bucket *e) -{ - apr_bucket_mmap *b = (apr_bucket_mmap *)e->data; - return b->len; -} - -static apr_status_t mmap_bucket_insert(ap_bucket *e, const void *buf, - apr_size_t nbytes, apr_ssize_t *w) -{ - apr_bucket_mmap *b = (apr_bucket_mmap *)e->data; - apr_mmap_t *mm = (apr_mmap_t *)buf; - - b->alloc_addr = mm->mm; - b->len = nbytes; - *w = nbytes; - return APR_SUCCESS; -} - -static apr_status_t mmap_split(ap_bucket *e, apr_size_t nbyte) -{ - ap_bucket *newbuck; - apr_bucket_mmap *a = (apr_bucket_mmap *)e->data; - apr_bucket_mmap *b; - apr_ssize_t dump; - - newbuck = ap_bucket_mmap_create(a->alloc_addr, a->len, &dump); - b = (apr_bucket_mmap *)newbuck->data; - a->alloc_addr = a->alloc_addr + nbyte; - a->len = b->len - nbyte; - - a->len = nbyte; - - newbuck->prev = e; - newbuck->next = e->next; - e->next = newbuck; - - return APR_SUCCESS; -} - -APR_EXPORT(ap_bucket *) ap_bucket_mmap_create(const void *buf, - apr_size_t nbytes, apr_ssize_t *w) -{ - ap_bucket *newbuf; - apr_bucket_mmap *b; - - newbuf = calloc(1, sizeof(*newbuf)); - b = malloc(sizeof(*b)); - - b->alloc_addr = NULL; - b->len = 0; - - newbuf->data = b; - mmap_bucket_insert(newbuf, buf, nbytes, w); - - newbuf->color = AP_BUCKET_mmap; - newbuf->read = mmap_get_str; - newbuf->getlen = mmap_get_len; - newbuf->write = mmap_bucket_insert; - newbuf->split = mmap_split; - newbuf->free = NULL; - - return newbuf; -} - diff --git a/buckets/ap_rmem_buf.c b/buckets/ap_rmem_buf.c deleted file mode 100644 index 30886063f..000000000 --- a/buckets/ap_rmem_buf.c +++ /dev/null @@ -1,151 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1996-1999 The Apache Group. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * 4. The names "Apache Server" and "Apache Group" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE APACHE GROUP OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Group and was originally based - * on public domain software written at the National Center for - * Supercomputing Applications, University of Illinois, Urbana-Champaign. - * For more information on the Apache Group and the Apache HTTP server - * project, please see <http://www.apache.org/>. 
- * - */ - -#include "apr_private.h" -#include "apr_buf.h" -#include <stdlib.h> - -#ifndef DEFAULT_RWBUF_SIZE -#define DEFAULT_RWBUF_SIZE (4096) -#endif - -static const char * rmem_get_str(ap_bucket *e) -{ - apr_bucket_rmem *b = (apr_bucket_rmem *)e->data; - return b->start; -} - -static int rmem_get_len(ap_bucket *e) -{ - apr_bucket_rmem *b = (apr_bucket_rmem *)e->data; - return (char *)b->end - (char *)b->start; -} - -static apr_status_t rmem_split(ap_bucket *e, apr_size_t nbyte) -{ - ap_bucket *newbuck; - apr_bucket_rmem *a = (apr_bucket_rmem *)e->data; - apr_bucket_rmem *b; - apr_ssize_t dump; - - newbuck = ap_bucket_rmem_create(a->start, a->alloc_len, &dump); - b = (apr_bucket_rmem *)newbuck->data; - - b->alloc_len = a->alloc_len - nbyte; - a->alloc_len = nbyte; - b->end = a->end; - a->end = a->start + nbyte; - b->start = a->end + 1; - - newbuck->prev = e; - newbuck->next = e->next; - e->next = newbuck; - - - return APR_SUCCESS; -} - -/* - * save nbyte bytes to the bucket. - * Only returns fewer than nbyte if an error ocurred. - * Returns -1 if no bytes were written before the error ocurred. - * It is worth noting that if an error occurs, the buffer is in an unknown - * state. - */ -static apr_status_t rmem_insert(ap_bucket *e, const void *buf, - apr_size_t nbyte, apr_ssize_t *w) -{ - apr_bucket_rmem *b = (apr_bucket_rmem *)e->data; - - if (nbyte == 0) { - *w = 0; - return APR_SUCCESS; - } - - /* We should probably do some checking to make sure we don't allocate too - * much memory, but that can wait for the second pass. 
- */ - b->start = buf; - b->end = (char *)b->start + nbyte; - *w = nbyte; - return APR_SUCCESS; -} - -APR_EXPORT(ap_bucket *) ap_bucket_rmem_create(const void *buf, - apr_size_t nbyte, apr_ssize_t *w) -{ - ap_bucket *newbuf; - apr_bucket_rmem *b; - - newbuf = calloc(1, sizeof(*newbuf)); - b = malloc(sizeof(*b)); - - b->alloc_len = 0; - b->start = b->end = NULL; - - newbuf->data = b; - rmem_insert(newbuf, buf, nbyte, w); - - newbuf->color = AP_BUCKET_rmem; - newbuf->read = rmem_get_str; - newbuf->getlen = rmem_get_len; - newbuf->write = rmem_insert; - newbuf->split = rmem_split; - newbuf->free = NULL; - return newbuf; -} - diff --git a/buckets/ap_rwmem_buf.c b/buckets/ap_rwmem_buf.c deleted file mode 100644 index 4ee983776..000000000 --- a/buckets/ap_rwmem_buf.c +++ /dev/null @@ -1,171 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1996-1999 The Apache Group. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * 4. The names "Apache Server" and "Apache Group" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * apache@apache.org. - * - * 5. 
Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the Apache Group - * for use in the Apache HTTP server project (http://www.apache.org/)." - * - * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Group and was originally based - * on public domain software written at the National Center for - * Supercomputing Applications, University of Illinois, Urbana-Champaign. - * For more information on the Apache Group and the Apache HTTP server - * project, please see <http://www.apache.org/>. 
- * - */ - -#include "apr_private.h" -#include "apr_buf.h" -#include <stdlib.h> - -#ifndef DEFAULT_RWBUF_SIZE -#define DEFAULT_RWBUF_SIZE (4096) -#endif - -static const char * rwmem_get_str(ap_bucket *e) -{ - apr_bucket_rwmem *b = (apr_bucket_rwmem *)e->data; - return b->start; -} - -static int rwmem_get_len(ap_bucket *e) -{ - apr_bucket_rwmem *b = (apr_bucket_rwmem *)e->data; - return (char *)b->end - (char *)b->start; -} - -static void rwmem_destroy(void *e) -{ - apr_bucket_rwmem *d = (apr_bucket_rwmem *)e; - free(d->alloc_addr); -} - -static apr_status_t rwmem_split(ap_bucket *e, apr_size_t nbyte) -{ - ap_bucket *newbuck; - apr_bucket_rwmem *a = (apr_bucket_rwmem *)e; - apr_bucket_rwmem *b; - apr_ssize_t dump; - - newbuck = ap_bucket_rwmem_create(a->alloc_addr, a->alloc_len, &dump); - b = (apr_bucket_rwmem *)newbuck->data; - - b->alloc_addr = a->alloc_addr; - b->alloc_len = a->alloc_len; - b->end = a->end; - a->end = a->start + nbyte; - b->start = a->end + 1; - - newbuck->prev = e; - newbuck->next = e->next; - e->next = newbuck; - - return APR_SUCCESS; -} - -/* - * save nbyte bytes to the bucket. - * Only returns fewer than nbyte if an error occurred. - * Returns -1 if no bytes were written before the error occurred. - * It is worth noting that if an error occurs, the buffer is in an unknown - * state. - */ -static apr_status_t rwmem_insert(ap_bucket *e, const void *buf, - apr_size_t nbyte, apr_ssize_t *w) -{ - int amt; - int total; - apr_bucket_rwmem *b = (apr_bucket_rwmem *)e->data; - - if (nbyte == 0) { - *w = 0; - return APR_SUCCESS; - } - -/* - * At this point, we need to make sure we aren't trying to write too much - * data to the bucket. We will need to write to the dist here, but I am - * leaving that for a later pass. The basics are presented below, but this - * is horribly broken. 
- */ - amt = b->alloc_len - ((char *)b->end - (char *)b->start); - total = 0; - if (nbyte > amt) { - /* loop through and write to the disk */ - /* Replace the rwmem buckets with file buckets */ - } - /* now we know that nbyte < b->alloc_len */ - memcpy(b->end, buf, nbyte); - b->end = (char *)b->end + nbyte; - *w = total + nbyte; - return APR_SUCCESS; -} - -APR_EXPORT(ap_bucket *) ap_bucket_rwmem_create(const void *buf, - apr_size_t nbyte, apr_ssize_t *w) -{ - ap_bucket *newbuf; - apr_bucket_rwmem *b; - - newbuf = calloc(1, sizeof(*newbuf)); - b = malloc(sizeof(*b)); - - b->alloc_addr = calloc(DEFAULT_RWBUF_SIZE, 1); - b->alloc_len = DEFAULT_RWBUF_SIZE; - b->start = b->alloc_addr; - b->end = b->alloc_addr; - - newbuf->data = b; - rwmem_insert(newbuf, buf, nbyte, w); - - newbuf->color = AP_BUCKET_rwmem; - newbuf->read = rwmem_get_str; - newbuf->getlen = rwmem_get_len; - newbuf->write = rwmem_insert; - newbuf->split = rwmem_split; - newbuf->free = rwmem_destroy; - - return newbuf; -} - diff --git a/buckets/apr_buf.h b/buckets/apr_buf.h deleted file mode 100644 index 13d1c973f..000000000 --- a/buckets/apr_buf.h +++ /dev/null @@ -1,397 +0,0 @@ -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2000 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" must - * not be used to endorse or promote products derived from this - * software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * nor may "Apache" appear in their name, without prior written - * permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * <http://www.apache.org/>. 
- */ - -#ifndef AP_BUF_H -#define AP_BUF_H - -#include "apr_mmap.h" -#include "apr_errno.h" -#include "apr_private.h" -/* Currently we need this, but when the filtering is done, the iol's should - * just go away all together, and so will this. :-) */ -#include "../../../include/ap_iol.h" -#ifdef HAVE_SYS_UIO_H -#include <sys/uio.h> /* for struct iovec */ -#endif -#ifdef HAVE_STDARG_H -#include <stdarg.h> -#endif - - -/* The basic concept behind bucket_brigades..... - * - * A bucket brigade is simply a Queue of buckets, where we aren't limited - * to inserting at the front and removing at the end. - * - * Buckets are just data stores. They can be files, mmap areas, or just - * pre-allocated memory. The point of buckets is to store data. Along with - * that data, come some functions to access it. The functions are relatively - * simple, read, write, getlen, split, and free. - * - * read reads a string of data. Currently, it assumes we read all of the - * data in the bucket. This should be changed to only read the specified - * amount. - * - * getlen gets the number of bytes stored in the bucket. - * - * write writes the specified data to the bucket. Depending on the type of - * bucket, this may append to the end of previous data, or wipe out the data - * currently in the bucket. rwmem buckets append currently, all others - * erase the current bucket. - * - * split just makes one bucket into two at the spefied location. To implement - * this correctly, we really need to implement reference counting. - * - * free just destroys the data associated with the bucket. - * - * We may add more functions later. There has been talk of needing a stat, - * which would probably replace the getlen. And, we definately need a convert - * function. Convert would make one bucket type into another bucket type. - * - * To write a bucket brigade, they are first made into an iovec, so that we - * don't write too little data at one time. 
Currently we ignore compacting the - * buckets into as few buckets as possible, but if we really want to be - * performant, then we need to compact the buckets before we convert to an - * iovec, or possibly while we are converting to an iovec. - * - * I'm not really sure what else to say about the buckets. They are relatively - * simple and straight forward IMO. It is just a way to organize data in - * memory that allows us to modify that data and move it around quickly and - * easily. - */ - -/* The types of bucket brigades the code knows about. We really don't need - * this enum. All access to the bucket brigades can be done through function - * pointers in the bucket type. However, when we start to do conversion - * routines, this enum will be a huge performance benefit, so we leave it - * alone. As of this moment, only rwmem, rmem, mmap, and eos buckets have - * been implemented. The rest will wait until the filtering design is - * decided upon, or until somebody gets around to them. - */ -typedef enum { - AP_BUCKET_rwmem, - AP_BUCKET_rmem, - AP_BUCKET_file, - AP_BUCKET_mmap, - AP_BUCKET_filename, - AP_BUCKET_cached_entity, - AP_BUCKET_URI, - AP_BUCKET_eos /* End-of-stream bucket. Special case to say this is - * the end of the bucket so all data should be sent - * immediately. */ -} ap_bucket_color_e; - -typedef struct ap_bucket ap_bucket; -/* - * The basic bucket type. This is an abstraction on top of all other bucket - * types. This contains the type of bucket, a pointer to the bucket, and - * a couple of function pointers. Doing it this way, lets us morph buckets - * from one type to another relatively easily. Just change the data pointer - * to point to the new bucket, and replace all of the function pointers. - * - * This also allows for a very simple interface for all features of buckets - * for all bucket types. (does that make any sense at all?) 
- * - * The current functions are: - * getlen -- get the length of the data in the bucket - * (likely to be replaced soon) - * read -- read the data in the bucket (not garaunteed to read it all) - * write -- insert data into the bucket - * split -- split one bucket into two buckets - * free -- destroy the bucket, freeing it's memory - * - * funtions to be added: - * stat -- get all of the metadata about the bucket (lifetime, type, etc.) - * convert -- change one bucket type into another bucket type. - * - * There are also pointer to the next and previus buckets in the list. - */ -struct ap_bucket { - ap_bucket_color_e color; /* what type of bucket is it */ - void *data; /* for use by free() */ - - /* All of the function pointers that can act on a bucket. */ - void (*free)(void *e); /* can be NULL */ - int (*getlen)(ap_bucket *e); /* Get the length of the string */ - - /* Read the data from the bucket. */ - const char *(*read)(ap_bucket *e); /* Get the string */ - - /* Write into a bucket. The buf is a different type based on the - * bucket type used. For example, with AP_BUCKET_mmap it is an ap_mmap_t - * for AP_BUCKET_file it is an ap_file_t, and for AP_BUCKET_rwmem it is - * a char *. The nbytes is the amount of actual data in buf. This is - * not the sizeof(buf), it is the actual number of bytes in the char * - * that buf resolves to. written is how much of that data was inserted - * into the bucket. - */ - int (*write)(ap_bucket *e, const void *buf, apr_size_t nbytes, apr_ssize_t *w); - - /* Split one bucket into to at the specified position */ - apr_status_t (*split)(ap_bucket *e, apr_size_t nbytes); - - ap_bucket *next; /* The next node in the bucket list */ - ap_bucket *prev; /* The prev node in the bucket list */ -}; - -typedef struct ap_bucket_brigade ap_bucket_brigade; -/* - * This is the basic bucket brigade. That means it is a list of buckets. - * It has a pool out of which the buckets and the bucket brigade are allocated. 
- * That may change though, because I am leaning towards make the buckets have - * the same lifetime as the data they store in most cases. It also has a - * pointer to the head and tail of the bucket list. This allows us to - * easily remove data from the bucket list, and to easily append data at - * the end. By walking the list, it is also possible to insert in the middle - * of the list. - */ -struct ap_bucket_brigade { - apr_pool_t *p; /* The pool to associate this with. - I do not allocate out of the pool, - but this lets me register a cleanup - to put a limit on the brigade's - lifetime. */ - ap_bucket *head; /* The start of the brigade */ - ap_bucket *tail; /* The end of the brigade */ -}; - -/* ****** Different bucket types *****/ - -typedef struct ap_bucket_rmem ap_bucket_rmem; -/* - * The Read only bucket type. This is basically for memory allocated off the - * stack or literal strings. It cannot be modified, and the lifetime is - * defined by when it was allocated. Most likely these should be split into - * two different types. This contains a pointer to the front and end of the - * string so that it is possible to remove characters at either end. - */ -struct ap_bucket_rmem { - size_t alloc_len; /* how much was allocated */ - const void *start; /* Where does the actual data start - in the alloc'ed block */ - const void *end; /* where does the data actually end? */ -}; - -typedef struct ap_bucket_rwmem ap_bucket_rwmem; -/* - * The read/write memory bucket type. This is for data that has been - * allocated out of the heap. This bucket actually starts by allocating - * 4K of memory. We do this so that the bucket has room to grow. At the - * bottom of the filter stack, we are likely to have to condense the buckets - * to as few as possible. By allocating a big space at the beginning, we - * don't have to make as many allocations at the bottom. If the top level - * handlers are written correctly, we won't have to do much copying either. 
- * Of course, for legacy handlers, we will have to condense. - * - * This bucket type has a pointer to the start of the allocation. This will - * never be modified. This is used a a reference for the free call. It also - * has the length of the amount allocated. The length could probably go - * away. - * - * Finally, we have a pointer to the start and end of the string currently - * referenced by the bucket. The end cannot be past the original allocation - * pointer + the allocation length. The start cannot be before the original - * allocation pointer. We keep a pointer to the start and end so that we can - * easily add and remove characters at either end. Oh, the start cannot be - * after the end either. - */ -struct ap_bucket_rwmem { - void *alloc_addr; /* Where does the data start */ - size_t alloc_len; /* how much was allocated */ - void *start; /* Where does the actual data start - in the alloc'ed block */ - void *end; /* where does the data actually end? */ -}; - -typedef struct ap_bucket_mmap ap_bucket_mmap; - -/* - * The mmap bucket type. This is basically just an allocation address and a - * length. This needs to be changed to a pointer to an mmap structure that - * has a reference count in it, and a pointer to the beginning and end of - * the data the bucket is referencing. - */ -struct ap_bucket_mmap { - void *alloc_addr; /* Where does the mmap start? */ - int len; /* The amount of data in the mmap that we are - * referencing with this bucket. This may be - * smaller than the length in the data object, - * but it may not be bigger. - */ -}; - -/* ****** Bucket Brigade Functions ***** */ - -/* Create a new bucket brigade. The bucket brigade is originally empty. */ -APR_EXPORT(ap_bucket_brigade *) ap_brigade_create(apr_pool_t *p); - -/* destroy an enitre bucket brigade. This includes destroying all of the - * buckets within the bucket brigade's bucket list. 
*/ -APR_EXPORT(apr_status_t) ap_brigade_destroy(void *b); - -/* append bucket(s) to a bucket_brigade. This is the correct way to add - * buckets to the end of a bucket briagdes bucket list. This will accept - * a list of buckets of any length. - */ -APR_EXPORT(void) ap_brigade_append_buckets(ap_bucket_brigade *b, - ap_bucket *e); - -/* consume nbytes from beginning of b -- call ap_bucket_destroy as - appropriate, and/or modify start on last element */ -APR_EXPORT(void) ap_brigade_consume(ap_bucket_brigade *, int nbytes); - -/* create an iovec of the elements in a bucket_brigade... return number - * of elements used. This is useful for writing to a file or to the - * network efficiently. - */ -APR_EXPORT(int) ap_brigade_to_iovec(ap_bucket_brigade *, - struct iovec *vec, int nvec); - -/* catenate bucket_brigade b onto bucket_brigade a, bucket_brigade b is - * empty after this. Neither bucket brigade can be NULL, but either one of - * them can be emtpy when calling this function. - */ -APR_EXPORT(void) ap_brigade_catenate(ap_bucket_brigade *a, - ap_bucket_brigade *b); - -/* Destroy the first nvec buckets. This is very much like ap_brigade_consume - * except instead of specifying the number of bytes to consume, it consumes - * a specified number of buckets. The original purpose for this function - * was in ap_brigade_to_iovec. After converting the first 16 buckets to - * vectors, we would destroy those 16 buckets. My gut is that this is the - * wrong approach. I plan to change this soon-ish. - */ -APR_EXPORT(void) ap_consume_buckets(ap_bucket_brigade *b, int nvec); - -/* save the buf out to the specified iol. This can be used to flush the - * data to the disk, or to send it out to the network. This is a poor - * function. It never should have been implemented. Unfortunately, it is - * also required. Once filters have been finished, the whole concept of - * iol's can just go away, and this function can go away with it. 
The - * correct solution, is to have the functions that are currently calling - * this just call either ap_sendv or ap_writev directly. - */ -APR_EXPORT(apr_status_t) ap_brigade_to_iol(apr_ssize_t *total_bytes, - ap_bucket_brigade *a, - ap_iol *iol); - -/* - * This function writes a bunch of strings into a bucket brigade. How this - * works is a bit strange. If there is already a rwmem bucket at the end of - * the list, we just add the next string to the end. This requires a memcpy, - * but it is assumed that we will have to condense buckets at the bottom of - * the stack anyway, so we would have to do the memcpy anyway. If there is no - * rwmem bucket, then we just allocate a new rmem bucket for each string. - * this avoids the memory allocation, and we hope that one of the intervening - * filters will be removing some of the data. This may be a dubios - * optimization, I just don't know. - */ -APR_EXPORT(int) ap_brigade_vputstrs(ap_bucket_brigade *b, va_list va); - -/* - * Both of these functions evaluate the printf and put the resulting string - * into a bucket at the end of the bucket brigade. The only reason there are - * two of them, is that the ap_r* functions needed both. I would love to be - * able to remove one, but I don't think it's feasible. - */ -APR_EXPORT(int) ap_brigade_printf(ap_bucket_brigade *b, const char *fmt, ...); -APR_EXPORT(int) ap_brigade_vprintf(ap_bucket_brigade *b, const char *fmt, va_list va); - -/* ****** Bucket Functions ***** */ - -/* destroy a bucket, and remove it's memory. This does not necessarily - * free the actual data. For example, an mmap may have multiple buckets - * referenceing it (not currently implemented). Those would only get freed - * when the bucket with the last reference is destroyed. Rwmem buckets - * always have their data destroyed currently. - */ -APR_EXPORT(apr_status_t) ap_bucket_destroy(ap_bucket *e); - -/* get the length of the data in the bucket that is currently being - * referenced. 
The bucket may contain more data, but if the start or end - * has been moved, we really don't care about it. - */ -APR_EXPORT(int) ap_get_bucket_len(ap_bucket *b); - -/****** Functions to Create Buckets of varying type ******/ - -/* - * All of these functions are responsibly for creating a bucket and filling - * it out with an initial value. Some buckets can be over-written, others - * can't. What should happen, is that buckets that can't be over-written, - * will have NULL write functions. That is currently broken, although it is - * easy to fix. The creation routines may not allocate the space for the - * buckets, because we may be using a free list. Regardless, creation - * routines are responsible for getting space for a bucket from someplace - * and inserting the initial data. - */ - -/* Create a read/write memory bucket */ -APR_EXPORT(ap_bucket *) ap_bucket_rwmem_create(const void *buf, - apr_size_t nbyte, apr_ssize_t *w); - - -/* Create a mmap memory bucket */ -APR_EXPORT(ap_bucket *) ap_bucket_mmap_create(const void *buf, - apr_size_t nbytes, apr_ssize_t *w); - -/* Create a read only memory bucket. */ -APR_EXPORT(ap_bucket *) ap_bucket_rmem_create(const void *buf, - apr_size_t nbyte, apr_ssize_t *w); - -/* Create an End of Stream bucket */ -APR_EXPORT(ap_bucket *) ap_bucket_eos_create(void); - -#endif - diff --git a/buckets/doc_SFmtg.txt b/buckets/doc_SFmtg.txt deleted file mode 100644 index bf2fed23c..000000000 --- a/buckets/doc_SFmtg.txt +++ /dev/null @@ -1,172 +0,0 @@ - -From akosut@leland.Stanford.EDU Thu Jul 23 09:38:40 1998 -Date: Sun, 19 Jul 1998 00:12:37 -0700 (PDT) -From: Alexei Kosut <akosut@leland.Stanford.EDU> -To: new-httpd@apache.org -Subject: Apache 2.0 - an overview - -For those not at the Apache meeting in SF, and even for those who were, -here's a quick overview of (my understanding of) the Apache 2.0 -architecture that we came up with. I present this to make sure that I have -it right, and to get opinions from the rest of the group. 
Enjoy. - - -1. "Well, if we haven't released 2.0 by Christmas of 1999, it won't - matter anyway." - -A couple of notes about this plan: I'm looking at this right now from a -design standpoint, not an implementation one. If the plan herein were -actually coded as-is, you'd get a very inefficient web server. But as -Donald Knuth (Professor emeritus at Stanford, btw... :) points out, -"premature optimization is the root of all evil." Rest assured there are -plenty of ways to make sure Apache 2.0 is much faster than Apache 1.3. -Taking out all the "slowness" code, for example... :) - -Also, the main ideas in this document mainly come from Dean Gaudet, Simon -Spero, Cliff Skolnick and a bunch of other people, from the Apache Group's -meeting in San Francisco, July 2 and 3, 1998. The other ideas come from -other people. I'm being vague because I can't quite remember. We should -have videotaped it. I've titled the sections of this document with quotes -from our meeting, but they are paraphrased from memory, so don't take them -too seriously. - -2. "But Simon, how can you have a *middle* end?" - -One of the main goals of Apache 2.0 is protocol independence (i.e., -serving HTTP/1.1, HTTP-NG, and maybe FTP or gopher or something). Another -is to rid the server of the belief that everything is a file. Towards this -end, we divide the server up into three parts, the front end, the middle -end, and the back end. - -The front end is essentially a combination of http_main and http_protocol -today. It takes care of all network and protocol matters, interpreting the -request, putting it into a protocol-neutral form, and (possibly) passing -it off to the rest of the server. This is approximately equivalent to the -part of Apache contained in Dean's flow stuff, and it also works very well -in certain non-Unix-like architectures such as clustered mainframes. In -addition, part of this front-end might be optionally run in kernel space, -giving a very fast server indeed... 
- -The back end is what generates the content. At the back of the back end we -have backing stores (Cliff's term), which contain actual data. These might -represent files on a disk, entries in a database, CGI scripts, etc... The -back end also consists of other modules, which can alter the request in -various fashions. The objects the server acts on can be thought of (Cliff -again) as a filehandle and a set of key/value pairs (metainformation). -The modules are set up as filters that can alter either one of those, -stacking I/O routines onto the stream of data, or altering the -metainformation. - -The middle end is what comes between the front and back ends. Think of -http_request. This section takes care of arranging the modules, backing -stores, etc... into a manner so that the path of the request will result -in the correct entity being delivered to the front end and sent to the -client. - -3. "I won't embarrass you guys with the numbers for how well Apache - performs compared to IIS." (on NT) - -For a server that was designed to handle flat files, Apache does it -surprisingly poorly, compared with other servers that have been optimized -for it. And the performance for non-static files is, of course, worse. -While Apache is still more than fast enough for 95% of Web servers, we'd -be remiss to dismiss those other 5% (they're the fun ones anyway). Another -problem Apache has is its lack of a good, caching, proxy module. - -Put these together, along with the work Dean has done with the flow and -mod_mmap_static stuff, and we realize the most important part of Apache -2.0: a built-in, all-pervasive, cache. Every part of the request process -will involve caching. In the path outlined above, between each layer of -the request, between each module, sits the cache, which can (when it is -useful), cache the response and its metainformation - including its -variance, so it knows when it is safe to give out the cached copy. 
This -gives every opportunity to increase the speed of the server by making sure -it never has to dynamically create content more than it needs to, and -renders accelerators such as Squid unnecessary. - -This also allows what I alluded to earlier: a kernel (or near-to-kernel) -based web server component, which could read the request, consult the -cache to find the requested object, and spit it back out, without so much -as an interrupt in the way. Of course, the rest of Apache (with all its -modules - it's generally a bad idea to let unknown, untrusted code, insert -itself into the kernel) sits up in user-space, ready to handle any request -the micro-Apache can't. - -A built-in cache also makes a real working HTTP/1.1 proxy server trivially -easy to write. - -4. "Stop asking about backwards compatibility with the API. We'll write a - compatibility module... later." - -If modules are as described above, then obviously they are very much -distinct from how Apache's current modules function. The only module -function that is similar to the current model is the handler, or backing -store, that actually provides the basic stream of data that the server -alters to product a response entity. - -The basic module's approach to its job is to stack a filter onto the -output. But it's better to think of the modules not as a stack that the -request flows through (a layer cake with cache icing between the layers), -but more of a mosaic (pretend I didn't use that word. I wrote collage. You -can't prove anything), with modules stuck onto various sides of the -request at different points, altering the request/response. - -Today's Apache modules take an all-or-nothing approach to request -handlers. They tell Apache what they can do, overestimating, and then are -supposed to DECLINE if they don't pass a number of checks they are -supposed to make. Most modules don't do this correctly. 
The better -approach is to allow the modules to inform Apache exactly of what they can -do, and have Apache (the middle-end) take care of invoking them when -appropriate. - -The final goal of all of this, of course, is simply to allow CGI output to -be parsed for server-side includes. But don't tell Dean that. - -5. "Will Apache run without any of the normal Unix binaries installed, - only the BSD/POSIX libraries?" - -Another major issue is, of course, configuration of the server. There are -a number of distinct opinions on this, both as to what should be -configured and how it should be done. We talked mainly about the latter, -but the did touch on the former. Obviously, with a radically distinct -module API, the configuration is radically different. We need a good way -to specify how the modules are supposed to interact, and of controlling -what they can do, when and how, balancing what the user asks the server to -do, and what the module (author) wants the server to do. We didn't really -come up with a good answer to this. - -However, we did make some progress on the other side of the issue: We -agreed that the current configuration system is definitely taking the -right approach. Having a well-defined repository of the configuration -scheme, containing the possible directives, when they are applicable, what -their parameters are, etc... is the right way to go. We agreed that more -information and stronger-typing (no RAW_ARGS!) would be good, and may -enable on-the-fly generated configuration managers. - -We agreed that such a program, probably external to Apache, would generate -a configuration and pass it to Apache, either via a standard config file, -or by calling Apache API functions. 
It is desirable to be able to go the -other way, pulling current configuration from Apache to look at, and -perhaps change it on the fly, but unfortunately is unlikely this -information would always be available; modules may perform optimizations -on their configuration that makes the original configuration unavailable. - -For the language and specification of the configuration, we thought -perhaps XML might be a good approach, and agreed it should be looked -into. Other issues, such as SNMP, were brought up and laughed at. - -6. "So you're saying that the OS that controls half the banks, and 90% of - the airlines, doesn't even have memory protection for seperate - processes?" - -Obviously, there are a lot more items that have to be part of Apache 2.0, -and we talked about a number of them. However, the four points above, I -think, represent the core of the architecture we agreed on as a starting -point. - --- Alexei Kosut <akosut@stanford.edu> <http://www.stanford.edu/~akosut/> - Stanford University, Class of 2001 * Apache <http://www.apache.org> * - - - - diff --git a/buckets/doc_bucket_brigades.txt b/buckets/doc_bucket_brigades.txt deleted file mode 100644 index 9fc3c7a03..000000000 --- a/buckets/doc_bucket_brigades.txt +++ /dev/null @@ -1,381 +0,0 @@ -To: new-httpd@apache.org -Subject: bucket brigades and IOL -Date: Fri, 12 Nov 1999 23:57:43 -0800 -From: "Roy T. Fielding" <fielding@kiwi.ICS.UCI.EDU> -Message-ID: <199911122357.aa18914@gremlin-relay.ics.uci.edu> - -About two years ago I wasted a lot of time writing an Ada95 library -called Onions that provides a stackable stream abstraction for files, -sockets, etc. It is at <http://www.ics.uci.edu/pub/websoft/libwww-ada95/> -if you want to take a look at it, but I don't recommend looking at the -code since it is almost all just working around Ada95's lack of a -system interface. I'll describe the worthwhile bits here. 
- -The heart of Onions is the input and output stream object -classes and classwide types for building a data stream via a -stack of stream objects (Input_Pipe and Output_Pipe). Reading -from the head of an input pipe causes the head stream object -to read from the next outbound stream object, and on down the line. -Likewise for writing to the head of an output pipe. One of the -main features of streams is that they can filter the data as it -passes, converting, adding to, and/or removing from the data -before giving it to the next object. Since multiple streams can be -cascaded, the complete data conversion is the sum of the individual -data conversions performed by the stream objects. - -So far, no big deal -- this can be manually created by stacking ap_iol -types in a meaningful way. But, the one unique thing I did in Onions was -abstract the memory handling into something called Buckets and moved them -around in Bucket_Brigades. A bucket is an allocated segment of memory -with pointers to its allocation address and current size. If I were doing -this in C, I'd also add a pointer to current start address and allocated -size, so that a single bucket could be shrunk from both ends without -copying, and a function pointer for freeing it at the stream end. -Note that this is the same type of memory structure that IO-Lite uses, -though developed independently and for different reasons. - -A bucket brigade is a list-queue of buckets. Each of the stream read/write -calls would pass a bucket brigade instead of single bucket, since this -made insertion by filters more efficient, with the general idea being that -the outbound end of the sream would be writing them out using writev -or reading them in using readv, which is about as efficient as I could -get with Ada95. [I call it a list-queue instead of just queue because you -have the choice of removing buckets from (or adding to) the queue one -bucket at a time or an entire linked list of buckets.] 
- -But we could go one step further. A bucket is an ADT, and as such can -be used as a general handle for read-only memory, read-write memory, -cache object, file handle, mmap handle, file name, URL, whatever. -What if, instead of just a stream of memory, it could pass around a -stream of memory interspersed with file handles or references to -remote objects? A filter could then add stuff around the stream without -causing too much parsing overhead, and if it needed to look at all the -bytes in the stream it would just replace the bucket handle with a stream -of memory sucked from that handle. Something like this was talked about -last year (see threads on "Stacking up Response Handling" on 23 Sep 1998 -and "I/O filters & reference counts" in late December 1998 and January 1999). -And Dean started something with ap_buf.h, but I don't know how he meant -to finish it. - -What I was thinking of was - - typedef enum { - AP_BUCKET_rwmem, - AP_BUCKET_rmem, - AP_BUCKET_file_t, - AP_BUCKET_mmap_t, - AP_BUCKET_filename, - AP_BUCKET_cached_entity, - AP_BUCKET_URI, - } ap_bucket_color_t; - - typedef struct ap_bucket_t ap_bucket_t; - struct ap_bucket_t { - ap_bucket_color_t color; - void *content; - ap_status_t (*free)(ap_bucket_t *bucket); - unsigned int refcount; - }; - - typedef struct ap_bucket_rwmem_t ap_bucket_rwmem_t; - struct ap_bucket_rwmem_t { - void *alloc_addr; - size_t alloc_len; - void *addr; - size_t len; - }; - - typedef struct ap_bucket_rmem_t ap_bucket_rmem_t; - struct ap_bucket_rmem_t { - void *addr; - size_t len; - }; - - typedef struct ap_bucket_filename ap_bucket_filename; - struct ap_bucket_filename { - ap_context_t *ctx; - char *name; - ap_stat_t *stat; /* useful if already stat'ed */ - ap_aa_t *conf; /* access control structure for this file */ - }; - - ... 
- -and then - - typedef struct ap_bucket_list_t ap_bucket_list_t; - struct ap_bucket_list_t { - ap_bucket_t *bucket; - ap_bucket_list_t *prev; - ap_bucket_list_t *next; - }; - - typedef struct ap_brigade_t ap_brigade_t; - struct ap_brigade_t { - ap_context_t *ctx; - ap_bucket_list_t *first; - ap_bucket_list_t *last; - unsigned int count; - }; - -and then construct the input and output streams as pushing these -bucket brigades to or from the client. The streams would have to -be a little more complicated than Onions, since I learned later that -you also need a parallel stream of header fields (in tokenized form) -in order for it to work with anything HTTP-like. - -Why use an enum instead of a bunch of file pointers for each type -of bucket, kind of like ap_iol? Because it allows adjacent memory -buckets (the most frequent kind after a filter operation) to be -gathered into a single writev. Also, we need a way to be able to -set up an operation and figure out what it will produce without -actually performing the operation -- this is for OPTIONS and HEAD. - -Note that this would completely change the way we handle internal -redirects, subrequests, server-side include, mod_proxy, access control, etc. -And then most of the API hooks would need to change. I think that is why -Dean was putting it off until 2.1. The annoying thing is that this is the -most useful rearchitecting of the server -- the MPM, APR, and hook changes -make 2.0 easier/cleaner/faster to port to other platforms, but layering -enables in one fell swoop almost every significant non-config feature -that our users have requested. A cache would just be a hash table or -btree of file buckets, complete with AA info. - -Anyway, that was stuck in the back of my head and had to get out. -I won't be able to work on it until after the dissertation is done, -which every day seems to be further away. Maybe 3.0, with rHTTP/2.0. 
- -....Roy - -================================================= -To: new-httpd@apache.org -Subject: Re: bucket brigades and IOL -In-reply-to: Your message of "Sat, 13 Nov 1999 20:43:58 GMT." - <382DCD8E.881B8468@algroup.co.uk> -Date: Sun, 14 Nov 1999 22:24:03 -0800 -From: "Roy T. Fielding" <fielding@kiwi.ICS.UCI.EDU> -Message-ID: <199911142224.aa22545@gremlin-relay.ics.uci.edu> - -BenL wrote: ->I've got to say that this is the most coherent suggestion along these ->lines that I've seen yet. I rather like it. One thing I'd add is that if ->you are going to have a movable "start of block" pointer, and changeable ->length, it can be nice to allocate extra around the edges under some ->circumstances, so that lower layers can expand the block without having ->to add extra chunks. - -Or, alternatively, allocate equal size blocks and just pass around -a reference pair within the buckets that, when the bucket is freed, -access a more complicated reference-counting pool. I think that is -closer to what IO-Lite does. - ->Also, the usual objections still apply - i.e. it is awkward to do things ->like searching for particular strings, since they may cross boundaries. ->I'm beginning to think that the right answer to this is to provide nice ->matching functions that know about the chunked structures, and last ->resort functions that'll glue it all back into one chunk... - -Yep, that's what I ended up doing for Ada95, though in that case there -were no easier alternatives. - -....Roy - -================================================= -To: new-httpd@apache.org -Subject: Re: layered I/O (was: cvs commit: ...) -In-reply-to: Your message of "Wed, 29 Mar 2000 01:21:09 PST." - <Pine.LNX.4.21.0003290004100.10357-100000@piglet> -Date: Wed, 29 Mar 2000 02:05:08 -0800 -From: "Roy T. 
Fielding" <fielding@kiwi.ICS.UCI.EDU> -Message-ID: <200003290205.aa19557@gremlin-relay.ics.uci.edu> - ->Selection of IO Layers -> ->The core selects a source module and IO layers based on the urlspace ->configuration. Content might be generated by mod_perl, and the result is ->piped through mod_chunk, mod_ssl, and mod_net, in turn. When the content ->generator runs, the core enforces that the module set the content type ->before the first call to ap_bput. The content type is set by a function ->call. The function (ap_set_content_type(request_rec *, char *)) examines ->the content type and adds IO layers as neccessary. For server parsed ->html, the core might insert mod_include immediately after mod_perl. - -The problem of thinking of it that way is that, like Dean mentioned, -the output of one module may be filtered and the filter indicate that -content should be embedded from another URL, which turns out to be a -CGI script that outputs further parseable content. In this instance, -the goal of layered-IO is to abstract away such behavior so that the -instance is processed recursively and thus doesn't result in some tangled -mess of processing code for subrequests. Doing it requires that each -layer be able to pass both data and metadata, and have both data and -metadata be processed at each layer (if desired), rather than call a -single function that would set the metadata for the entire response. - -My "solution" to that is to pass three interlaced streams -- data, -metadata, and meta-metadata -- through each layer. The metadata -streams would point to a table of tokenized name-value pairs. -There are lots of ways to do that, going back to my description of -bucket brigades long ago. 
Basically, each block of memory would -indicate what type of data, with metadata occurring in a block before -the data block(s) that it describes (just like chunk-size describes -the subsequent chunk-data) and the layers could be dynamically -rearranged based on the metadata that passed through them, in -accordance with the purpose of the filter. - ->(Can anyone produce a use case where the IO chain could change after ->output begins?) - -Output is a little easier, but that is the normal case for input. -We don't know what filters to apply to the request body until after -we have passed through the HTTP headers, and the HTTP message processor -is itself a filter in this model. - -....Roy - - -================================================= -To: new-httpd@apache.org -Subject: Re: filtering patches -In-reply-to: Your message of "Mon, 10 Jul 2000 15:33:25 PDT." - <Pine.LNX.4.21.0007101528180.10878-100000@koj.rkbloom.net> -Date: Mon, 10 Jul 2000 16:58:00 -0700 -From: "Roy T. Fielding" <fielding@kiwi.ICS.UCI.EDU> -Message-ID: <200007101657.aa21782@gremlin-relay.ics.uci.edu> - -[...] -I meant that the filters, when written to as part of the output stream, -are treated as a stack (write to the top-most filter without any knowledge -of what may lie underneath it). So the process of arranging filters -for a particular response is like dropping them onto a stack. When a -filter is done or the stream is closed, each instantiated filter cleans -up according to its local state and then destroys itself (as it is popped -off the stack). - -This is completely separate from the registration of filters by -name and purpose, which could be done by hooks. The difference is that -filters are registered at config time but only instantiated (given local -storage) and arranged on a per stream basis. - -Bucket brigades is simply a way to encapsulate and pass data down the stream -such that it can be as efficient as the sender desires, while retaining -a simple interface. 
The purpose of the bucket is to make handling of the -data uniform regardless of its type, or make type-specific conversions -via a single ADT call if and only if they are needed by some filter. -The purpose of the brigade is to reduce the number of calling arguments -and linearize the calling sequence for insertion filters. Each filter -definition is separate from its instantiation on the stream because -there may be many streams operating at once within a single program. -Each bucket is independent of the brigade so that the filters can rearrange -and insert buckets at will. Each data item is isolated by the bucket -structure, which allows them to be split across child buckets or shared -with multiple streams (e.g., cached objects). We don't need to implement -all of this on the first pass -- we just need to implement the ADT external -interfaces such that they don't have to change as we make the overall -stream more efficient. - -BTW, in case I didn't make this clear in past messages, this design is -an amalgam of the best aspects of the designs from Henrik's Streams -(see w3c-libwww), sfio (AT&T Research), IO-Lite (Rice Univ.), and -libwww-ada95 (UCI). The MIME stuff in MSIE is based on Henrik's streams. -Henrik's stuff is very fast, but is spaghetti code because it relies on -callbacks and legacy stuff in libwww. sfio is great but is intended to -be a complete replacement for stdio and hence does way too much and is -subject to a few patents that I don't appreciate. IO-Lite is cool but -is probably only worth it when the entire OS is based on IO-Lite memory -management, but regardless the code isn't available for commercial use. -As Dean has mentioned many times, we already get most of the performance -benefit of IO-Lite simply by avoiding memory copies on large writes. 
-libwww-ada95 was an attempt to make Ada95 suck less for systems programming, -which was only a partial success (it is very fast compared to other Ada95 -libraries, but memory management became a problem with complex filters). - -Writing our own streams library isn't NIH syndrome -- both Dean and I -have independently investigated the other available alternatives and they -just aren't suitable for our purpose. Even with all that, my own design -only truly pays off (versus plain old BUFF) when you make good use of -sendfile and shared object caching. - -[...] - - -================================================= -Other stuff Roy wrote on new-httpd: - -My buckets are passed around in list-queues (really just lists with front -and back pointers). My buckets carry data and metadata and meta-metadata. -My buckets are used to indicate stream-end, and the filter configuration -itself is determined by the stream content. It probably sounds weird, but -the effects of this interface are completely different than mere content -filters. They simplify everything. I'm not saying that we have to -simplify everything right away, but I am saying that it is just as easy -to implement a fully-general filter using bucket brigades as it is -to implement string interface filters -- all of the complex parts -are handled by the ADTs. - -... - -The real psychedelic stuff happens when you can pass metadata (tokenized -header fields) as buckets and the filters know how to pass that down the -chain without treating them as data. - -... - -The purpose of passing a list of buckets around is to linearize -the call stack for the frequent case of filtered content -splitting one large bucket into separate buckets with filtered results -interspersed in between. 
The effect is that a filter chain can frequently -process an entire message in one pass down the chain, which enables the -stream end to send the entire response in one go, which also allows it -to do interesting things like provide a content length by summing the -data length of all the buckets' data, and set a last-modified time -by picking the most recent time from a set of static file buckets. - -I think it would help if we stopped using artificial examples. Let's -try something simple: - - socket <-- http <-- add_footer <-- add_header <-- send_file - -send_file calls its filter with an ap_file_t bucket and End-of-Stream (EOS) -in the bucket list. add_header sets a flag, prepends another ap_file_t -bucket to the list and sends the list to its filter. add_footer looks -at the list, finds the EOS, inserts another ap_file_t bucket in -front of the EOS, and sends the list on to its filter. http walks through -the list picking up the (cached) stat values, notes the EOS and seeing -that its own flag for headers_sent is false, sets the cumulative metadata -and sends the header fields, followed by three calls to the kernel to -send out the three files using whatever mechanism is most efficient. - -The point here isn't that this is the only way to implement filters. -The point is that no other interface can implement them as efficiently. -Not even close. Yes, there are cases where string filters are just as -efficient as any other design, but there is no case in which they are -more efficient than bucket brigades. The reason is that being able -to process a list of strings in one call more than offsets the extra -cost of list processing, regardless of the filter type, and allows -for additional features that have benefits for http processing. -Like, for example, being able to determine the entire set of resources -that make up the source of this dynamic resource without teaching every -filter about WebDAV. - -... 
- -Making many small calls down the filter chain is something best -avoided, which is why the bucket brigades interface consists of -a linked list of buckets, such that all of the currently available -data can be passed-on in a single call. - -Being able to handle sendfile, cached objects and subrequests is very -effective at improving efficiency, which is why the buckets are typed. -A filter that needs to do byte-level processing will have to call a -routine to convert the typed bucket into a data stream, but that decision -is delayed until no other choice is available and adds no overhead to -the common cases of non-filtered or pre-/post-filtered objects. - -Being able to process header fields (metadata) through the same filter -set as the data is necessary for correctness and simplicity in the -proper ordering of independently developed filter modules, which is -why the buckets can carry metadata on the same stream. Every filter -has to be knowledgeable about metadata because only the filter knows -whether or not its actions will change the nature of the data. - - diff --git a/buckets/doc_dean_iol.txt b/buckets/doc_dean_iol.txt deleted file mode 100644 index 95c0c34c0..000000000 --- a/buckets/doc_dean_iol.txt +++ /dev/null @@ -1,496 +0,0 @@ -goals? we need an i/o abstraction which has these properties: - -- buffered and non-buffered modes - - The buffered mode should look like FILE *. - - The non-buffered mode should look more like read(2)/write(2). - -- blocking and non-blocking modes - - The blocking mode is the "easy" mode -- it's what most module writers - will see. The non-blocking mode is the "hard" mode, this is where - module writers wanting to squeeze out some speed will have to play. - In order to build async/sync hybrid models we need the - non-blocking i/o abstraction. - -- timed reads and writes (for blocking cases) - - This is part of my jihad against asynchronous notification. - -- i/o filtering or layering - - Yet another Holy Grail of computing. 
But I digress. These are - hard when you take into consideration non-blocking i/o -- you have - to keep lots of state. I expect our core filters will all support - non-blocking i/o, well at least the ones I need to make sure we kick - ass on benchmarks. A filter can deny a switch to non-blocking mode, - the server will have to recover gracefully (ha). - -- copy-avoidance - - Hey what about zero copy a la IO-Lite? After having experienced it - in a production setting I'm no longer convinced of its benefits. - There is an enormous amount of overhead keeping lists of buffers, - and reference counts, and cleanup functions, and such which requires - a lot of tuning to get right. I think there may be something here, - but it's not a cakewalk. - - What I do know is that the heuristics I put into apache-1.3 to choose - writev() at times are almost as good as what you can get from doing - full zero-copy in the cases we *currently* care about. To put it - another way, let's wait another generation to deal with zero copy. - - But sendfile/transmitfile/etc. those are still interesting. - - So instead of listing "zero copy" as a property, I'll list - "copy-avoidance". - -So far? - -- ap_bungetc added -- ap_blookc changed to return the character, rather than take a char *buff -- in theory, errno is always useful on return from a BUFF routine -- ap_bhalfduplex, B_SAFEREAD will be re-implemented using a layer I think -- chunking gone for now, will return as a layer -- ebcdic gone for now... it should be a layer - -- ap_iol.h defined, first crack at the layers... - - Step back a second to think on it. Much like we have fread(3) - and read(2), I've got a BUFF and an ap_iol abstraction. An ap_iol - could use a BUFF if it requires some form of buffering, but many - won't require buffering... or can do a better job themselves. 
- - Consider filters such as: - - ebcdic -> ascii - - encryption - - compression - These all share the property that no matter what, they're going to make - an extra copy of the data. In some cases they can do it in place (read) - or into a fixed buffer... in most cases their buffering requirements - are different than what BUFF offers. - - Consider a filter such as chunking. This could actually use the writev - method to get its job done... depends on the chunks being used. This - is where zero-copy would be really nice, but we can get by with a few - heuristics. - - At any rate -- the NSPR folks didn't see any reason to included a - buffered i/o abstraction on top of their layered i/o abstraction... so - I feel like I'm not the only one who's thinking this way. - -- iol_unix.c implemented... should hold us for a bit - - -============================== -Date: Mon, 10 Apr 2000 14:39:48 -0700 (PDT) -From: dean gaudet <dgaudet-list-new-httpd@arctic.org> -To: new-httpd@apache.org -Subject: Re: Buff should be an I/O layer -In-Reply-To: <20000410123109.C3931@manojk.users.mindspring.com> -Message-ID: <Pine.LNX.4.21.0004101418410.2626-100000@twinlark.arctic.org> - -[hope you don't mind me taking this back to new-httpd so that it's -archived this time :)] - -On Mon, 10 Apr 2000, Manoj Kasichainula wrote: - -> On Mon, Mar 27, 2000 at 04:48:23PM -0800, Dean Gaudet wrote: -> > On Sat, 25 Mar 2000, Manoj Kasichainula wrote: -> > > (aside: Though my unschooled brain still sees no -> > > problem if our chunking layer maintains a pile of 6-byte blocks that -> > > get used in an iol_writev. I'll read the archived discussions.) -> > -> > there's little in the way of archived discussions, there's just me admitting -> > that i couldn't find a solution which was not complex. 
-> -> OK, there's got to be something wrong with this: -> -> chunk_iol->iol_write(char *buffer) { -> pull a 10-byte (or whatever) piece out of our local stash -> construct a chunk header in it -> set the iovec = chunk header + buffer -> writev(iovec) -> } -> -> But what is it? - -when i was doing the new apache-2.0 buffering i was focusing a lot on -supporting non-blocking sockets so we could do the async i/o stuff -- and -to support a partial write you need to keep more state than what your -suggestion has. - -also, the real complexity comes when you consider handling a pipelined -HTTP/1.1 connection -- consider what happens when you get 5 requests -for /cgi-bin/printenv smack after the other. - -if you do that against apache-1.3 and the current apache-2.0 you get -back maximally packed packets. but if you make chunking a layer then -every time you add/remove the layer you'll cause a packet boundary -- -unless you add another buffering layer... or otherwise shift around -the buffering. - -as a reminder, visit -<http://www.w3.org/Protocols/HTTP/Performance/Pipeline.html> for a -description of how much we win on the wire from such an effort. - -also, at some point i worry that passing the kernel dozens of tiny -iovecs is more expensive than an extra byte copy into a staging buffer, -and passing it one large buffer. but i haven't done any benchmarks to -prove this. (my suscipions have to do with the way that at least the -linux kernel's copying routine is written regarding aligned copies) - -oh it's totally worth pointing out that at least Solaris allows at -most 16 iovecs in a single writev()... which probably means every sysv -derived system is similarly limited. linux sets the limit at 1024. -freebsd has an optimisation for up to 8, but otherwise handles 1024. - -i'm still doing work in this area though -- after all my ranting about -zero-copy a few weeks back i set out to prove myself wrong by writing -a zero-copy buffering library using every trick in my book. 
i've no -results to share yet though. - --dean - - -============================== -Date: Tue, 2 May 2000 15:51:30 +0200 -From: Martin Kraemer <Martin.Kraemer@mch.sni.de> -To: new-httpd@apache.org -Subject: BUFF, IOL, Chunking, and Unicode in 2.0 (long) -Message-ID: <20000502155129.A10548@pgtm0035.mch.sni.de> - -Sorry for a long silence in the past weeks, I've been busy with other -stuff. - -Putting the catch-words "Chunking, Unicode and 2.0" into the subject -was on purpose: I didn't want to scare off anyone because of the word -EBCDIC: the problems I describe here, and the proposed new buff.c -layering, are mostly independent from the EBCDIC port. - - -In the past weeks, I've been thinking about today's buff.c (and -studied its applicability for automatic conversion stuff like in the -russian apache, see apache.lexa.ru). I think it would be neat to be -able to do automatic character set conversion in the server, for -example by negotiation (when the client sends an Accept-Charset and -the server doesn't have a document with exactly the right Charset, but -knows how to generate it from an existing representation). - -IMO it is a reoccurring problem, - -* not only in today's russian internet environment (de facto browsers - support 5 different cyrillic character sets, but the server doesn't - want to hold every document in 5 copies, so an automatic translation - is performed by the russian apache, depending on information supplied - by the client, or by explicit configuration). One of the supported - character sets is Unicode (UTF-7 or UTF-8) - -* in japanese/chinese environments, support for 16 bit character sets - is an absolute requirement. (Other oriental scripts like Thai get - along with 8 bit: they only have 44 consonants and 16 vowels). - Having success on the eastern markets depends to a great deal on - having support for these character sets. 
The japanese Apache - community hasn't had much contact with new-httpd in the past, but - I'm absolutely sure that there is a "standard japanese patch" for - Apache which would well be worth integrating into the standard - distribution. (Anyone on the list to provide a pointer?) - -* In the future, more and more browsers will support unicode, and so - will the demand grow for servers supporting unicode. Why not - integrate ONE solution for the MANY problems worldwide? - -* The EBCDIC port of 1997 has been a simple solution for a rather - simple problem. If we would "do it right" for 2.0 and provide a - generic translation layer, we would solve many problems in a single - blow. The EBCDIC translation would be only one of them. - -Jeff has been digging through the EBCDIC stuff and apparently -succeeded in porting a lot of the 1.3 stuff to 2.0 already. Jeff, I'd -sure be interested in having a look at it. However, when I looked at -buff.c and the new iol_* functionality, I found out that iol's are not -the way to go: they give us no solution for any of the conversion -problems: - -* iol's sit below BUFF. Therefore, they don't have enough information - to know which part of the written byte stream is net client data, - and which part is protocol information (chunks, MIME headers for - multipart/*). - -* iol's don't allow simplification of today's chunking code. It is - spread thruout buff.c and there's a very hairy balance between - efficiency and code correctness. Re-adding (EBCDIC/UTF) conversion, - possibly with sup[port for multi byte character sets (MBCS), would - make a code nightmare out of it. (buff.c in 1.3 was "almost" a - nightmare because we had onlu single byte translations. - -* Putting conversion to a hierarchy level any higher than buff.c is no - solution either: for chunks, as well as for multipart headers and - buffering boundaries, we need character set translation. 
Pulling it - to a higher level means that a lot of redundant information has to - be passed down and up. - -In my understanding, we need a layered buff.c (which I number from 0 -upwards): - -0) at the lowest layer, there's a "block mode" which basically - supports bread/bwrite/bwritev by calling the equivalent iol_* - routines. It doesn't know about chunking, conversion, buffering and - the like. All it does is read/write with error handling. - -1) the next layer handles chunking. It knows about the current - chunking state and adds chunking information into the written - byte stream at appropriate places. It does not need to know about - buffering, or what the current (ebcdic?) conversion setting is. - -2) this layer handles conversion. I was thinking about a concept - where a generic character set conversion would be possible based on - Unicode-to-any translation tables. This would also deal with - multibyte character sets, because at this layer, it would - be easy to convert SBCS to MBCS. - Note that conversion *MUST* be positioned above the chunking layer - and below the buffering layer. The former guarantees that chunking - information is not converted twice (or not at all), and the latter - guarantees that ap_bgets() is looking at the converted data - (-- otherwise it would fail to find the '\n' which indicates end- - of-line). - Using (loadable?) translation tables based on unicode definitions - is a very similar approach to what libiconv offers you (see - http://clisp.cons.org/~haible/packages-libiconv.html -- though my - inspiration came from the russian apache, and I only heard about - libiconv recently). Every character set can be defined as a list - of <hex code> <unicode equiv> pairs, and translations between - several SBCS's can be collapsed into a single 256 char table. - Efficiently building them once only, and finding them fast is an - optimization task. - -3) This last layer adds buffering to the byte stream of the lower - layers. 
Because chunking and translation have already been dealt - with, it only needs to implement efficient buffering. Code - complexity is reduced to simple stdio-like buffering. - - -Creating a BUFF stream involves creation of the basic (layer 0) BUFF, -and then pushing zero or more filters (in the right order) on top of -it. Usually, this will always add the chunking layer, optionally add -the conversion layer, and usually add the buffering layer (look for -ap_bcreate() in the code: it almost always uses B_RD/B_WR). - -Here's code from a conceptual prototype I wrote: - BUFF *buf = ap_bcreate(NULL, B_RDWR), *chunked, *buffered; - chunked = ap_bpush_filter(buf, chunked_filter, 0); - buffered = ap_bpush_filter(chunked, buffered_filter, B_RDWR); - ap_bputs("Data for buffered ap_bputs\n", buffered); - - -Using a BUFF stream doesn't change: simply invoke the well known API -and call ap_bputs() or ap_bwrite() as you would today. Only, these -would be wrapper macros - - #define ap_bputs(data, buf) buf->bf_puts(data, buf) - #define ap_write(buf, data, max, lenp) buf->bf_write(buf, data, max, lenp) - -where a BUFF struct would hold function pointers and flags for the -various levels' input/output functions, in addition to today's BUFF -layout. - -For performance improvement, the following can be added to taste: - -* fewer buffering (zero copy where possible) by putting the buffers - for buffered reading/writing down as far as possible (for SBCS: from - layer 3 to layer 0). By doing this, the buffer can also hold a - chunking prefix (used by layer 1) in front of the buffering buffer - to reduce the number of vectors in a writev, or the number of copies - between buffers. Each layer could indicate whether it needs a - private buffer or not. - -* intra-module calls can be hardcoded to call the appropriate lower - layer directly, instead of using the ap_bwrite() etc macros. That - means we don't use the function pointers all the time, but instead - call the lower levels directly. 
OTOH we have iol_* stuff which uses - function pointers anyway. We decided in 1.3 that we wanted to avoid - the C++ type stuff (esp. function pointers) for performance reasons. - But it would sure reduces the code complexity a lot. - -The resulting layering would look like this: - - | Caller: using ap_bputs() | or ap_bgets/apbwrite etc. - +--------------------------+ - | Layer 3: Buffered I/O | gets/puts/getchar functionality - +--------------------------+ - | Layer 2: Code Conversion | (optional conversions) - +--------------------------+ - | Layer 1: Chunking Layer | Adding chunks on writes - +--------------------------+ - | Layer 0: Binary Output | bwrite/bwritev, error handling - +--------------------------+ - | iol_* functionality | basic i/o - +--------------------------+ - | apr_* functionality | - .... - --- -<Martin.Kraemer@MchP.Siemens.De> | Fujitsu Siemens -Fon: +49-89-636-46021, FAX: +49-89-636-41143 | 81730 Munich, Germany - - -============================== -Date: Tue, 2 May 2000 09:09:28 -0700 (PDT) -From: dean gaudet <dgaudet-list-new-httpd@arctic.org> -To: new-httpd@apache.org -Subject: Re: BUFF, IOL, Chunking, and Unicode in 2.0 (long) -In-Reply-To: <20000502155129.A10548@pgtm0035.mch.sni.de> -Message-ID: <Pine.LNX.4.21.0005020847180.22518-100000@twinlark.arctic.org> - -On Tue, 2 May 2000, Martin Kraemer wrote: - -> * iol's sit below BUFF. Therefore, they don't have enough information -> to know which part of the written byte stream is net client data, -> and which part is protocol information (chunks, MIME headers for -> multipart/*). - -there's not much stopping you from writing an iol which takes a BUFF * in -its initialiser, and then bcreating a second BUFF, and bpushing your iol. -like: - - /* this is in r->pool rather than r->connection->pool because - * we expect to create & destroy this inside request boundaries - * and if we stuck it in r->connection->pool the storage wouldn't - * be reclaimed earlier enough on pipelined connections. 
- * - * also, no need for buffering in new_buff because the translation - * layer can easily assume lower level BUFF is doing the buffering. - */ - new_buff = ap_bcreate(r->pool, B_WR); - ap_bpush_iol(new_buff, - ap_utf8_to_ebcdic(r->pool, r->connection->client)); - r->connection->client = new_buff; - -main problem is that the new_buff only works for writing, and you -potentially need a separate conversion layer for reading from the -client. - -shouldn't be too hard to split up r->connection->client into a read and -write half. - -think of iol as the equivalent of the low level read/write, and BUFF -as the equivalent of FILE *. there's a reason for both layers in -the interface. - -> * iol's don't allow simplification of today's chunking code. It is -> spread thruout buff.c and there's a very hairy balance between -> efficiency and code correctness. Re-adding (EBCDIC/UTF) conversion, -> possibly with sup[port for multi byte character sets (MBCS), would -> make a code nightmare out of it. (buff.c in 1.3 was "almost" a -> nightmare because we had onlu single byte translations. - -as i've said before, i welcome anyone to do it otherwise without adding -network packets, without adding unnecessary byte copies, and without -making it even more complex. until you've tried it, it's pretty easy -to just say "this is a mess". once you've tried it i suspect you'll -discover why it is a mess. - -that said, i'm still trying to prove to myself that the zero-copy -crud necessary to clean this up can be done in a less complex manner. - -> * Putting conversion to a hierarchy level any higher than buff.c is no -> solution either: for chunks, as well as for multipart headers and -> buffering boundaries, we need character set translation. Pulling it -> to a higher level means that a lot of redundant information has to -> be passed down and up. - -huh? 
HTTP is in ASCII -- you don't need any conversion -- if a chunking -BUFF below a converting BUFF/iol is writing those things in ascii -it works. no? at least that's my understanding of the code in 1.3. - -you wouldn't do the extra BUFF layer above until after you've written -the headers into the plain-text BUFF. - -i would expect you'd: - - write headers through plain text BUFF - push conversion BUFF - run method - pop conversion BUFF - pump multipart header - push conversion BUFF - ... - pop conversion BUFF - -> In my understanding, we need a layered buff.c (which I number from 0 -> upwards): - -you've already got it :) - -> | Caller: using ap_bputs() | or ap_bgets/apbwrite etc. -> +--------------------------+ -> | Layer 3: Buffered I/O | gets/puts/getchar functionality -> +--------------------------+ -> | Layer 2: Code Conversion | (optional conversions) -> +--------------------------+ -> | Layer 1: Chunking Layer | Adding chunks on writes -> +--------------------------+ -> | Layer 0: Binary Output | bwrite/bwritev, error handling -> +--------------------------+ -> | iol_* functionality | basic i/o -> +--------------------------+ -> | apr_* functionality | - -there are two cases you need to consider: - -chunking and a partial write occurs -- you need to keep track of how much -of the chunk header/trailer was written so that on the next loop around -(which happens in the application at the top) you continue where you -left off. - -and more importantly at the moment, and easier to grasp -- consider what -happens when you've got a pipelined connection. a dozen requests come -in from the client, and apache-1.3 will send back the minimal number -of packets. 2.0-current still needs fixing in this area (specifically -saferead needs to be implemented). - -for example, suppose the client sends one packet: - - GET /images/a.gif HTTP/1.1 - Host: foo - - GET /images/b.gif HTTP/1.1 - Host: foo - -suppose that a.gif and b.gif are small 200 byte files. 
- -apache-1.3 sends back one response packet: - - HTTP/1.1 OK - headers - - a.gif body - HTTP/1.1 OK - headers - - b.gif body - -consider what happens with your proposal. in between each of those -requests you remove the buffering -- which means you have to flush a -packet boundary. so your proposal generates two network packets. - -like i've said before on this topic -- if all unixes had TCP_CORK, -it'd be a breeze. but only linux has TCP_CORK. - -you pretty much require a layer of buffering right above the iol which -talks to the network. - -and once you put that layer of buffering there, you might as well merge -chunking into it, because chunking needs buffering as well (specifically -for the async i/o case). - -and then you either have to double-buffer, or you can only stack -non-buffered layers above it. fortunately, character-set conversion -should be doable without any buffering. - -*or* you implement a zero-copy library, and hope it all works out in -the end. - --dean - diff --git a/buckets/doc_greg_filters.txt b/buckets/doc_greg_filters.txt deleted file mode 100644 index 346e877f4..000000000 --- a/buckets/doc_greg_filters.txt +++ /dev/null @@ -1,102 +0,0 @@ -Date: Fri, 14 Apr 2000 13:46:50 -0700 (PDT) -From: Greg Stein <gstein@lyra.org> -To: new-httpd@apache.org -Subject: Re: I/O filtering in 2.0 -In-Reply-To: <Pine.LNX.4.21.0004141156120.25805-100000@koj.rkbloom.net> -Message-ID: <Pine.LNX.4.10.10004141314030.13301-100000@nebula.lyra.org> - -On Fri, 14 Apr 2000 rbb@covalent.net wrote: -> I am not calling this I/O Layering, because this is really output -> filtering. The patch I am submitting allows modules to edit data after a -> handler has finished with it. This is basically Greg's approach. - -I'll detail my approach here, as your patch has some pieces, but it is -quite different. - -All of this is obviously IMO... - - -*) we definitely want multiple output filters. each filter is recorded in - a linked list in the request_rec. 
- -*) a filter has a name and is implemented by a module. this mapping is set - up similarly to handler maps in the 'module' structure. - -*) output from normal modules is identical to today. they use ap_rputs, - ap_rwrite, etc. Filtering occurs under the covers. - -*) Apache defines ap_lwrite(ap_layer *next_layer, - const void *buf, size_t len, - request_rec *r) - and possibly some similar ones for printf, puts, etc - -*) struct ap_layer_s { - const char *layer_name; - layer_func_t *func; - struct ap_layer_s *next; - } - - /* filters implement function with this type: */ - typedef ap_status_t (*layer_func_t)(ap_layer *next_layer, - const void *buf, size_t len, - request_rec *r); - /* ### dunno about that return type */ - /* looks remarkably similar to ap_lwrite(), eh? */ - -*) ap_status_t ap_lwrite(ap_layer *layer, const void *buf, - size_t len, request_rec *r) - { - if (layer == NULL) { - ap_bwrite(r->connection->client, buf, len, &amt); - return OK; - } - return (*layer->func)(layer->next, buf, len, r); - } - -*) a new Apache directive can detail the sequence of filters and install - them into the request_rec. - -*) ap_rwrite() and friends calls ap_lwrite(r->first_layer, ...). this will - perform actual output filtering, or go off to the BUFF stuff. - -*) a new hook is added: install_filters. it is called right before - invoke_handlers and is responsible for setting r->first_layer and/or - elements along the list. - -*) a new, small module can implement a directive which responds to - install_filters and sets up a sequence of filters based on their names. - for example: - SetFilters PHP SSI - -*) content handlers (e.g. during invoke_handler processing) have a new - function to call: ap_set_content_type(r, const char *type). when the - type is changed, such as during CGI processing, this function is called - and an opportunity (somehow? haven't thought on this part) is provided - for new output layers to be inserted. 
- [ this provides for a CGI output'ing application/x-httpd-php3 ] - - ap_set_content_type() should probably know where it is during the - request processing so that it can be used any time. maybe it should be - allowed to set up layers at any time? - - -That's it. :-) - -Helper functions to set up a pipe and a sub-thread would be handy. That -would allow some modules to keep their "read from an fd" approach, rather -than switching to a stateful parser approach. As Dean stated before, -output filtering is necessarily asynchronous: a sub thread or a state -machine thingy is required. - -[ flipping things around, you could say that the initial content can be - generated asynchronously (where the first filter demands the next chunk - of output). this would be incredibly difficult for things like - mod_autoindex. at some point, somebody is pulling content and shoving it - down the BUFF. the above form is "everybody shoves content" ] - -Cheers, --g - --- -Greg Stein, http://www.lyra.org/ - diff --git a/buckets/doc_page_io.txt b/buckets/doc_page_io.txt deleted file mode 100644 index 7e8d885f1..000000000 --- a/buckets/doc_page_io.txt +++ /dev/null @@ -1,166 +0,0 @@ - -From dgaudet@arctic.org Fri Feb 20 00:36:52 1998 -Date: Fri, 20 Feb 1998 00:35:37 -0800 (PST) -From: Dean Gaudet <dgaudet@arctic.org> -To: new-httpd@apache.org -Subject: page-based i/o -X-Comment: Visit http://www.arctic.org/~dgaudet/legal for information regarding copyright and disclaimer. -Reply-To: new-httpd@apache.org - -Ed asked me for more details on what I mean when I talk about "paged based -zero copy i/o". - -While writing mod_mmap_static I was thinking about the primitives that the -core requires of the filesystem. What exactly is it that ties us into the -filesystem? and how would we abstract it? The metadata (last modified -time, file length) is actually pretty easy to abstract. It's also easy to -define an "index" function so that MultiViews and such can be implemented. 
-And with layered I/O we can hide the actual details of how you access -these "virtual" files. - -But therein lies an inefficiency. If we had only bread() for reading -virtual files, then we would enforce at least one copy of the data. -bread() supplies the place that the caller wants to see the data, and so -the bread() code has to copy it. But there's very little reason that -bread() callers have to supply the buffer... bread() itself could supply -the buffer. Call this new interface page_read(). It looks something like -this: - - typedef struct { - const void *data; - size_t data_len; /* amt of data on page which is valid */ - ... other stuff necessary for managing the page pool ... - } a_page_head; - - /* returns NULL if an error or EOF occurs, on EOF errno will be - * set to 0 - */ - a_page_head *page_read(BUFF *fb); - - /* queues entire page for writing, returns 0 on success, -1 on - * error - */ - int page_write(BUFF *fb, a_page_head *); - -It's very important that a_page_head structures point to the data page -rather than be part of the data page. This way we can build a_page_head -structures which refer to parts of mmap()d memory. - -This stuff is a little more tricky to do, but is a big win for performance. -With this integrated into our layered I/O it means that we can have -zero-copy performance while still getting the advantages of layering. - -But note I'm glossing over a bunch of details... like the fact that we -have to decide if a_page_heads are shared data, and hence need reference -counting (i.e. I said "queues for writing" up there, which means some -bit of the a_page_head data has to be kept until its actually written). -Similarly for the page data. - -There are other tricks in this area that we can take advantage of -- -like interprocess communication on architectures that do page flipping. 
-On these boxes if you write() something that's page-aligned and page-sized -to a pipe or unix socket, and the other end read()s into a page-aligned -page-sized buffer then the kernel can get away without copying any data. -It just marks the two pages as shared copy-on-write, and only when -they're written to will the copy be made. So to make this work, your -writer uses a ring of 2+ page-aligned/sized buffers so that it's not -writing on something the reader is still reading. - -Dean - ----- - -For details on HPUX and avoiding extra data copies, see -<ftp://ftp.cup.hp.com/dist/networking/briefs/copyavoid.pdf>. - -(note that if you get the postscript version instead, you have to -manually edit it to remove the front page before any version of -ghostscript that I have used will read it) - ----- - -I've been told by an engineer in Sun's TCP/IP group that zero-copy TCP -in Solaris 2.6 occurs when: - - - you've got the right interface card (OC-12 ATM card I think) - - you use write() - - your write buffer is 16k aligned and a multiple of 16k in size - -We currently get the 16k stuff for free by using mmap(). But sun's -current code isn't smart enough to deal with our initial writev() -of the headers and first part of the response. - ----- - -Systems that have a system call to efficiently send the contents of a -descriptor across the network. This is probably the single best way -to do static content on systems that support it. - -HPUX: (10.30 and on) - - ssize_t sendfile(int s, int fd, off_t offset, size_t nbytes, - const struct iovec *hdtrl, int flags); - - (allows you to add headers and trailers in the form of iovec - structs) Marc has a man page; ask if you want a copy. Not included - due to copyright issues. 
man page also available from - http://docs.hp.com/ (in particular, - http://docs.hp.com:80/dynaweb/hpux11/hpuxen1a/rvl3en1a/@Generic__BookTextView/59894;td=3 ) - -Windows NT: - - BOOL TransmitFile( SOCKET hSocket, - HANDLE hFile, - DWORD nNumberOfBytesToWrite, - DWORD nNumberOfBytesPerSend, - LPOVERLAPPED lpOverlapped, - LPTRANSMIT_FILE_BUFFERS lpTransmitBuffers, - DWORD dwFlags - ); - - (does it start from the current position in the handle? I would - hope so, or else it is pretty dumb.) - - lpTransmitBuffers allows for headers and trailers. - - Documentation at: - - http://premium.microsoft.com/msdn/library/sdkdoc/wsapiref_3pwy.htm - http://premium.microsoft.com/msdn/library/conf/html/sa8ff.htm - - Even less related to page based IO: just context switching: - AcceptEx does an accept(), and returns the start of the - input data. see: - - http://premium.microsoft.com/msdn/library/sdkdoc/pdnds/sock2/wsapiref_17jm.htm - - What this means is you require one less syscall to do a - typical request, especially if you have a cache of handles - so you don't have to do an open or close. Hmm. Interesting - question: then, if TransmitFile starts from the current - position, you need a mutex around the seek and the - TransmitFile. If not, you are just limited (eg. byte - ranges) in what you can use it for. - - Also note that TransmitFile can specify TF_REUSE_SOCKET, so that - after use the same socket handle can be passed to AcceptEx. - Obviously only good where we don't have a persistent connection - to worry about. - ----- - -Note that all this is shot to bloody hell by HTTP-NG's multiplexing. -If fragment sizes are big enough, it could still be worthwhile to -do copy avoidence. It also causes performance issues because of -its credit system that limits how much you can write in a single -chunk. - -Don't tell me that if HTTP-NG becomes popular we will seen vendors -embedding SMUX (or whatever multiplexing is used) in the kernel to -get around this stuff. 
There we go, Apache with a loadable kernel -module. - ----- - -Larry McVoy's document for SGI regarding sendfile/TransmitFile: -ftp://ftp.bitmover.com/pub/splice.ps.gz diff --git a/buckets/doc_stacked_io.txt b/buckets/doc_stacked_io.txt deleted file mode 100644 index 9d2ac9ee8..000000000 --- a/buckets/doc_stacked_io.txt +++ /dev/null @@ -1,1312 +0,0 @@ -[djg: comments like this are from dean] - -This past summer, Alexei and I wrote a spec for an I/O Filters API... -this proposal addresses one part of that -- 'stacked' I/O with buff.c. - -We have a couple of options for stacked I/O: we can either use existing -code, such as sfio, or we can rewrite buff.c to do it. We've gone over -the first possibility at length, though, and there were problems with each -implemenation which was mentioned (licensing and compatibility, -specifically); so far as I know, those remain issues. - -Btw -- sfio will be supported w/in this model... it just wouldn't be the -basis for the model's implementation. - - -- Ed Korthof | Web Server Engineer -- - -- ed@organic.com | Organic Online, Inc -- - -- (415) 278-5676 | Fax: (415) 284-6891 -- - ---------------------------------------------------------------------------- -Stacked I/O With BUFFs - Sections: - - 1.) Overview - 2.) The API - User-supplied structures - API functions - 3.) Detailed Description - The bfilter structure - The bbottomfilter structure - The BUFF structure - Public functions in buff.c - 4.) Efficiency Considerations - Buffering - Memory copies - Function chaining - writev - 5.) Code in buff.c - Default Functions - Heuristics for writev - Writing - Reading - Flushing data - Closing stacks and filters - Flags and Options - -************************************************************************* - Overview - -The intention of this API is to make Apache's BUFF structure modular -while retaining high efficiency. 
Basically, it involves rewriting -buff.c to provide 'stacked' I/O -- where the data is passed through a -series of 'filters', which may modify it. - -There are two parts to this, the core code for BUFF structures, and the -"filters" used to implement new behavior. "filter" is used to refer to -both the sets of 6 functions, as shown in the bfilter structure in the -next section, and to BUFFs which are created using a specific bfilter. -These will also be occasionally referred to as "user-supplied", though -the Apache core will need to use these as well for basic functions. - -The user-supplied functions should use only the public BUFF API, rather -than any internal details or functions. One thing which may not be -clear is that in the core BUFF functions, the BUFF pointer passed in -refers to the BUFF on which the operation will happen. OTOH, in the -user-supplied code, the BUFF passed in is the next buffer down the -chain, not the current one. - -************************************************************************* - The API - - User-supplied structures - -First, the bfilter structure is used in all filters: - typedef struct { - int (*writev)(BUFF *, void *, struct iovec *, int); - int (*read)(BUFF *, void *, char *, int); - int (*write)(BUFF *, void *, const char *, int); - int (*flush)(BUFF *, void *, const char *, int, bfilter *); - int (*transmitfile)(BUFF *, void *, file_info_ptr *); - void (*close)(BUFF *, void *); - } bfilter; - -bfilters are placed into a BUFF structure along with a -user-supplied void * pointer. 
- -Second, the following structure is for use with a filter which can -sit at the bottom of the stack: - - typedef struct { - void *(*bgetfileinfo)(BUFF *, void *); - void (*bpushfileinfo)(BUFF *, void *, void *); - } bbottomfilter; - - - BUFF API functions - -The following functions are new BUFF API functions: - -For filters: - -BUFF * bcreatestack(pool *p, int flags, struct bfilter *, - struct bbottomfilter *, void *); -BUFF * bpushfilter (BUFF *, struct bfilter *, void *); -BUFF * bpushbuffer (BUFF *, BUFF *); -BUFF * bpopfilter(BUFF *); -BUFF * bpopbuffer(BUFF *); -void bclosestack(BUFF *); - -For BUFFs in general: - -int btransmitfile(BUFF *, file_info_ptr *); -int bsetstackopts(BUFF *, int, const void *); -int bsetstackflags(BUFF *, int, int); - -Note that a new flag is needed for bsetstackflags: -B_MAXBUFFERING - -The current bcreate should become - -BUFF * bcreatebuffer (pool *p, int flags, struct bfilter *, void *); - -************************************************************************* - Detailed Explanation - - bfilter structure - -The void * pointer used in all these functions, as well as those in the -bbottomfilter structure and the filter API functions, is always the same -pointer w/in an individual BUFF. - -The first function in a bfilter structure is 'writev'; this is only -needed for high efficiency writing, generally at the level of the system -interface. In it's absence, multiple writes will be done w/ 'write'. -Note that defining 'writev' means you must define 'write'. - -The second is 'write'; this is the generic writing function, taking a BUFF -* to which to write, a block of text, and the length of that block of -text. The expected return is the number of characters (out of that block -of text) which were successfully processed (rather than the number of -characters actually written). 
- -The third is 'read'; this is the generic reading function, taking a BUFF * -from which to read data, and a void * buffer in which to put text, and the -number of characters to put in that buffer. The expected return is the -number of characters placed in the buffer. - -The fourth is 'flush'; this is intended to force the buffer to spit out -any data it may have been saving, as well as to clear any data the -BUFF code was storing. If the third argument is non-null, then it -contains more text to be printed; that text need not be null terminated, -but the fourth argument contains the length of text to be processed. The -expected return value should be the number of characters handled out -from the third argument (0 if there are none), or -1 on error. Finally, -the fifth argument is a pointer to the bfilter struct containing this -function, so that it may use the write or writev functions in it. Note -that general buffering is handled by BUFF's internal code, and module -writers should not store data for performance reasons. - -The fifth is 'transmitfile', which takes as its arguments a buffer to -which to write (if non-null), the void * pointer containing configuration -(or other) information for this filter, and a system-dependent pointer -(the file_info_ptr structure will be defined on a per-system basis) -containing information required to print the 'file' in question. -This is intended to allow zero-copy TCP in Win32. - -The sixth is 'close'; this is what is called when the connection is being -closed. The 'close' should not be passed on to the next filter in the -stack. Most filters will not need to use this, but if database handles -or some other object is created, this is the point at which to remove it. -Note that flush is called automatically before this. - - bbottomfilter Structure - -The first function, bgetfileinfo, is designed to allow Apache to get -information from a BUFF struct regarding the input and output sources. 
-This is currently used to get the input file number to select on a -socket to see if there's data waiting to be read. The information -returned is platform specific; the void * pointer passed in holds -the void * pointer passed to all user-supplied functions. - -The second function, bpushfileinfo, is used to push file information -onto a buffer, so that the buffer can be fully constructed and ready -to handle data as soon as possible after a client has connected. -The first void * pointer holds platform specific information (in -Unix, it would be a pair of file descriptors); the second holds the -void * pointer passed to all user-supplied functions. - -[djg: I don't think I really agree with the distinction here between -the bottom and the other filters. Take the select() example, it's -valid for any layer to define a fd that can be used for select... -in fact it's the topmost layer that should really get to make this -definition. Or maybe I just have your top and bottom flipped. In -any event I think this should be part of the filter structure and -not separate.] - - The BUFF structure - -A couple of changes are needed for this structure: remove fd and -fd_in; add a bfilter structure; add a pointer to a bbottomfilter; -add three pointers to the next BUFFs: one for the next BUFF in the -stack, one for the next BUFF which implements write, and one -for the next BUFF which implements read. - - - Public functions in buff.c - -BUFF * bpushfilter (BUFF *, struct bfilter *, void *); - -This function adds the filter functions from bfilter, stacking them on -top of the BUFF. It returns the new top BUFF, or NULL on error. - -BUFF * bpushbuffer (BUFF *, BUFF *); - -This function places the second buffer on the top of the stack that -the first one is on. It returns the new top BUFF, or NULL on error. 
 - -BUFF * bpopfilter(BUFF *); -BUFF * bpopbuffer(BUFF *); - -Detaches the top-most filter from the stack, and returns the new -top-level BUFF, or NULL on error or when there are no BUFFs -remaining. The two are synonymous. - -void bclosestack(BUFF *); - -Closes the I/O stack, removing all the filters in it. - -BUFF * bcreatestack(pool *p, int flags, struct bfilter *, - struct bbottomfilter *, void *); - -This creates an I/O stack. It returns NULL on error. - -BUFF * bcreatebuffer(pool *p, int flags, struct bfilter *, void *); - -This creates a BUFF for later use with bpushbuffer. The BUFF is -not set up to be used as an I/O stack, however. It returns NULL -on error. - -int bsetstackopts(BUFF *, int, const void *); -int bsetstackflags(BUFF *, int, int); - -These functions, respectively, set options on all the BUFFs in a -stack. The new flag, B_MAXBUFFERING, is used to disable a feature -described in the next section, whereby only the first and last -BUFFs will buffer data. - -************************************************************************* - Efficiency Considerations - - Buffering - -All input and output is buffered by the standard buffering code. -People writing code to use buff.c should not concern themselves with -buffering for efficiency, and should not buffer except when necessary. - -The write function will typically be called with large blocks of text; -the read function will attempt to place the specified number of bytes -into the buffer. - -Dean noted that there are possible problems w/ multiple buffers; -further, some applications must not be buffered. This can be -partially dealt with by turning off buffering, or by flushing the -data when appropriate. - -However, some potential problems arise anyway. The simplest example -involves shrinking transformations; suppose that you have a set -of filters, A, B, and C, such that A outputs less text than it -receives, as does B (say A strips comments, and B gzips the result). 
-Then after a write to A which fills the buffer, A writes to B. -However, A won't write enough to fill B's buffer, so a memory copy -will be needed. This continues till B's buffer fills up, then -B will write to C's buffer -- with the same effect. - -[djg: I don't think this is the issue I was really worried about -- -in the case of shrinking transformations you are already doing -non-trivial amounts of CPU activity with the data, and there's -no copying of data that you can eliminate anyway. I do recognize -that there are non-CPU intensive filters -- such as DMA-capable -hardware crypto cards. I don't think they're hard to support in -a zero-copy manner though.] - -The maximum additional number of bytes which will be copied in this -scenario is on the order of nk, where n is the total number of bytes, -and k is the number of filters doing shrinking transformations. - -There are several possible solutions to this issue. The first -is to turn off buffering in all but the first filter and the -last filter. This reduces the number of unnecessary byte copies -to at most one per byte, however it means that the functions in -the stack will get called more frequently; but it is the default -behavior, overridable by setting the B_MAXBUFFERING with -bsetstackflags. Most filters won't involve a net shrinking -transformation, so even this will rarely be an issue; however, -if the filters do involve a net shrinking transformation, for -the sake of network-efficiency (sending reasonably sized blocks), -it may be more efficient anyway. - -A second solution is more general use of writev for communication -between different buffers. This complicates the programing work, -however. - - - Memory copies - -Each write function is passed a pointer to constant text; if any changes -are being made to the text, it must be copied. However, if no changes -are made to the text (or to some smaller part of it), then it may be -sent to the next filter without any additional copying. 
This should -provide the minimal necessary memory copies. - -[djg: Unfortunately this makes it hard to support page-flipping and -async i/o because you don't have any reference counts on the data. -But I go into a little detail that already in docs/page_io.] - - Function chaining - -In order to avoid unnecessary function chaining for reads and writes, -when a filter is pushed onto the stack, the buff.c code will determine -which is the next BUFF which contains a read or write function, and -reads and writes, respectively, will go directly to that BUFF. - - writev - -writev is a function for efficient writing to the system; in terms of -this API, however, it also works for dealing with multiple blocks of -text without doing unnecessary byte copies. It is not required. - -Currently, the system level writev is used in two contexts: for -chunking and when a block of text is writen which, combined with -the text already in the buffer, would make the buffer overflow. - -writev would be implemented both by the default bottom level filter -and by the chunking filter for these operations. In addition, writev -may, be used, as noted above, to pass multiple blocks of text w/o -copying them into a single buffer. Note that if the next filter does -not implement writev, however, this will be equivalent to repeated -calls to write, which may or may not be more efficient. Up to -IOV_MAX-2 blocks of text may be passed along in this manner. Unlike -the system writev call, the writev in this API should be called only -once, with a array with iovec's and a count as to the number of -iovecs in it. - -If a bfilter defines writev, writev will be called whether or not -NO_WRITEV is set; hence, it should deal with that case in a reasonable -manner. - -[djg: We can't guarantee atomicity of writev() when we emulate it. -Probably not a problem, just an observation.] 
- -************************************************************************* - Code in buff.c - - Default Functions - -The default actions are generally those currently performed by Apache, -save that they they'll only attempt to write to a buffer, and they'll -return an error if there are no more buffers. That is, you must implement -read, write, and flush in the bottom-most filter. - -Except for close(), the default code will simply pass the function call -on to the next filter in the stack. Some samples follow. - - Heuristics for writev - -Currently, we call writev for chunking, and when we get a enough so that -the total overflows the buffer. Since chunking is going to become a -filter, the chunking filter will use writev; in addition, bwrite will -trigger bwritev as shown (note that system specific information should -be kept at the filter level): - -in bwrite: - - if (fb->outcnt > 0 && nbyte + fb->outcnt >= fb->bufsiz) { - /* build iovec structs */ - struct iovec vec[2]; - vec[0].iov_base = (void *) fb->outbase; - vec[0].iov_len = fb->outcnt; - fb->outcnt = 0; - vec[1].iov_base = (void *)buff; - vec[1].iov_length = nbyte; - return bwritev (fb, vec, 2); - } else if (nbye >= fb->bufsiz) { - return write_with_errors(fb,buff,nbyte); - } - -Note that the code above takes the place of large_write (as well -as taking code from it). - -So, bwritev would look something like this (copying and pasting freely -from the current source for writev_it_all, which could be replaced): - ------ -int bwritev (BUFF * fb, struct iovec * vec, int nvecs) { - if (!fb) - return -1; /* the bottom level filter implemented neither write nor - * writev. */ - if (fb->bfilter.bwritev) { - return bf->bfilter.writev(fb->next, vec, nvecs); - } else if (fb->bfilter.write) { - /* while it's nice an easy to build the vector and crud, it's painful - * to deal with partial writes (esp. 
w/ the vector) - */ - int i = 0,rv; - while (i < nvecs) { - do { - rv = fb->bfilter.write(fb, vec[i].iov_base, vec[i].iov_len); - } while (rv == -1 && (errno == EINTR || errno == EAGAIN) - && !(fb->flags & B_EOUT)); - if (rv == -1) { - if (errno != EINTR && errno != EAGAIN) { - doerror (fb, B_WR); - } - return -1; - } - fb->bytes_sent += rv; - /* recalculate vec to deal with partial writes */ - while (rv > 0) { - if (rv < vec[i].iov_len) { - vec[i].iov_base = (char *)vec[i].iov_base + rv; - vec[i].iov_len -= rv; - rv = 0; - if (vec[i].iov_len == 0) { - ++i; - } - } else { - rv -= vec[i].iov_len; - ++i; - } - } - if (fb->flags & B_EOUT) - return -1; - } - /* if we got here, we wrote it all */ - return 0; - } else { - return bwritev(fb->next,vec,nvecs); - } -} ------ -The default filter's writev function will pretty much like -writev_it_all. - - - Writing - -The general case for writing data is significantly simpler with this -model. Because special cases are not dealt with in the BUFF core, -a single internal interface to writing data is possible; I'm going -to assume it's reasonable to standardize on write_with_errors, but -some other function may be more appropriate. - -In the revised bwrite (which I'll ommit for brievity), the following -must be done: - check for error conditions - check to see if any buffering is done; if not, send the data - directly to the write_with_errors function - check to see if we should use writev or write_with_errors - as above - copy the data to the buffer (we know it fits since we didn't - need writev or write_with_errors) - -The other work the current bwrite is doing is - ifdef'ing around NO_WRITEV - numerous decisions regarding whether or not to send chunks - -Generally, buff.c has a number of functions whose entire purpose is -to handle particular special cases wrt chunking, all of which could -be simplified with a chunking filter. - -write_with_errors would not need to change; buff_write would. 
Here -is a new version of it: - ------ -/* the lowest level writing primitive */ -static ap_inline int buff_write(BUFF *fb, const void *buf, int nbyte) -{ - if (fb->bfilter.write) - return fb->bfilter.write(fb->next_writer,buff,nbyte); - else - return bwrite(fb->next_writer,buff,nbyte); -} ------ - -If the btransmitfile function is called on a buffer which doesn't implement -it, the system will attempt to read data from the file identified -by the file_info_ptr structure and use other methods to write to it. - - Reading - -One of the basic reading functions in Apache 1.3b3 is buff_read; -here is how it would look within this spec: - ------ -/* the lowest level reading primitive */ -static ap_inline int buff_read(BUFF *fb, void *buf, int nbyte) -{ - int rv; - - if (!fb) - return -1; /* the bottom level filter is not set up properly */ - - if (fb->bfilter.read) - return fb->bfilter.read(fb->next_reader,buf,nbyte,fb->bfilter_info); - else - return bread(fb->next_reader,buff,nbyte); -} ------ -The code currently in buff_read would become part of the default -filter. - - - Flushing data - -flush will get passed on down the stack automatically, with recursive -calls to bflush. The user-supplied flush function will be called then, -and also before close is called. The user-supplied flush should not -call flush on the next buffer. - -[djg: Poorly written "expanding" filters can cause some nastiness -here. In order to flush a layer you have to write out your current -buffer, and that may cause the layer below to overflow a buffer and -flush it. If the filter is expanding then it may have to add more to -the buffer before flushing it to the layer below. It's possible that -the layer below will end up having to flush twice. It's a case where -writev-like capabilities are useful.] - - Closing Stacks and Filters - -When a filter is removed from the stack, flush will be called then close -will be called. 
When the entire stack is being closed, this operation -will be done automatically on each filter within the stack; generally, -filters should not operate on other filters further down the stack, -except to pass data along when flush is called. - - Flags and Options - -Changes to flags and options using the current functions only affect -one buffer. To affect all the buffers on down the chain, use -bsetstackopts or bsetstackflags. - -bgetopt is currently only used to grab a count of the bytes sent; -it will continue to provide that functionality. bgetflags is -used to provide information on whether or not the connection is -still open; it'll continue to provide that functionality as well. - -The core BUFF operations will remain, though some operations which -are done via flags and options will be done by attaching appropriate -filters instead (eg. chunking). - -[djg: I'd like to consider filesystem metadata as well -- we only need -a few bits of metadata to do HTTP: file size and last modified. We -need an etag generation function, it is specific to the filters in -use. You see, I'm envisioning a bottom layer which pulls data out of -a database rather than reading from a file.] - - -************************************************************** -************************************************************** -Date: Wed, 9 Sep 1998 18:55:40 -0700 (PDT) -From: Alexei Kosut <akosut@leland.stanford.edu> -To: new-httpd@apache.org -Subject: A Magic Cache example -Message-ID: <Pine.GSO.3.96.980909182642.29690A-100000@myth1.Stanford.EDU> - -During the drive home, I came up with a good example of how I envision the -new module/cache/layer model thingy working. Comments please: - -The middle end of the server is responsible for taking the request the -front end gives it and somehow telling the back end how to fulfill it. 
I -look at it like this: The request is a URI (Uniform Resource Identifier) -and a set of request dimensions (the request headers, the remote IP -address, the time of day, etc...). The middle end, via its configuration, -translates this into a request for content from a backing store module, -plus possibly some filter modules. Since the term "filename" is too -flat-file specific, let's call the parameter we pass to the backing store -a SRI (Specific Resource Identifier), in a format specific to that module. - -Our example is similar to the one I was using earlier, with some -additions: The request is for a URI, say "/skzb/teckla.html". The response -is a lookup from a (slow) database. The URI maps to the mod_database SRI -of "BOOK:0-441-7997-9" (I made that format up). We want to take that -output and convert it from whatever charset it's in into Unicode. We then -have a PHP script that works on a Unicode document and does things based -on whether the browser is Netscape or not. Then we translate the document -to the best charset that matches the characters used and the client's -capabilities and send it. - -So upon request for /skzb/teckla.html, the middle end translates the -request into the following "equation": - - SRI: mod_database("BOOK:0-441-7997-9") - + filter: mod_charset("Unicode") - + filter: mod_php() - + fllter: mod_charset("best_fit") - ------------------------------------------------- - URI: /skzb/teckla.html - -It then constructs a stack of IO (NSPR) filters like this: - -mod_database -> cache-write -> mod_charset -> cache-write -> mod_php -> -cache_write -> mod_charset -> cache-write -> client - -And sets it to running. Each of the cache filters is a write-through -filter that copies its data into the cache with a tag based on what -equation the middle end uses to get to it, plus the request dimensions it -uses (info it gets from the modules). 
-
-The database access is stored under "SRI: mod_database(BOOK:0-441-79977-9)"
-with no dimensions (because it's the same for all requests). The first
-charset manipulation is stored under "SRI: mod_database(BOOK...) + filter:
-mod_charset(Unicode)", again with no dimensions. The PHP output is stored
-under "SRI: mod_database(BOOK...) + filter: mod_charset(Unicode) + filter:
-mod_php()" with dimensions of (User-Agent). The final output is stored both
-as "SRI: mod_database(BOOK...) + filter: mod_charset(Unicode) + filter:
-mod_php() + filter: mod_charset(best_fit)" and "URI: /skzb/teckla.html"
-(they're the same thing), both with dimensions of (User-Agent,
-Accept-Charset).
-
-So far so good. Now, when another request for /skzb/teckla.html comes in,
-the cache is consulted to see how much we can use. First, the URI is
-looked up. This can be done by a kernel or other streamlined part of the
-server. So "URI: /skzb/teckla.html" is looked up, and one entry pops out
-with dimensions of (User-Agent, Accept-Charset). The user-agent and
-accept-charset of the request are compared against the ones of the stored
-entity(ies). If one matches, it can be sent directly.
-
-If not, the server proceeds to look up "SRI: mod_database(BOOK...) +
-filter: mod_charset(Unicode) + filter: mod_php()". If the request has a
-different accept-charset, but the same user-agent, then this can be
-reprocessed by mod_charset and used. Otherwise, the server proceeds back
-to "SRI: mod_database(BOOK...) + filter: mod_charset(Unicode)", which will
-match any request. There's probably some sort of cache invalidation
-(expires, etc...) that happens eventually to result in a new database
-lookup, but mostly, that very costly operation is avoided.
-
-I think I've made it out to be a bit more complicated than it is, with the
-long equation strings mixed in there. But the above reflects my
-understanding of how the new Apache 2.0 system should work. 
- -Note 1: The cache is smarter than I make it out here when it comes to -adding new entries. It should realize that, since the translation to -Unicode doesn't change or restrict the dimensions of the request, it -really is pointless to cache the original database lookup, since it will -always be translated in exactly the same manner. Knowing this, it will -only cache the Unicode version. - -Note 2: PHP probably doesn't work with Unicode. And there may not be a way -to identify a script as only acting on the User-Agent dimension. That's -not the point. - -Note 3: Ten bonus points to anyone who's read this far, and is the first -person to answer today's trivia question: What does the skzb referred to -in the example URI stand for? There's enough information in this mail to -figure it out (with some help from the Net), even if you don't know -offhand (though if you do, I'd be happier). - --- Alexei Kosut <akosut@stanford.edu> <http://www.stanford.edu/~akosut/> - Stanford University, Class of 2001 * Apache <http://www.apache.org> * - - -************************************************************** -Message-ID: <19980922224326.A16219@aisa.fi.muni.cz> -Date: Tue, 22 Sep 1998 22:43:26 +0200 -From: Honza Pazdziora <adelton@informatics.muni.cz> -To: new-httpd@apache.org -Subject: Re: I/O Layering in next version of Apache. -References: <19980922111627.19784.qmail@hyperreal.org> <3607D53A.1FF6D93@algroup.co.uk> <13831.55021.929560.977122@zap.ml.org> -In-Reply-To: <13831.55021.929560.977122@zap.ml.org>; from Ben Hyde on Tue, Sep 22, 1998 at 01:04:12PM -0400 - -> >Does anyone have a starting point for layered I/O? I know we kicked it - -Hello, - -there has been a thread on modperl mailing list recently about -problems we have with the current architecture. Some of the points -were: what requerements will be put on modules to be new I/O -compliant. I believe it's the Apache::SSI vs. Apache::SSIChain -difference between 1.3.* and 2.*. 
The first fetches the file _and_
-does the SSI, the second takes input from a different module that
-either gets the HTML or runs the CGI or so, and processes its output.
-Should all modules be capable of working on some other module's
-output? Probably except those that actually go to disk or database for
-the primary data.
-
-Randal's point was that output of any module could be processed, so
-that no module should make any assumption whether it's sending data
-directly to the browser or to some other module. This can be used both
-for caching, but it is also one of the things to get the filtering
-transparent.
-
-Also, as Apache::GzipChain module shows, once you process the output,
-you may need to modify the headers as well. I was hit by this when I
-tried to convert between charsets, to send out those that the browsers
-would understand. The Apache::Mason module shows that you can build
-a page from pieces. Each of the pieces might have different
-characteristics (charset, for example), so with each piece of code we
-might need to have its own headers that describe it, or at least the
-difference between the final (global) header-outs and its local.
-
-Sorry for bringing so many Perl module names in, but modperl is
-currently a way to get some layered I/O done in 1.3.*, so I only have
-practical experience with it.
-
-Yours,
-
-------------------------------------------------------------------------
- Honza Pazdziora | adelton@fi.muni.cz | http://www.fi.muni.cz/~adelton/
-   I can take or leave it if I please
-------------------------------------------------------------------------
-
-**************************************************************
-Date: Wed, 23 Sep 1998 10:46:47 -0700 (PDT)
-From: Dean Gaudet <dgaudet@arctic.org>
-To: new-httpd@apache.org
-Subject: Re: I/O Layering in next version of Apache. 
-In-Reply-To: <36092F2D.BCC4E5C1@algroup.co.uk> -Message-ID: <Pine.LNX.3.96dg4.980923103916.24223K-100000@twinlark.arctic.org> - -On Wed, 23 Sep 1998, Ben Laurie wrote: - -> Dean Gaudet wrote: -> > -> > On Wed, 23 Sep 1998, Ben Laurie wrote: -> > -> > > Is the simplest model that accomodates this actually just a stack -> > > (tree?) of webservers? Naturally, we wouldn't talk HTTP between the -> > > layers, but pass (header,content) pairs around (effectively). -> > > Interesting. -> > -> > We could just talk "compiled" HTTP -- using a parsed representation of -> > everything essentially. -> -> That's pretty much what I had in mind - but does it make sense? I have -> to admit, it makes a certain amount of sense to me, but I still have -> this nagging suspicion that there's a catch. - -We talked about this during the developers meeting earlier this summer... -while we were hiking, so I don't think there were any notes. - -I think it'd be a useful exercise to specify a few example applications we -want to be able to support, and then consider methods of implementing -those applications. Make the set as diverse and small as possible. I'll -take the easiest one :) - -- serve static content from arbitrary backing store (e.g. file, database) - -Once we flesh such a list out it may be easier to consider implementation -variations... - -I think it was Cliff who said it this way: in a multiple layer setup he -wants to be able to partition the layers across servers in an arbtrary -manner. For example, a proxy cache on one box which the world talks to, -and which backends to various other boxes for dynamic and static content. -Or maybe the static content is on the same server as the proxy. If this is -something we want to support then talking (a restricted form of) HTTP -between layers is interesting. 
- -Now we can all start worrying about performance ;) - -Dean - - -************************************************************** -Date: Wed, 23 Sep 1998 11:23:30 -0700 (PDT) -From: Alexei Kosut <akosut@leland.stanford.edu> -To: new-httpd@apache.org -Subject: Re: I/O Layering in next version of Apache. -In-Reply-To: <36092F2D.BCC4E5C1@algroup.co.uk> -Message-ID: <Pine.GSO.3.96.980923111613.17322C-100000@myth6.Stanford.EDU> - -On Wed, 23 Sep 1998, Ben Laurie wrote: - -> > We could just talk "compiled" HTTP -- using a parsed representation of -> > everything essentially. -> -> That's pretty much what I had in mind - but does it make sense? I have -> to admit, it makes a certain amount of sense to me, but I still have -> this nagging suspicion that there's a catch. - -One important thing to note is that we want this server to be able to -handle non-HTTP requests. So using HTTP as the internal language (as we do -now) is not the way to go. What we talked about in SF was using a basic -set of key/value pairs to represent the metadata of the response. Which -would of course bear an uncanny resemblance to HTTP-style MIME headers... - -Certainly, and this is the point I think the originator of this thread -raised, each module layer (see the emails I sent a few weeks ago for more -details on how I see *that*) needs to provide both a content filter and a -metadata filter. Certainly a module that does encoding has to be able to -alter the headers to add a Content-Encoding, Transfer-Encoding, TE, or -what have you. Many module that does anything to the content will -want to add headers, and many others will need to alter the dimensions on -which the request is served, or what the parameters to those dimensions -are for the current request. The latter is absolutely vital for cacheing. - -The problem, as I see it, is this: Often, I suspect it will be the case -that the module does not know what metadata it will be altering (and how) -until after it has processed the request. 
i.e., a PHP script may not -discover what dimensions it uses (as we discussed earlier) until after it -has parsed the entire script. But if the module is functioning as an -in-place filter, that can cause massive headaches if we need the metadata -in a complete form *before* we sent the entity, as we do for HTTP. - -I'm not quite sure how to solve that problem. Anyone have any brilliant -ideas? - -(Note that for internal caching, we don't actually need the dimension data -until after the request, because we can alter the state of the cache at -any time, but if we want to place nice with HTTP and send Vary: headers -and such, we do need that information. I guess we could send Vary: -footers...) - --- Alexei Kosut <akosut@stanford.edu> <http://www.stanford.edu/~akosut/> - Stanford University, Class of 2001 * Apache <http://www.apache.org> * - - -************************************************************** -Date: 23 Sep 1998 20:26:58 -0000 -Message-ID: <19980923202658.25736.qmail@zap.ml.org> -From: Ben Hyde <bhyde@pobox.com> -To: new-httpd@apache.org -Subject: Stacking up Response Handling -In-Reply-To: <Pine.GSO.3.96.980923111613.17322C-100000@myth6.Stanford.EDU> -References: <36092F2D.BCC4E5C1@algroup.co.uk> - <Pine.GSO.3.96.980923111613.17322C-100000@myth6.Stanford.EDU> - -Alexei Kosut writes: ->The problem, as I see it, is this: Often, I suspect it will be the case ->that the module does not know what metadata it will be altering (and how) ->until after it has processed the request. i.e., a PHP script may not ->discover what dimensions it uses (as we discussed earlier) until after it ->has parsed the entire script. But if the module is functioning as an ->in-place filter, that can cause massive headaches if we need the metadata ->in a complete form *before* we sent the entity, as we do for HTTP. -> ->I'm not quite sure how to solve that problem. Anyone have any brilliant ->ideas? 
- -This is the same as building a layout engine that incremental layout -but simpler since I doubt we'd want to allow for reflow. - -Sometimes you can send output right along, sometimes you have to wait. -I visualize the output as a tree/outline and as it is swept out a -stack holds the path to the leave. Handlers for the individual nodes -wait or proceed depending on if they can. - -It's pretty design with the pipeline consisting of this stack of -output transformers/generators. Each pipeline stage accepts a stream -of output_chunks. I think of these output_chunks as coming in plenty -of flavors, for example transmit_file, transmit_memory, etc. Some -pipeline stages might handle very symbolic chunks. For example -transmit_xml_tree might be handed to transform_xml_to_html stage in -the pipeline. - -I'm assuming the core server would have only a few kinds of pipeline -nodes, generate_response, generate_content_from_url_via_file_system, -generate_via_classic_module_api. Things like convert_char_set or -do_cool_transfer_encoding, could easily be loaded at runtime and -authored outside the core. That would be nice. - -For typical fast responses we wouldn't push much on this stack at -all. It might go something like this: Push generate_response node, -it selects an appropriate content generator by consulting the -module community and pushes that. Often this is -generate_content_from_url_via_file_system which in turn does -all that ugly mapping to a file name and then passes -transmit_file down the pipeline and pops it's self off the stack. -generate_response once back on top again does the transmit and -pops off. - -For rich complex output generation we might push all kinds of things -(charset converters, transfer encoders, XML -> HTML rewriters, cache -builders, old style apache module API simulators, what ever). - -The intra-stack element protocol get's interesting around issues -like error handling, blocking, etc. 
- -I particularly like how this allows simulation of the old module API, -as well as the API of other servers, and experimenting with other -module API which cross process or machine boundaries. - -In many ways this isn't that much different from what was proposed -a year ago. - - - ben - -************************************************************** -From: Ben Hyde <bhyde@pobox.com> -Date: Wed, 23 Sep 1998 21:58:54 -0400 (EDT) -To: new-httpd@apache.org -Subject: Re: Core server caching -In-Reply-To: <Pine.GSO.3.96.980923142800.14009A-100000@elaine40.Stanford.EDU> -References: <19980923210119.25763.qmail@zap.ml.org> - <Pine.GSO.3.96.980923142800.14009A-100000@elaine40.Stanford.EDU> -Message-ID: <13833.39467.942203.885143@zap.ml.org> - -Alexei Kosut writes: ->On 23 Sep 1998, Ben Hyde wrote: -> ->> The core problem of caching seems to me to get confused by the ->> complexity of designing a caching proxy. If one ignores that then the ->> core problem of caching seems quite simple. -> ->Actually, for an HTTP server, they're the same problem, if you want to be ->able to cache any sort of dynamic request. And caching static requests is ->kind of silly (Dean's flow stuff notwithstanding, making copies of static ->files in either memory or on disk is silly, since the OS can do it better ->than we can). - -I don't disagree with any of the things you said, so I guess I'm -failing to get across where in this structure the functions your -pointing out as necessary would reside as versus where the "chunk -cache" mechanism I'm yearning for would fit. - -Well, that's not entirely true I do feel it's helpful to make this -point. - -The HTTP spec's definition of proper caching is terribly constrained -by the poverty of information available to the proxy server. He is -trapped in the middle between an opinionated content provider and an -opinionated content consumer. 
It was written in an attempt to keep -people like AOL from making their opinions dominate either of those -other two. Proper caching by a server that is right next to the -content generation can and ought to include both more or less -heuristics that are tunable by the opinions of the content provider -who presumably we are right next to. - -Imagine the server that has a loop that goes like so: - - loop - r<-swallow_incomming_request - h<-select_response_handler(r) - initialize_response_pipeline() - push_pipeline_element(h) - tend_pipeline_until_done() - end loop - -In most of the web based applications I've seen the -select_response_handler step evolves into something that looks like an -AI expert system. That said, what I'd like to see is in Apache2 is a -simple dispatch along with a way to plug-in more complex dispatching -mechanisms. I'd very much like to avoid having that get confused with -the suite of response_handlers. - -I ignored the complexity of when to you can safely select -a cached value because I think it's in the select_response_handler -step. And possibly, I'll admit, not part of what I called the -"core server" - -Clearly I'm a fool for using this term 'core server' since it -doesn't mean anything. I wanted it to mean that loop above -and the most minimal implementations for the pipeline and -the select_response_handler one could imagine before starting -to pile on. The server as shipped would have a lot more -stuff in it! - -What I'm focused on is what has to be in that core versus -what has to be, but can be outside of it. - -So. as i thought about the state of the pipeline just after -the call on initialize_response_pipeline I at first thought -it would have something much like the current buffer abstraction -in the pipeline. Then i got to wondering if transfer encoding, -charset conversion, or caching ought to be in there. - -I think there is an argument for putting some caching functionality -in there. 
Possibly because that entire knot is what you'd move -into the OS if you could. Possibly because this is the bit -that must fly. - -Recall that I think the pipeline takes a stream of response -chunks with things like memory_chunk, transfer_file_chunk, etc. -in that stream. The question is what flavors of chunks does -that bottom element in the pipeline take. It's the chunks -that fly (and nothing more?). So I got to thinking about -what does it mean to have a cached_chunk. - -A cached_chunk needs only the small operation set along -the lines of what I mentioned. A full caching scheme -can build on it. As an added benefit the caching scheme -can be dumb, standard, extremely witty without effecting -this portion of the design. - -A quick point about why I wanted the cache to handle things -smaller than entire responses. This isn't central I guess. - -I want a protocol with content generators that encourages -them to use dynamic programming tricks to quickly generate -portions of pages that are static over long periods. Such -a scheme has worked well in systems we've built. - - - ben hyde - -************************************************************** -From: Ben Hyde <bhyde@pobox.com> -Date: Thu, 29 Oct 1998 23:16:37 -0500 (EST) -To: new-httpd@apache.org -Subject: Re: Core server caching -In-Reply-To: <Pine.LNX.3.96dg4.981029175439.3639X-100000@twinlark.arctic.org> -References: <Pine.WNT.4.05.9810292049480.-445955@helium.jetpen.com> - <Pine.LNX.3.96dg4.981029175439.3639X-100000@twinlark.arctic.org> -Message-ID: <13881.12903.661334.819447@zap.ml.org> - -Dean Gaudet writes: ->On Thu, 29 Oct 1998, Rasmus Lerdorf wrote: -> ->> There are also weird and wacky things you would be able to do if you could ->> stack mod_php on top of mod_perl. -> ->You people scare me. -> ->Isn't that redundant though? -> ->Dean - -Yes it's scary, but oddly erotic, when these behemoths with their -gigantic interpreters try to mate. 
- -It's interesting syndrome, systems as soon as they get an interpreter -they tend to loose their bearings and grow into vast behemoths that -lumber about slowly crushing little problems with their vast mass. -Turing syndrome? - -I've heard people say modules can help avoid this, but I've rarely -seen it. Olde Unix kinda manages it remember being frightened by -awk. - -Can we nudge alloc.c/buff.c toward a bit of connective glue that -continues to let individual modules evolve their own gigantism while -avoiding vile effects on the core performance of the server? Stuff -like this: - - memory chunk alignment for optimal I/O - memory hand off along the pipeline - memory hand off crossing pool boundaries - memory hand off in zero copy cases - transmit file - transmit cache elements - insert/remove cache elements - leverage unique hardware and instructions - -That memcpy in ap_bread really bugs me. - -I'd be rather have routines that let me handoff chunks. Presumably -these would need to be able to move chunks across pool and buffer -boundaries. But zero copy if I don't touch the content and never a -memcpy just to let my lex the input. - -I've built systems like this with the buffers exposing a emacs -buffer style of abstraction, but with special kinds of marks -to denote what's released for sending, and what's been accepted -and lex'd on the input side. It does create mean all your -lexical and printf stuff has to be able to smoothly slide -over chunk boundaries. - - - ben - -************************************************************************* -Date: Sun, 27 Dec 1998 13:08:22 -0800 (PST) -From: Ed Korthof <ed@bitmechanic.com> -To: new-httpd@apache.org -Subject: I/O filters & reference counts -Message-ID: <Pine.LNX.3.96.981224163237.10687E-100000@crankshaft> - -Hi -- - -A while back, I indicated I'd propose a way to do reference counts w/ the -layered I/O I want to implement for 2.0 (assuming we don't use nspr)... 
-for single-threaded Apache, this seems unnecessary (assuming you don't use -shared memory in your filters to share data amoung the processes), but in -other situations it does have advantages. - -Anyway, what I'd propose involves using a special syntax when you want to -use reference counts. This allows Apache to continue using the -'pool'-based memory system (it may not be perfect, but imo it's reasonably -good), without creating difficult when you wish to free memory. - -If you're creating memory which you'll want to share amoung multiple -threads, you'll create it using a function more or less like: - - ap_palloc_share(pool *p, size_t size); - -you get back a void * pointer for use as normal. When you want to give -someone else a reference to it, you do the following: - - ap_pshare_data(pool *p1, pool *p2, void * data); - -where data is the return from above (and it must be the same). Then both -pools have a reference to the data & to a counter; when each pool is -cleaned up, it will automatically decrement the counter, and free the data -if the counter is down to zero. - -In addition, a pool can decrement the counter with the following: - - ap_pshare_free(pool * p1, void * data); - -after which the data may be freed. There would also be a function, - - ap_pshare_countrefs(pool * p1, void * data); - -which would return the number of pools holding a ref to 'data', or 1 if -it's not a shared block. - -Internally, the pool might either keep a list of the shared blocks, or a -balanced b-tree; if those are too slow, I'd look into passing back and -forth a (pointer to an) int, and simply use an array. The filter -declaring the shared memory would need to keep track of such an int, but -no one else would. - -In the context of I/O filters, this would mean that each read function -returns a const char *, which should not be cast to a non-const char * (at -least, not without calling ap_pshare_countrefs()). 
If a filter screwed -this up, you'd have a problem -- but that's more or less unavoidable with -sharing data amoung threads using reference counts. - -It might make sense to build a more general reference counting system; if -that's what people want, I'm also up for working on that. But one of the -advantages the pool system has is its simplicity, some of which would be -lost. - -Anyway, how does this sound? Reasonable or absurd? - -Thanks -- - -Ed - ---------------------------------------- -History repeats itself, first as tragedy, second as farce. - Karl Marx - -************************************************************************* -From: Ben Hyde <bhyde@pobox.com> -Date: Tue, 29 Dec 1998 11:50:01 -0500 (EST) -To: new-httpd@apache.org -Subject: Re: I/O filters & reference counts -In-Reply-To: <Pine.LNX.3.96.981227192210.10687H-100000@crankshaft> -References: <Pine.GSO.3.96.981227185303.8793B-100000@elaine21.Stanford.EDU> - <Pine.LNX.3.96.981227192210.10687H-100000@crankshaft> -Message-ID: <13960.60942.186393.799490@zap.ml.org> - - -There are two problems that reference counts address that we have, -but I still don't like them. - -These two are: pipeline memory management, and response paste up. A -good pipeline ought not _require_ memory proportional to the size of -the response but only proportional to the diameter of the pipe. -Response paste up is interesting because the library of clip art is -longer lived than the response or connection pool. There is a lot to -be said for leveraging the configuration pool life cycle for this kind -of thing. - -The pipeline design, and the handling of the memory it uses become -very entangled after a while - I can't think about one without the -other. This is the right place to look at this problem. I.e. this -is a problem to be lead by buff.c rework, not alloc.c rework. - -Many pipeline operations require tight coupling to primitive -operations that happen to be efficient. Neat instructions, memory -mapping, etc. 
Extreme efficiency in this pipeline makes it desirable -that the chunks in the pipeline be large. I like the phrase "chunks -and pumps" to summarize that there are two elements to design to get -modularity right here. - -The pasteup problem - one yearns for a library of fragments (call it a -cache, clip art, or templates if you like) which then readers in that -library can assemble these into responses. Some librarians like to -discard stale bits and they need a scheme to know that the readers -have all finished. The library resides in a pool that lives longer -than a single response connection. If the librarian can be convinced -that the server restart cycles are useful we get to a fall back to -there. - -I can't smell yet where the paste up problem belong in the 2.0 design -problem. (a) in the core, (b) in a module, (c) as a subpart of the -pipeline design, or (d) ostracized outside 2.0 to await a gift (XML?) -we then fold into Apache. I could probably argue any one of these. A -good coupling between this mechanism and the pipeline is good, limits -on the pipeline design space are very good. - - - ben - - -************************************************************************* -Date: Mon, 4 Jan 1999 18:26:36 -0800 (PST) -From: Ed Korthof <ed@bitmechanic.com> -To: new-httpd@apache.org -Subject: Re: I/O filters & reference counts -In-Reply-To: <13960.60942.186393.799490@zap.ml.org> -Message-ID: <Pine.LNX.3.96.981231094653.486R-100000@crankshaft> - -On Tue, 29 Dec 1998, Ben Hyde wrote: - -> There are two problems that reference counts address that we have, -> but I still don't like them. - -They certainly add some clutter. But they offer a solution to the -problems listed below... and specifically to an issue which you brought up -a while back: avoiding a memcpy in each read layer which has a read -function other than the default one. Sometimes a memcpy is required, -sometimes not; with "reference counts", you can go either way. 
- -> These two are: pipeline memory management, and response paste up. A -> good pipeline ought not _require_ memory proportional to the size of -> the response but only proportional to the diameter of the pipe. -> Response paste up is interesting because the library of clip art is -> longer lived than the response or connection pool. There is a lot to -> be said for leveraging the configuration pool life cycle for this kind -> of thing. - -I was indeed assuming that we would use pools which would last from one -restart (and a run through of the configuration functions) to the next. - -So far as limiting the memory requirements of the pipeline -- this is -primarily a function of the module programming. Because the pipeline will -generally live in a single thread (with the possible exception of the data -source, which could be another processes), the thread will only be -operating on a single filter at a time (unless you added custom code to -create a new thread to handle one part of the pipeline -- ugg). - -For writing, the idea would be to print one or more blocks of text with -each call; wait for the write function to return; and then recycle the -buffers used. - -Reading has no writev equivalent, so you only be able to do it one block -at a time, but this seems alright to me (reading data is actually a much -less complicated procedure in practice -- at least, with the applications -which I've seen). - -Recycling read buffers (so as to limit the size of the memory pipeline) -is the hardest part, when we add in this 'reference count' scheme -- but -it can be done, if the modules recieving the data are polite and indicate -when they're done with the buffer. Ie.: - - module 1 module 2 -1.) reads from module 2: - char * ap_bread(BUFF *, pool *, int); - -2.) returns a block of text w/ ref counts: - str= char* ap_pshare_alloc(size_t); - ... - return str; - keeps a ref to str. - -3.) 
handles the block of data - returned, and indicates it's - finished with: - void ap_pshare_free(char * block); - reads more data via - char * ap_bread(BUFF *, pool *, int); - -4.) tries to recycle the buffer used: - if (ap_pshare_count_refs(str)==1) - reuse str - else - str = ap_pshare_alloc(...) - ... - return str; - -5.) handles the block of data - returned... -... - -One disadvantage is that if module 1 doesn't release its hold on a memory -block it got from step 2 until step 5, then the memory block wouldn't be -reused -- you'd pay w/ a free & a malloc (or with a significant increase -in complexity -- I'd probably choose the free & malloc). And if the module -failed to release the memory (via ap_pshare_free), then the memory -requirements would be as large as the response (or request). - -I believe this is only relevant for clients PUTting large files onto their -servers; but w/ files which are potentially many gigabytes, it is -important that filters handling reading do this correctly. Of course, -that's currently the situation anyhow. - -> The pipeline design, and the handling of the memory it uses become -> very entangled after a while - I can't think about one without the -> other. This is the right place to look at this problem. I.e. this -> is a problem to be lead by buff.c rework, not alloc.c rework. - -Yeah, after thinking about it a little bit I realized that no (or very -little) alloc.c work would be needed to implement the system which I -described. Basically, you'd have an Apache API function which does malloc -on its own, and other functions (also in the API) which register a cleanup -function (for the malloc'ed memory) in appropriate pools. - -IMO, the 'pipeline' is likely to be the easiest place to work with this, -at least in terms of getting the most efficient & clean design which we -can. - -[snip good comments] -> I can't smell yet where the paste up problem belong in the 2.0 design -> problem. 
(a) in the core, (b) in a module, (c) as a subpart of the -> pipeline design, or (d) ostracized outside 2.0 to await a gift (XML?) -> we then fold into Apache. I could probably argue any one of these. A -> good coupling between this mechanism and the pipeline is good, limits -> on the pipeline design space are very good. - -An overdesigned pipeline system (or an overly large one) would definitely -not be helpful. If it would be useful, I'm happy to work on this (even if -y'all aren't sure if you'd want to use it); if not, I'm sure I can find -things to do with my time. <g> - -Anyway, I went to CPAN and got a copy of sfio... the latest version I -found is from Oct, 1997. I'd guess that using it (assuming this is -possible) might give us slightly less efficency (simply because sfio -wasn't built specifically for Apache, and customizing it is a much more -involved processes), but possibly fewer bugs to work out & lots of -interesting features. - -thanks -- - -Ed, slowly reading through the sfio source code - diff --git a/buckets/doc_wishes.txt b/buckets/doc_wishes.txt deleted file mode 100644 index c85d01ae0..000000000 --- a/buckets/doc_wishes.txt +++ /dev/null @@ -1,269 +0,0 @@ -Wishes -- use cases for layered IO -================================== - -[Feel free to add your own] - -Dirk's original list: ---------------------- - - This file is there so that I do not have to remind myself - about the reasons for Layered IO, apart from the obvious one. - - 0. To get away from a 1 to 1 mapping - - i.e. a single URI can cause multiple backend requests, - in arbitrary configurations, such as in paralel, tunnel/piped, - or in some sort of funnel mode. Such multiple backend - requests, with fully layered IO can be treated exactly - like any URI request; and recursion is born :-) - - 1. 
To do on the fly charset conversion - - Be, theoretically, be able to send out your content using - latin1, latin2 or any other charset; generated from static - _and_ dynamic content in other charsets (typically unicode - encoded as UTF7 or UTF8). Such conversion is prompted by - things like the user-agent string, a cookie, or other hints - about the capabilities of the OS, language preferences and - other (in)capabilities of the final receipient. - - 2. To be able to do fancy templates - - Have your application/cgi sending out an XML structure of - field/value pair-ed contents; which is substituted into a - template by the web server; possibly based on information - accessible/known to the webserver which you do not want to - be known to the backend script. Ideally that template would - be just as easy to generate by a backend as well (see 0). - - 3. On the fly translation - - And other general text and output mungling, such as translating - an english page in spanish whilst it goes through your Proxy, - or JPEG-ing a GIF generated by mod_perl+gd. - - Dw. - - -Dean's canonical list of use cases ----------------------------------- - -Date: Mon, 27 Mar 2000 17:37:25 -0800 (PST) -From: Dean Gaudet <dgaudet-list-new-httpd@arctic.org> -To: new-httpd@apache.org -Subject: canonical list of i/o layering use cases -Message-ID: <Pine.LNX.4.21.0003271648270.14812-100000@twinlark.arctic.org> - -i really hope this helps this discussion move forward. - -the following is the list of all applications i know of which have been -proposed to benefit from i/o layering. 
- -- data sink abstractions: - - memory destination (for ipc; for caching; or even for abstracting - things such as strings, which can be treated as an i/o - object) - - pipe/socket destination - - portability variations on the above - -- data source abstraction, such as: - - file source (includes proxy caching) - - memory source (includes most dynamic content generation) - - network source (TCP-to-TCP proxying) - - database source (which is probably, under the covers, something like - a memory source mapped from the db process on the same box, - or from a network source on another box) - - portability variations in the above sources - -- filters: - - encryption - - translation (ebcdic, unicode) - - compression - - chunking - - MUX - - mod_include et al - -and here are some of my thoughts on trying to further quantify filters: - -a filter separates two layers and is both a sink and a source. a -filter takes an input stream of bytes OOOO... and generates an -output stream of bytes which can be broken into blocks such -as: - - OOO NNN O NNNNN ... - - where O = an old or original byte copied from the input - and N = a new byte generated by the filter - -for each filter we can calculate a quantity i'll call the copied-content -ratio, or CCR: - - nbytes_old / nbytes_new - -where: - nbytes_old = number of bytes in the output of the - filter which are copied from the input - (in zero-copy this would mean "copy by - reference counting an input buffer") - nbytes_new = number of bytes which are generated - by the filter which weren't present in the - input - -examples: - -CCR = infinity: who cares -- straight through with no - transformation. the filter shouldn't even be there. - -CCR = 0: encryption, translation (ebcdic, unicode), compression. - these get zero benefit from zero-copy. 
- -CCR > 0: chunking, MUX, mod_include - -from the point of view of evaluating the benefit of zero-copy we only -care about filters with CCR > 0 -- because CCR = 0 cases degenerate into -a single-copy scheme anyhow. - -it is worth noting that the large_write heuristic in BUFF fairly -clearly handles zero-copy at very little overhead for CCRs larger than -DEFAULT_BUFSIZE. - -what needs further quantification is what the CCR of mod_include would -be. - -for a particular zero-copy implementation we can find some threshold k -where filters with CCRs >= k are faster with the zero-copy implementation -and CCRs < k are slower... faster/slower as compared to a baseline -implementation such as the existing BUFF. - -it's my opinion that when you consider the data sources listed above, and -the filters listed above that *in general* the existing BUFF heuristics -are faster than a complete zero-copy implementation. - -you might ask how does this jive with published research such as the -IO-Lite stuff? well, when it comes right down to it, the research in -the IO-Lite papers deal with very large CCRs and contrast them against -a naive buffering implementation such as stdio -- they don't consider -what a few heuristics such as apache's BUFF can do. - -Dean - - -Jim's summary of a discussion ------------------------------ - - OK, so the main points we wish to address are (in no particular order): - - 1. zero-copy - 2. prevent modules/filters from having to glob the entire - data stream in order to start processing/filtering - 3. the ability to layer and "multiplex" data and meta-data - in the stream - 4. the ability to perform all HTTP processing at the - filter level (including proxy), even if not implemented in - this phase - 5. Room for optimization and recursion - - Jim Jagielski - - -Roy's ramblings ---------------- - - Data flow networks are a very well-defined and understood software - architecture. 
They have a single, very important constraint: no filter - is allowed to know anything about the nature of its upstream or downstream - neighbors beyond what is defined by the filter's own interface. - That constraint is what makes data flow networks highly configurable and - reusable. Those are properties that we want from our filters. - - ... - - One of the goals of the filter concept was to fix the bird's nest of - interconnected side-effect conditions that allow buff to perform well - without losing the performance. That's why there is so much trepidation - about anyone messin with 1.3.x buff. - - ... - - Content filtering is my least important goal. Completely replacing HTTP - parsing with a filter is my primary goal, followed by a better proxy, - then internal memory caches, and finally zero-copy sendfile (in order of - importance, but in reverse order of likely implementation). Content - filtering is something we get for free using the bucket brigade interface, - but we don't get anything for free if we start with an interface that only - supports content filtering. - - ... - - I don't think it is safe to implement filters in Apache without either - a smart allocation system or a strict limiting mechanism that prevents - filters from buffering more than 8KB [or user-definable amount] of memory - at a time (for the entire non-flushed stream). It isn't possible to - create a robust server implementation using filters that allocate memory - from a pool (or the heap, or a stack, or whatever) without somehow - reclaiming and reusing the memory that gets written out to the network. - There is a certain level of "optimization" that must be present before - any filtering mechanism can be in Apache, and that means meeting the - requirement that the server not keel over and die the first time a user - requests a large filtered file. XML tree manipulation is an example - where that can happen. - - ... 
- - Disabling content-length just because there are filters in the stream - is a blatant cop-out. If you have to do that then the design is wrong. - At the very least the HTTP filter/buff should be capable of discovering - whether it knows the content length by examing whether it has the whole - response in buffer (or fd) before it sends out the headers. - - ... - - No layered-IO solution will work with the existing memory allocation - mechanisms of Apache. The reason is simply that some filters can - incrementally process data and some filters cannot, and they often - won't know the answer until they have processed the data they are given. - This means the buffering mechanism needs some form of overflow mechanism - that diverts parts of the stream into a slower-but-larger buffer (file), - and the only clean way to do that is to have the memory allocator for the - stream also do paging to disk. You can't do this within the request pool - because each layer may need to allocate more total memory than is available - on the machine, and you can't depend on some parts of the response being - written before later parts are generated because some filtering - decisions require knowledge of the end of the stream before they - can process the beginning. - - ... - - The purpose of the filtering mechanism is to provide a useful - and easy to understand means for extending the functionality of - independent modules (filters) by rearranging them in stacks - via a uniform interface. - - -Paul J. Reder's use cases for filters -------------------------------------- - - 1) Containing only text. - 2) Containing 10 .gif or .jpg references (perhaps filtering - from one format to the other). - 3) Containing an exec of a cgi that generates a text only file - 4) Containing an exec of a cgi that generates an SSI of a text only file. - 5) Containing an exec of a cgi that generates an SSI that execs a cgi - that generates a text only file (that swallows a fly, I don't know why). 
- 6) Containing an SSI that execs a cgi that generates an SSI that - includes a text only file. - NOTE: Solutions must be able to handle *both* 5 and 6. Order - shouldn't matter. - 7) Containing text that must be altered via a regular expression - filter to change all occurrences of "rederpj" to "misguided" - 8) Containing text that must be altered via a regular expression - filter to change all occurrences of "rederpj" to "lost" - 9) Containing perl or php that must be handed off for processing. - 10) A page in ascii that needs to be converted to ebcdic, or from - one code page to another. - 11) Use the babelfish translation filter to translate text on a - page from Spanish to Martian-Swahili. - 12) Translate to Esperanto, compress, and encrypt the output from - a php program generated by a perl script called from a cgi exec - embedded in a file included by an SSI :) - diff --git a/buckets/greg_patch.txt b/buckets/greg_patch.txt deleted file mode 100644 index 48eb73903..000000000 --- a/buckets/greg_patch.txt +++ /dev/null @@ -1,631 +0,0 @@ -Index: include/util_filter.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/include/util_filter.h,v -retrieving revision 1.3 -diff -u -r1.3 util_filter.h ---- include/util_filter.h 2000/08/05 04:38:57 1.3 -+++ include/util_filter.h 2000/08/05 09:48:20 -@@ -91,8 +91,40 @@ - * unterminated SSI directive). - */ - --/* forward declare the filter type */ -+/* -+ * BUCKETS -+ * -+ * Filtering uses a "bucket" metaphor for holding content to be processed. -+ * These buckets may contain arbitrary types of data. The selection of the -+ * type is dependent upon how the "upstream" filter/generator places content -+ * into the filter chain stream. -+ * -+ * For example, if a content generator uses ap_rwrite(), then the data will -+ * be placed into an AP_BUCKET_PTRLEN bucket. This bucket type contains a -+ * single pointer/length pair which will refer to the data. 
-+ * -+ * Buckets types are defined around the need to avoid copying the data if -+ * at all possible. Whatever the "natural" input form is for a piece of -+ * content, this is modelled within the bucket types. For example, when a -+ * content generator uses ap_rprintf() or a filter uses ap_fc_printf(), -+ * the format string and arguments are fed into/down the filter chain as -+ * just theat: a format string and its arguments. The filter mechanism avoids -+ * reducing the format/args to a final string for as long as possible. At -+ * some point, a filter or the output of the chain will combine these to -+ * produce actual bytes, but it is most optimal to defer this until it is -+ * truly needed. -+ * -+ * See the ap_bucket_type enumeration for the different bucket types which -+ * are currently defined. -+ * -+ * Buckets may also be linked into a list so that they may be passed as -+ * entire groups of content. The filter may insert/remove/replace the buckets -+ * within this list before passing the list to the next filter. -+ */ -+ -+/* forward declare some types */ - typedef struct ap_filter_t ap_filter_t; -+typedef struct ap_bucket_t ap_bucket_t; - - /* - * ap_filter_func: -@@ -114,7 +146,7 @@ - * next/prev to insert/remove/replace elements in the bucket list, but - * the types and values of the individual buckets should not be altered. - */ --typedef apr_status_t (*ap_filter_func)(); -+typedef void (*ap_filter_func)(ap_filter_t *filter, ap_bucket_t *bucket); - - /* - * ap_filter_type: -@@ -161,12 +193,155 @@ - */ - struct ap_filter_t { - ap_filter_func filter_func; -+ request_rec *r; - - void *ctx; - - ap_filter_type ftype; - ap_filter_t *next; - }; -+ -+/* -+ * ap_bucket_type: -+ * -+ * This enumeration is used to specify what type of bucket is present when -+ * an ap_bucket_t is provided. -+ * -+ * AP_BUCKET_PTRLEN: -+ * This bucket type defines a simple pointer/length pair for the content. -+ * The content is NOT necessarily null-terminated. 
-+ * -+ * This type occurs when ap_rwrite(), ap_fc_write(), ap_rputs(), -+ * ap_fc_puts(), ap_rputc(), or ap_fc_putc() is used by the upstream -+ * filter/generator. -+ * -+ * AP_BUCKET_STRINGS: -+ * This bucket type defines a set of null-terminated strings. The actual -+ * representation is through varargs' va_list type. A filter can sequence -+ * through the arguments using the va_arg() macro (and the "const char *" -+ * type). The filter should NOT use va_start() or va_end(). When va_arg() -+ * returns a NULL pointer, the list of strings is complete. -+ * -+ * Note that you can only sequence through the strings once, due to the -+ * definition of va_list. Thus, the first filter to do this sequencing -+ * must pass the resulting content to the next filter in a new form (the -+ * bucket cannot simply be passed because ->va is useless). -+ * -+ * This type occurs when ap_rvputs(), ap_fc_putstrs, or ap_fc_vputstrs() -+ * is used by the upstream filter/generator. -+ * -+ * AP_BUCKET_PRINTF: -+ * This bucket type defines a printf-style format and arguments. Similar -+ * to AP_BUCKET_STRINGS, this type also uses the ->va field to refer to -+ * the arguments. The format for the printf is stored in ->fmt. -+ * -+ * Also similar to AP_BUCKET_STRINGS, the va_start/va_end macros should -+ * not be used, and ->va should be processed only once. The bucket may -+ * not be passed after this processing. -+ * -+ * This type occurs when ap_rprintf(), ap_vrprintf(), ap_fc_printf(), or -+ * ap_fc_vprintf() is used by the upstream filter/generator. -+ * -+ * AP_BUCKET_FILE: -+ * This bucket type refers to an open file, from the current position -+ * and extending for ->flen bytes. Since there are some ap_file_t -+ * implementations/types that are not seekable, it may be impossible to -+ * "rewind" the file's current position after reading the contenxt. 
-+ * Therefore, it is important to note that once the content has been -+ * read, it must be passed to the next filter in a different form. -+ * -+ * Note: if there is a way to determine whether a file is seekable, then -+ * it would be legal to fetch the current position, read the contents, -+ * rewind to the original position, and then pass this bucket/file down -+ * to the next filter in the output chain. -+ * -+ * This type occurs when ap_send_fd(), ap_send_fd_length(), or -+ * ap_fc_sendfile() are used by the upstream filter/generator. -+ * -+ * AP_BUCKET_EOS: -+ * This bucket signals the end of the content stream. The filter should -+ * flush any internal state and issue errors if the state specifies that -+ * and end of stream cannot occur now (e.g. a command directive is -+ * incomplete). -+ * -+ * This type occurs when Apache finalizes a (sub)request, or when an -+ * upstream filter passes this bucket along. -+ */ -+typedef enum { -+ AP_BUCKET_PTRLEN, -+ AP_BUCKET_STRINGS, -+ AP_BUCKET_PRINTF, -+ AP_BUCKET_FILE, -+ AP_BUCKET_EOS -+} ap_bucket_type; -+ -+/* -+ * ap_bucket_t: -+ * -+ * The actual bucket definition. The type is determined by the ->type field. -+ * Which fields are valid/useful in the bucket is determined by the type, -+ * as noted below and in the comments above for each type. -+ * -+ * Buckets are arranged in a doubly-linked list so that a filter may insert, -+ * remove, or replace elements in a list of buckets. Generally, a filter -+ * should not change any bucket values other than these link pointers. 
-+ */ -+struct ap_bucket_t { -+ ap_bucket_type type; -+ -+ const char *buf; /* AP_BUCKET_PTRLEN */ -+ apr_size_t len; /* AP_BUCKET_PTRLEN */ -+ -+ const char *fmt; /* AP_BUCKET_PRINTF */ -+ va_list va; /* AP_BUCKET_STRINGS, _PRINTF */ -+ -+ apr_file_t *file; /* AP_BUCKET_FILE */ -+ apr_ssize_t flen; /* AP_BUCKET_FILE */ -+ -+ ap_bucket_t *next; /* next bucket in list */ -+ ap_bucket_t *prev; /* previous bucket in list */ -+}; -+ -+/* -+ * FILTER CHAIN OUTPUT FUNCTIONS -+ * -+ * These functions are used to deliver output/content down to the next -+ * filter in the chain. -+ * -+ * ap_fc_write(): write a block of bytes -+ * ap_fc_putc(): write a single character -+ * ap_fc_puts(): write a null-terminated string -+ * ap_fc_putstrs(): write a set of null-termianted strings; the end is -+ * signaled by a NULL parameter -+ * ap_fc_vputstrs(): same as ap_fc_putstrs(), but where the set of strings -+ * is defined by a va_list -+ * ap_fc_printf(): use printf-like semantics for writing a string -+ * ap_fc_vprintf(): use printf-like semantics, but with a va_list for the args -+ * ap_fc_sendfile(): send the file contents, from the current file position, -+ * and extending for "len" bytes; AP_FC_SENDFILE_ALL is -+ * used to send from current-position to the end-of-file. 
-+ * ap_fc_putbucket(): write a bucket into the filter chain -+ */ -+API_EXPORT(void) ap_fc_write(ap_filter_t *filter, const char *buf, -+ apr_size_t len); -+API_EXPORT(void) ap_fc_putc(ap_filter_t *filter, int c); -+API_EXPORT(void) ap_fc_puts(ap_filter_t *filter, const char *str); -+ -+API_EXPORT_NONSTD(void) ap_fc_putstrs(ap_filter_t *filter, ...); -+API_EXPORT(void) ap_fc_vputstrs(ap_filter_t *filter, va_list va); -+ -+API_EXPORT_NONSTD(void) ap_fc_printf(ap_filter_t *filter, -+ const char *fmt, ...); -+API_EXPORT(void) ap_fc_vprintf(ap_filter_t *filter, -+ const char *fmt, va_list va); -+ -+API_EXPORT(void) ap_fc_sendfile(ap_filter_t *filter, apr_file_t *file, -+ apr_ssize_t flen); -+#define AP_FC_SENDFILE_ALL ((apr_ssize_t) -1) -+ -+/* note: bucket->next and ->prev may be changed upon return from this */ -+API_EXPORT(void) ap_fc_putbucket(ap_filter_t *filter, ap_bucket_t *bucket); -+ - - /* - * ap_register_filter(): -Index: main/http_protocol.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/http_protocol.c,v -retrieving revision 1.100 -diff -u -r1.100 http_protocol.c ---- main/http_protocol.c 2000/08/02 05:26:48 1.100 -+++ main/http_protocol.c 2000/08/05 09:48:23 -@@ -77,6 +77,7 @@ - * support code... */ - #include "util_date.h" /* For parseHTTPdate and BAD_DATE */ - #include "util_charset.h" -+#include "util_filter.h" - #include "mpm_status.h" - #ifdef APR_HAVE_STDARG_H - #include <stdarg.h> -@@ -100,7 +101,10 @@ - ap_bgetopt (r->connection->client, BO_BYTECT, &r->bytes_sent); \ - } while (0) - -+#define DECL_FILTER_HEAD(r, f) \ -+ ap_filter_t f = { NULL, (r), NULL, 0, (r)->filters } - -+ - /* if this is the first error, then log an INFO message and shut down the - * connection. 
- */ -@@ -407,6 +411,9 @@ - - API_EXPORT(int) ap_set_content_length(request_rec *r, long clength) - { -+ if (r->filters != NULL) -+ return 0; -+ - r->clength = clength; - apr_table_setn(r->headers_out, "Content-Length", apr_psprintf(r->pool, "%ld", clength)); - return 0; -@@ -1280,10 +1287,10 @@ - - static void end_output_stream(request_rec *r) - { -- /* -- ** ### place holder to tell filters that no more content will be -- ** ### arriving. typically, they will flush any pending content -- */ -+ DECL_FILTER_HEAD(r, filter); -+ ap_bucket_t bucket = { AP_BUCKET_EOS }; -+ -+ ap_fc_putbucket(&filter, &bucket); - } - - void ap_finalize_sub_req_protocol(request_rec *sub) -@@ -2501,107 +2508,88 @@ - - API_EXPORT(int) ap_rputc(int c, request_rec *r) - { -+ DECL_FILTER_HEAD(r, filter); -+ - if (r->connection->aborted) - return EOF; - -- if (ap_bputc(c, r->connection->client) < 0) { -- check_first_conn_error(r, "rputc", 0); -- return EOF; -- } -+ ap_fc_putc(&filter, c); -+ - SET_BYTES_SENT(r); -- return c; -+ return 1; - } - - API_EXPORT(int) ap_rputs(const char *str, request_rec *r) - { -- int rcode; -+ DECL_FILTER_HEAD(r, filter); - - if (r->connection->aborted) - return EOF; - -- rcode = ap_bputs(str, r->connection->client); -- if (rcode < 0) { -- check_first_conn_error(r, "rputs", 0); -- return EOF; -- } -+ ap_fc_puts(&filter, str); -+ - SET_BYTES_SENT(r); -- return rcode; -+ return 1; - } - - API_EXPORT(int) ap_rwrite(const void *buf, int nbyte, request_rec *r) - { -- apr_ssize_t n; -- apr_status_t rv; -+ DECL_FILTER_HEAD(r, filter); - - if (r->connection->aborted) - return EOF; - -- /* ### should loop to avoid partial writes */ -- rv = ap_bwrite(r->connection->client, buf, nbyte, &n); -- if (rv != APR_SUCCESS) { -- check_first_conn_error(r, "rwrite", rv); -- return EOF; -- } -+ ap_fc_write(&filter, buf, nbyte); -+ - SET_BYTES_SENT(r); -- return n; -+ return nbyte; - } - - API_EXPORT(int) ap_vrprintf(request_rec *r, const char *fmt, va_list va) - { -- int n; -+ 
DECL_FILTER_HEAD(r, filter); - - if (r->connection->aborted) - return EOF; - -- n = ap_vbprintf(r->connection->client, fmt, va); -+ ap_fc_vprintf(&filter, fmt, va); - -- if (n < 0) { -- check_first_conn_error(r, "vrprintf", 0); -- return EOF; -- } - SET_BYTES_SENT(r); -- return n; -+ return 1; - } - - API_EXPORT_NONSTD(int) ap_rprintf(request_rec *r, const char *fmt, ...) - { - va_list va; -- int n; - -+ DECL_FILTER_HEAD(r, filter); -+ - if (r->connection->aborted) - return EOF; - - va_start(va, fmt); -- n = ap_vbprintf(r->connection->client, fmt, va); -+ ap_fc_vprintf(&filter, fmt, va); - va_end(va); - -- if (n < 0) { -- check_first_conn_error(r, "rprintf", 0); -- return EOF; -- } - SET_BYTES_SENT(r); -- return n; -+ return 1; - } - - API_EXPORT_NONSTD(int) ap_rvputs(request_rec *r, ...) - { - va_list va; -- int n; -+ -+ DECL_FILTER_HEAD(r, filter); - - if (r->connection->aborted) - return EOF; - - va_start(va, r); -- n = ap_vbputstrs(r->connection->client, va); -+ ap_fc_vputstrs(&filter, va); - va_end(va); - -- if (n < 0) { -- check_first_conn_error(r, "rvputs", 0); -- return EOF; -- } -- - SET_BYTES_SENT(r); -- return n; -+ return 1; - } - - API_EXPORT(int) ap_rflush(request_rec *r) -@@ -2615,6 +2603,210 @@ - return 0; - } - -+static void BUFF_filter_callback(ap_filter_t *filter, ap_bucket_t *bucket) -+{ -+ ap_bucket_t *bscan = bucket; -+ -+ for (bscan = bucket; bscan != NULL; bscan = bscan->next) { -+ int n = 0; -+ -+ switch (bscan->type) { -+ case AP_BUCKET_PTRLEN: -+ if (bscan->len == 1) { -+ n = ap_bputc(*bscan->buf, filter->r->connection->client); -+ } -+ else { -+ apr_status_t rv; -+ apr_ssize_t written; -+ -+ /* ### should loop to ensure everything is written */ -+ rv = ap_bwrite(filter->r->connection->client, bscan->buf, -+ bscan->len, &written); -+ if (rv != APR_SUCCESS) { -+ check_first_conn_error(filter->r, "BUFF_filter_callback", -+ rv); -+ } -+ /* fallthru; n == 0 */ -+ } -+ break; -+ -+ case AP_BUCKET_STRINGS: -+ n = 
ap_vbputstrs(filter->r->connection->client, bscan->va); -+ break; -+ -+ case AP_BUCKET_PRINTF: -+ n = ap_vbprintf(filter->r->connection->client, bscan->fmt, -+ bscan->va); -+ break; -+ -+ case AP_BUCKET_FILE: -+ /* ### fill in file case */ -+ /* ### fallthru; n == 0 */ -+ break; -+ -+ case AP_BUCKET_EOS: -+ /* there is nothing to do here */ -+ /* fallthru; n == 0 */ -+ break; -+ -+ default: -+ /* ### set some kind of error */ -+ break; -+ } -+ -+ if (n < 0) -+ check_first_conn_error(filter->r, "BUFF_filter_callback", 0); -+ } -+} -+ -+API_EXPORT(void) ap_fc_write(ap_filter_t *filter, const char *buf, -+ apr_size_t len) -+{ -+ ap_filter_t *next; -+ ap_bucket_t bucket = { AP_BUCKET_PTRLEN, buf, len }; -+ -+ if (filter->r->connection->aborted || len == 0) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT(void) ap_fc_putc(ap_filter_t *filter, int c) -+{ -+ ap_filter_t *next; -+ char c2 = (char)c; -+ ap_bucket_t bucket = { AP_BUCKET_PTRLEN, &c2, 1 }; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT(void) ap_fc_puts(ap_filter_t *filter, const char *str) -+{ -+ ap_filter_t *next; -+ ap_bucket_t bucket = { AP_BUCKET_PTRLEN, str, strlen(str) }; -+ -+ if (filter->r->connection->aborted || *str == '\0') -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT_NONSTD(void) ap_fc_putstrs(ap_filter_t *filter, ...) 
-+{ -+ va_list va; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ va_start(va, filter); -+ ap_fc_vputstrs(filter, va); -+ va_end(va); -+} -+ -+API_EXPORT(void) ap_fc_vputstrs(ap_filter_t *filter, va_list va) -+{ -+ ap_filter_t *next; -+ ap_bucket_t bucket = { AP_BUCKET_STRINGS, NULL, 0, NULL, va }; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT_NONSTD(void) ap_fc_printf(ap_filter_t *filter, const char *fmt, ...) -+{ -+ va_list va; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ va_start(va, fmt); -+ ap_fc_vprintf(filter, fmt, va); -+ va_end(va); -+} -+ -+API_EXPORT(void) ap_fc_vprintf(ap_filter_t *filter, -+ const char *fmt, va_list va) -+{ -+ ap_filter_t *next; -+ ap_bucket_t bucket = { AP_BUCKET_PRINTF, NULL, 0, fmt, va }; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT(void) ap_fc_sendfile(ap_filter_t *filter, apr_file_t *file, -+ apr_ssize_t flen) -+{ -+ ap_filter_t *next; -+ ap_bucket_t bucket = { -+ AP_BUCKET_FILE, NULL, 0, NULL, NULL, file, flen -+ }; -+ -+ if (filter->r->connection->aborted || flen == 0) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, &bucket); -+ } -+ else { -+ (*next->filter_func)(next, &bucket); -+ } -+} -+ -+API_EXPORT(void) ap_fc_putbucket(ap_filter_t *filter, ap_bucket_t *bucket) -+{ -+ ap_filter_t *next; -+ -+ if (filter->r->connection->aborted) -+ return; -+ -+ if ((next = filter->next) == NULL) { -+ /* ### until we really put it into place */ -+ BUFF_filter_callback(filter, bucket); -+ } -+ else { 
-+ (*next->filter_func)(next, bucket); -+ } -+} -+ - /* We should have named this send_canned_response, since it is used for any - * response that can be generated by the server from the request record. - * This includes all 204 (no content), 3xx (redirect), 4xx (client error), -@@ -3003,6 +3195,7 @@ - ap_finalize_request_protocol(r); - ap_rflush(r); - } -+ - - AP_IMPLEMENT_HOOK_RUN_ALL(int,post_read_request, - (request_rec *r),(r),OK,DECLINED) -Index: main/util_filter.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/util_filter.c,v -retrieving revision 1.3 -diff -u -r1.3 util_filter.c ---- main/util_filter.c 2000/08/05 04:38:58 1.3 -+++ main/util_filter.c 2000/08/05 09:48:23 -@@ -126,6 +126,7 @@ - f->filter_func = frec->filter_func; - f->ctx = ctx; - f->ftype = frec->ftype; -+ f->r = r; - - if (INSERT_BEFORE(f, r->filters)) { - f->next = r->filters; diff --git a/buckets/ryan.patch b/buckets/ryan.patch deleted file mode 100644 index 6e41d0a91..000000000 --- a/buckets/ryan.patch +++ /dev/null @@ -1,651 +0,0 @@ -? build.log -? build.err -? src/build.log -? src/build.err -? src/lib/apr/buckets/Makefile.in -? 
src/lib/apr/include/apr_buf.h -Index: src/ap/Makefile.in -=================================================================== -RCS file: /home/cvs/apache-2.0/src/ap/Makefile.in,v -retrieving revision 1.4 -diff -u -d -b -w -u -r1.4 Makefile.in ---- src/ap/Makefile.in 2000/06/12 20:41:13 1.4 -+++ src/ap/Makefile.in 2000/08/05 05:01:14 -@@ -1,5 +1,5 @@ - - LTLIBRARY_NAME = libap.la --LTLIBRARY_SOURCES = ap_cache.c ap_base64.c ap_sha1.c ap_buf.c ap_hooks.c -+LTLIBRARY_SOURCES = ap_cache.c ap_base64.c ap_sha1.c ap_hooks.c - - include $(top_srcdir)/build/ltlib.mk -Index: src/include/ap_iol.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/include/ap_iol.h,v -retrieving revision 1.23 -diff -u -d -b -w -u -r1.23 ap_iol.h ---- src/include/ap_iol.h 2000/08/02 17:51:36 1.23 -+++ src/include/ap_iol.h 2000/08/05 05:01:14 -@@ -58,7 +58,9 @@ - #define AP_IOL_H - - #include "apr_general.h" /* For ap_s?size_t */ --#include "apr_errno.h" /* For apr_status_t and the APR_errnos */ -+#include "apr_network_io.h" /* For ap_hdtr_t */ -+#include "apr_errno.h" /* For ap_status_t and the APR_errnos */ -+#include "ap_config.h" /* For ap_status_t and the APR_errnos */ - - typedef struct ap_iol ap_iol; - typedef struct ap_iol_methods ap_iol_methods; -Index: src/include/http_protocol.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/include/http_protocol.h,v -retrieving revision 1.20 -diff -u -d -b -w -u -r1.20 http_protocol.h ---- src/include/http_protocol.h 2000/08/02 05:25:28 1.20 -+++ src/include/http_protocol.h 2000/08/05 05:01:14 -@@ -88,8 +88,19 @@ - */ - API_EXPORT(void) ap_basic_http_header(request_rec *r); - --/* Send the Status-Line and header fields for HTTP response */ -- -+/* Send the Status-Line and header fields for HTTP response. Two functions -+ * are needed here because we are doing two very different things. 
1) We -+ * setup the response based on the header values. For example, se setup -+ * chunking based on the values in the headers. This is done in -+ * ap_send_http_header. A slightly incorrect name, but it is the name from -+ * 1.3, so this means modules don't need to change as much. 2) Actually -+ * send the headers over the wire. Currently this is done in -+ * ap_send_http_header_real. This should most likely be changed to just -+ * create a bucket that contains the headers. In this way, the headers are -+ * treated just like regular data, and we avoid BUFF all together. That however -+ * is an enhancement that can be made after the core filtering is in place. -+ */ -+API_EXPORT(void) ap_send_http_header_real(request_rec *l); - API_EXPORT(void) ap_send_http_header(request_rec *l); - - /* Send the response to special method requests */ -Index: src/include/httpd.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/include/httpd.h,v -retrieving revision 1.69 -diff -u -d -b -w -u -r1.69 httpd.h ---- src/include/httpd.h 2000/08/04 17:40:02 1.69 -+++ src/include/httpd.h 2000/08/05 05:01:15 -@@ -589,6 +589,10 @@ - * pointer back to the main request. - */ - -+ int headers_sent; /* Have we sent the headers for this request -+ * yet. -+ */ -+ - /* Info about the request itself... we begin with stuff that only - * protocol.c should ever touch... 
- */ -Index: src/include/util_filter.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/include/util_filter.h,v -retrieving revision 1.3 -diff -u -d -b -w -u -r1.3 util_filter.h ---- src/include/util_filter.h 2000/08/05 04:38:57 1.3 -+++ src/include/util_filter.h 2000/08/05 05:01:15 -@@ -65,6 +65,7 @@ - - #include "httpd.h" - #include "apr.h" -+#include "apr_buf.h" - - /* - * FILTER CHAIN -@@ -114,7 +115,7 @@ - * next/prev to insert/remove/replace elements in the bucket list, but - * the types and values of the individual buckets should not be altered. - */ --typedef apr_status_t (*ap_filter_func)(); -+typedef apr_status_t (*ap_filter_func)(request_rec *r, ap_filter_t *f, ap_bucket_brigade *b); - - /* - * ap_filter_type: -@@ -168,6 +169,19 @@ - ap_filter_t *next; - }; - -+/* This function just passes the current bucket brigade down to the next -+ * filter on the filter stack. When a filter actually writes to the network -+ * (usually either core or SSL), that filter should return the number of bytes -+ * actually written and it will get propogated back up to the handler. If -+ * nobody writes the data to the network, then this function will most likely -+ * seg fault. I haven't come up with a good way to detect that case yet, and -+ * it should never happen. Regardless, it's an unrecoverable error for the -+ * current request. I would just rather it didn't take out the whole child -+ * process. -+ */ -+API_EXPORT(int) ap_pass_brigade(request_rec *r, ap_filter_t *filter, -+ ap_bucket_brigade *bucket); -+ - /* - * ap_register_filter(): - * -@@ -192,9 +206,28 @@ - * calls to ap_add_filter). If the current filter chain contains filters - * from another request, then this filter will be added before those other - * filters. -+ * -+ * To re-iterate that last comment. This function is building a FIFO -+ * list of filters. Take note of that when adding your filter to the chain. 
- */ - API_EXPORT(void) ap_add_filter(const char *name, void *ctx, request_rec *r); - -+/* The next two filters are for abstraction purposes only. They could be -+ * done away with, but that would require that we break modules if we ever -+ * want to change our filter registration method. The basic idea, is that -+ * all filters have a place to store data, the ctx pointer. These functions -+ * fill out that pointer with a bucket brigade, and retrieve that data on -+ * the next call. The nice thing about these functions, is that they -+ * automatically concatenate the bucket brigades together for you. This means -+ * that if you have already stored a brigade in the filters ctx pointer, then -+ * when you add more it will be tacked onto the end of that brigade. When -+ * you retrieve data, if you pass in a bucket brigade to the get function, -+ * it will append the current brigade onto the one that you are retrieving. -+ */ -+API_EXPORT(ap_bucket_brigade *) ap_get_saved_data(request_rec *r, -+ ap_filter_t *f, ap_bucket_brigade **b); -+API_EXPORT(void) ap_save_data_to_filter(request_rec *r, ap_filter_t *f, -+ ap_bucket_brigade **b); - - /* - * Things to do later: -@@ -206,12 +239,6 @@ - * bucket_brigade, but I am trying to keep this patch neutral. (If this - * comment breaks that, well sorry, but the information must be there - * somewhere. :-) -- * -- * Add a function like ap_pass_data. This function will basically just -- * call the next filter in the chain, until the current filter is NULL. If the -- * current filter is NULL, that means that nobody wrote to the network, and -- * we have a HUGE bug, so we need to return an error and log it to the -- * log file. 
- */ - #ifdef __cplusplus - } -Index: src/lib/apr/configure.in -=================================================================== -RCS file: /home/cvs/apache-2.0/src/lib/apr/configure.in,v -retrieving revision 1.143 -diff -u -d -b -w -u -r1.143 configure.in ---- src/lib/apr/configure.in 2000/08/02 05:51:39 1.143 -+++ src/lib/apr/configure.in 2000/08/05 05:01:15 -@@ -688,8 +688,8 @@ - AC_SUBST(EXEEXT) - - echo "Construct Makefiles and header files." --MAKEFILE1="Makefile lib/Makefile strings/Makefile passwd/Makefile tables/Makefile" --SUBDIRS="lib strings passwd tables " -+MAKEFILE1="Makefile lib/Makefile strings/Makefile passwd/Makefile tables/Makefile buckets/Makefile" -+SUBDIRS="lib strings passwd tables buckets " - for dir in $MODULES - do - test -d $dir || $MKDIR -p $dir -Index: src/main/http_core.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/http_core.c,v -retrieving revision 1.94 -diff -u -d -b -w -u -r1.94 http_core.c ---- src/main/http_core.c 2000/08/02 05:26:47 1.94 -+++ src/main/http_core.c 2000/08/05 05:01:22 -@@ -72,6 +72,8 @@ - #include "util_md5.h" - #include "apr_fnmatch.h" - #include "http_connection.h" -+#include "apr_buf.h" -+#include "util_filter.h" - #include "util_ebcdic.h" - #include "mpm.h" - #ifdef HAVE_NETDB_H -@@ -87,6 +89,10 @@ - #include <strings.h> - #endif - -+/* Make sure we don't write less than 4096 bytes at any one time. -+ */ -+#define MIN_SIZE_TO_WRITE 4096 -+ - /* Allow Apache to use ap_mmap */ - #ifdef USE_MMAP_FILES - #include "apr_mmap.h" -@@ -2880,6 +2886,52 @@ - return OK; - } - -+/* Default filter. This filter should almost always be used. It's only job -+ * is to send the headers if they haven't already been sent, and then send -+ * the actual data. To send the data, we create an iovec out of the bucket -+ * brigade and then call the iol's writev function. 
On platforms that don't -+ * have writev, we have the problem of creating a lot of potentially small -+ * packets that we are sending to the network. -+ * -+ * This can be solved later by making the buckets buffer everything into a -+ * single memory block that can be written using write (on those systems -+ * without writev only !) -+ */ -+static int core_filter(request_rec *r, ap_filter_t *f, ap_bucket_brigade *b) -+{ -+ ap_bucket *dptr = b->head; -+ apr_ssize_t bytes_sent; -+ int len = 0; -+ -+ if (!r->headers_sent) { -+ ap_send_http_header_real(r); -+ ap_bflush(r->connection->client); -+ r->headers_sent = 1; -+ } -+ -+ /* At this point we need to discover if there was any data saved from -+ * the last call to core_filter. -+ */ -+ b = ap_get_saved_data(r, f, &b); -+ -+ /* It is very obvious that we need to make sure it makes sense to send data -+ * out at this point. -+ */ -+ dptr = b->head; -+ while (dptr) { -+ len += ap_get_bucket_len(dptr); -+ dptr = dptr->next; -+ } -+ if (len < MIN_SIZE_TO_WRITE && b->tail->color != AP_BUCKET_eos) { -+ ap_save_data_to_filter(r, f, &b); -+ return 0; -+ } -+ else { -+ ap_brigade_to_iol(&bytes_sent, b, r->connection->client->iol); -+ return bytes_sent; -+ } -+} -+ - static const handler_rec core_handlers[] = { - { "*/*", default_handler }, - { "default-handler", default_handler }, -@@ -2902,6 +2954,11 @@ - static unsigned short core_port(const request_rec *r) - { return DEFAULT_HTTP_PORT; } - -+static void core_register_filter(request_rec *r) -+{ -+ ap_add_filter("CORE", NULL, r); -+} -+ - static void register_hooks(void) - { - ap_hook_post_config(core_post_config,NULL,NULL,AP_HOOK_REALLY_FIRST); -@@ -2914,6 +2971,14 @@ - /* FIXME: I suspect we can eliminate the need for these - Ben */ - ap_hook_type_checker(do_nothing,NULL,NULL,AP_HOOK_REALLY_LAST); - ap_hook_access_checker(do_nothing,NULL,NULL,AP_HOOK_REALLY_LAST); -+ -+ /* This is kind of odd, and it would be cool to clean it up a bit. 
-+ * The first function just registers the core's register_filter hook. -+ * The other associates a global name with the filter defined -+ * by the core module. -+ */ -+ ap_hook_insert_filter(core_register_filter, NULL, NULL, AP_HOOK_MIDDLE); -+ ap_register_filter("CORE", core_filter, AP_FTYPE_CONNECTION); - } - - API_VAR_EXPORT module core_module = { -Index: src/main/http_protocol.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/http_protocol.c,v -retrieving revision 1.100 -diff -u -d -b -w -u -r1.100 http_protocol.c ---- src/main/http_protocol.c 2000/08/02 05:26:48 1.100 -+++ src/main/http_protocol.c 2000/08/05 05:01:22 -@@ -64,6 +64,8 @@ - */ - - #define CORE_PRIVATE -+#include "apr_buf.h" -+#include "util_filter.h" - #include "ap_config.h" - #include "apr_strings.h" - #include "httpd.h" -@@ -1824,8 +1826,12 @@ - apr_rfc822_date(date, r->request_time); - apr_table_addn(r->headers_out, "Expires", date); - } -+} - -- /* Send the entire apr_table_t of header fields, terminated by an empty line. */ -+API_EXPORT(void) ap_send_http_header_real(request_rec *r) -+{ -+ const long int zero = 0L; -+ /* Send the entire ap_table_t of header fields, terminated by an empty line. 
*/ - - apr_table_do((int (*) (void *, const char *, const char *)) ap_send_header_field, - (void *) r, r->headers_out, NULL); -@@ -2468,101 +2474,84 @@ - API_EXPORT(size_t) ap_send_mmap(apr_mmap_t *mm, request_rec *r, size_t offset, - size_t length) - { -- size_t total_bytes_sent = 0; -- int n; -- apr_ssize_t w; -- char *addr; -- -- if (length == 0) -- return 0; -- -- -- length += offset; -- while (!r->connection->aborted && offset < length) { -- if (length - offset > MMAP_SEGMENT_SIZE) { -- n = MMAP_SEGMENT_SIZE; -- } -- else { -- n = length - offset; -- } -+ size_t bytes_sent = 0; -+ ap_bucket_brigade *bb = NULL; - -- apr_mmap_offset((void**)&addr, mm, offset); -- w = ap_rwrite(addr, n, r); -- if (w < 0) -- break; -- total_bytes_sent += w; -- offset += w; -- } -+ /* WE probably need to do something to make sure we are respecting the -+ * offset and length. I think I know how to do this, but I will wait -+ * until after the commit to actually write the code. -+ */ -+ bb = ap_brigade_create(r->pool); -+ ap_brigade_append_buckets(bb, -+ ap_bucket_mmap_create(mm, mm->size, &bytes_sent)); -+ bytes_sent = ap_pass_brigade(r, NULL, bb); - -- SET_BYTES_SENT(r); -- return total_bytes_sent; -+ return bytes_sent; - } - #endif /* USE_MMAP_FILES */ - - API_EXPORT(int) ap_rputc(int c, request_rec *r) - { -+ ap_bucket_brigade *bb = NULL; -+ apr_ssize_t written; -+ - if (r->connection->aborted) - return EOF; - -- if (ap_bputc(c, r->connection->client) < 0) { -- check_first_conn_error(r, "rputc", 0); -- return EOF; -- } -- SET_BYTES_SENT(r); -+ bb = ap_brigade_create(r->pool); -+ ap_brigade_append_buckets(bb, ap_bucket_rwmem_create(&c, 1, &written)); -+ ap_pass_brigade(r, NULL, bb); -+ - return c; - } - - API_EXPORT(int) ap_rputs(const char *str, request_rec *r) - { -- int rcode; -+ ap_bucket_brigade *bb = NULL; -+ apr_ssize_t written; - - if (r->connection->aborted) - return EOF; - -- rcode = ap_bputs(str, r->connection->client); -- if (rcode < 0) { -- check_first_conn_error(r, 
"rputs", 0); -- return EOF; -- } -- SET_BYTES_SENT(r); -- return rcode; -+ bb = ap_brigade_create(r->pool); -+ ap_brigade_append_buckets(bb, -+ ap_bucket_rwmem_create(str, strlen(str), &written)); -+ ap_pass_brigade(r, NULL, bb); -+ -+ return written; - } - - API_EXPORT(int) ap_rwrite(const void *buf, int nbyte, request_rec *r) - { -- apr_ssize_t n; -- apr_status_t rv; -+ ap_bucket_brigade *bb = NULL; -+ apr_ssize_t written; - - if (r->connection->aborted) - return EOF; - -- /* ### should loop to avoid partial writes */ -- rv = ap_bwrite(r->connection->client, buf, nbyte, &n); -- if (rv != APR_SUCCESS) { -- check_first_conn_error(r, "rwrite", rv); -- return EOF; -- } -- SET_BYTES_SENT(r); -- return n; -+ bb = ap_brigade_create(r->pool); -+ ap_brigade_append_buckets(bb, ap_bucket_rwmem_create(buf, nbyte, &written)); -+ ap_pass_brigade(r, NULL, bb); -+ return written; - } - - API_EXPORT(int) ap_vrprintf(request_rec *r, const char *fmt, va_list va) - { -- int n; -+ ap_bucket_brigade *bb = NULL; -+ apr_ssize_t written; - - if (r->connection->aborted) - return EOF; - -- n = ap_vbprintf(r->connection->client, fmt, va); -- -- if (n < 0) { -- check_first_conn_error(r, "vrprintf", 0); -- return EOF; -- } -- SET_BYTES_SENT(r); -- return n; -+ bb = ap_brigade_create(r->pool); -+ written = ap_brigade_vprintf(bb, fmt, va); -+ ap_pass_brigade(r, NULL, bb); -+ return written; - } - -+/* TODO: Make ap pa_bucket_vprintf that printfs directly into a -+ * bucket. -+ */ - API_EXPORT_NONSTD(int) ap_rprintf(request_rec *r, const char *fmt, ...) - { - va_list va; -@@ -2572,46 +2561,35 @@ - return EOF; - - va_start(va, fmt); -- n = ap_vbprintf(r->connection->client, fmt, va); -+ n = ap_vrprintf(r, fmt, va); - va_end(va); - -- if (n < 0) { -- check_first_conn_error(r, "rprintf", 0); -- return EOF; -- } -- SET_BYTES_SENT(r); - return n; - } - - API_EXPORT_NONSTD(int) ap_rvputs(request_rec *r, ...) 
- { -+ ap_bucket_brigade *bb = NULL; -+ apr_ssize_t written; - va_list va; -- int n; - - if (r->connection->aborted) - return EOF; -- -+ bb = ap_brigade_create(r->pool); - va_start(va, r); -- n = ap_vbputstrs(r->connection->client, va); -+ written = ap_brigade_vputstrs(bb, va); - va_end(va); -- -- if (n < 0) { -- check_first_conn_error(r, "rvputs", 0); -- return EOF; -- } -- -- SET_BYTES_SENT(r); -- return n; -+ ap_pass_brigade(r, NULL, bb); -+ return written; - } - - API_EXPORT(int) ap_rflush(request_rec *r) - { -- apr_status_t rv; -+ ap_bucket_brigade *bb; - -- if ((rv = ap_bflush(r->connection->client)) != APR_SUCCESS) { -- check_first_conn_error(r, "rflush", rv); -- return EOF; -- } -+ bb = ap_brigade_create(r->pool); -+ ap_brigade_append_buckets(bb, ap_bucket_eos_create()); -+ ap_pass_brigade(r, NULL, bb); - return 0; - } - -Index: src/main/http_request.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/http_request.c,v -retrieving revision 1.38 -diff -u -d -b -w -u -r1.38 http_request.c ---- src/main/http_request.c 2000/08/02 05:26:48 1.38 -+++ src/main/http_request.c 2000/08/05 05:01:22 -@@ -1276,6 +1276,12 @@ - return; - } - -+ /* We need to flush the data out at this point. We probably only want to -+ * do this on the main request, but this is fine for an initial patch. -+ * Once we look into this more, we won't flush sub-requests. -+ */ -+ ap_rflush(r); -+ - /* Take care of little things that need to happen when we're done */ - ap_finalize_request_protocol(r); - } -Index: src/main/util_filter.c -=================================================================== -RCS file: /home/cvs/apache-2.0/src/main/util_filter.c,v -retrieving revision 1.3 -diff -u -d -b -w -u -r1.3 util_filter.c ---- src/main/util_filter.c 2000/08/05 04:38:58 1.3 -+++ src/main/util_filter.c 2000/08/05 05:01:22 -@@ -52,6 +52,7 @@ - * <http://www.apache.org/>. 
- */ - -+#include "httpd.h" - #include "util_filter.h" - - /* -@@ -73,7 +74,7 @@ - } ap_filter_rec_t; - - /* ### make this visible for direct manipulation? -- ### use a hash table -+ * ### use a hash table - */ - static ap_filter_rec_t *registered_filters = NULL; - -@@ -144,3 +145,63 @@ - } - } - -+/* Pass the buckets to the next filter in the filter stack. If the -+ * current filter is a handler, we should get NULL passed in instead of -+ * the current filter. At that point, we can just call the first filter in -+ * the stack, or r->filters. -+ */ -+API_EXPORT(int) ap_pass_brigade(request_rec *r, ap_filter_t *filter, -+ ap_bucket_brigade *bb) -+{ -+ if (filter) { -+ return (*filter->next->filter_func)(r, filter->next, bb); -+ } -+ else { -+ return (*r->filters->filter_func)(r, r->filters, bb); -+ } -+} -+ -+API_EXPORT(ap_bucket_brigade *) ap_get_saved_data(request_rec *r, -+ ap_filter_t *f, ap_bucket_brigade **b) -+{ -+ ap_bucket_brigade *bb = (ap_bucket_brigade *)f->ctx; -+ -+ /* If we have never stored any data in the filter, then we had better -+ * create an empty bucket brigade so that we can concat. -+ */ -+ if (!bb) { -+ bb = ap_brigade_create(r->pool); -+ } -+ -+ /* join the two brigades together. *b is now empty so we can -+ * safely destroy it. -+ */ -+ ap_brigade_catenate(bb, *b); -+ ap_brigade_destroy(*b); -+ /* clear out the filter's context pointer. If we don't do this, then -+ * when we save more data to the filter, we will be appended to what is -+ * currently there. This will mean repeating data.... BAD! :-) -+ */ -+ f->ctx = NULL; -+ -+ return bb; -+} -+ -+API_EXPORT(void) ap_save_data_to_filter(request_rec *r, ap_filter_t *f, -+ ap_bucket_brigade **b) -+{ -+ ap_bucket_brigade *bb = (ap_bucket_brigade *)f->ctx; -+ -+ /* If have never stored any data in the filter, then we had better -+ * create an empty bucket brigade so that we can concat. -+ */ -+ if (!bb) { -+ bb = ap_brigade_create(r->pool); -+ } -+ -+ /* Apend b to bb. 
This means b is now empty, and we can destory it safely. -+ */ -+ ap_brigade_catenate(bb, *b); -+ ap_brigade_destroy(*b); -+ f->ctx = bb; -+} -Index: src/os/unix/os.h -=================================================================== -RCS file: /home/cvs/apache-2.0/src/os/unix/os.h,v -retrieving revision 1.10 -diff -u -d -b -w -u -r1.10 os.h ---- src/os/unix/os.h 2000/05/15 23:02:57 1.10 -+++ src/os/unix/os.h 2000/08/05 05:01:25 -@@ -59,8 +59,6 @@ - #ifndef APACHE_OS_H - #define APACHE_OS_H - --#include "ap_config.h" -- - #ifndef PLATFORM - #define PLATFORM "Unix" - #endif |