diff options
author | Graham Leggett <minfrin@apache.org> | 2021-10-19 14:30:37 +0000 |
---|---|---|
committer | Graham Leggett <minfrin@apache.org> | 2021-10-19 14:30:37 +0000 |
commit | f65813689e5a0061e65eca5d626ebe11d088b87a (patch) | |
tree | f9d88d1e5f251da447c3c9404d21aabb8ac28ed7 | |
parent | 424e4ecebab9e3df6fe25c1507cc4093bc00f715 (diff) | |
download | apr-f65813689e5a0061e65eca5d626ebe11d088b87a.tar.gz |
apr_brigades: Add apr_brigade_split_boundary(), allowing us to split
brigades on boundaries of arbitrary length.
git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1894380 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | buckets/apr_brigade.c | 263 | ||||
-rw-r--r-- | include/apr_buckets.h | 37 | ||||
-rw-r--r-- | test/testbuckets.c | 57 |
3 files changed, 357 insertions, 0 deletions
diff --git a/buckets/apr_brigade.c b/buckets/apr_brigade.c
index c81d29a6c..27bcffd96 100644
--- a/buckets/apr_brigade.c
+++ b/buckets/apr_brigade.c
@@ -387,6 +387,269 @@ APR_DECLARE(apr_status_t) apr_brigade_split_line(apr_bucket_brigade *bbOut,
 
     return APR_SUCCESS;
 }
 
+/*
+ * Locate the first occurrence of the needle_len bytes at needle within the
+ * haystack_len bytes at haystack. Unlike strnstr() this is plain ISO C,
+ * does not stop at NUL bytes in the haystack, and never reads the needle
+ * beyond needle_len. Returns NULL when there is no match.
+ */
+static const char *brigade_find_bytes(const char *haystack,
+                                      apr_size_t haystack_len,
+                                      const char *needle,
+                                      apr_size_t needle_len)
+{
+    const char *cur = haystack;
+    const char *last;
+
+    if (!needle_len || haystack_len < needle_len) {
+        return NULL;
+    }
+    /* last position at which a complete needle still fits */
+    last = haystack + (haystack_len - needle_len);
+    while (cur <= last) {
+        cur = memchr(cur, needle[0], (apr_size_t)(last - cur) + 1);
+        if (!cur) {
+            return NULL;
+        }
+        if (!memcmp(cur, needle, needle_len)) {
+            return cur;
+        }
+        cur++;
+    }
+    return NULL;
+}
+
+/*
+ * Pass everything in bbIn that precedes the first occurrence of boundary
+ * on to bbOut and delete the boundary itself. All comparisons are
+ * byte-wise, so bucket data containing NUL bytes is searched correctly.
+ */
+APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade *bbOut,
+                                                     apr_bucket_brigade *bbIn,
+                                                     apr_read_type_e block,
+                                                     const char *boundary,
+                                                     apr_size_t boundary_len,
+                                                     apr_off_t maxbytes)
+{
+    apr_off_t outbytes = 0;
+
+    if (!boundary || !boundary[0]) {
+        return APR_EINVAL;
+    }
+
+    if ((apr_size_t)APR_BUCKETS_STRING == boundary_len) {
+        boundary_len = strlen(boundary);
+    }
+
+    /* an explicit zero length is an empty boundary as well */
+    if (!boundary_len) {
+        return APR_EINVAL;
+    }
+
+    /*
+     * While the call describes itself as searching for a boundary string,
+     * what we actually do is search for anything that is definitely not
+     * a boundary string, and allow that not-boundary data to pass through.
+     *
+     * If we find data that might be a boundary, we try to read more data
+     * in until we know for sure.
+     */
+    while (!APR_BRIGADE_EMPTY(bbIn)) {
+        const char *pos;
+        const char *str;
+        apr_bucket *e, *next, *prev;
+        apr_size_t inbytes;
+        apr_size_t len;
+        apr_status_t rv;
+
+        /* We didn't find a boundary within the maximum number of bytes. */
+        if (outbytes >= maxbytes) {
+            return APR_INCOMPLETE;
+        }
+
+        e = APR_BRIGADE_FIRST(bbIn);
+
+        /* We hit a metadata bucket, stop and let the caller handle it */
+        if (APR_BUCKET_IS_METADATA(e)) {
+            return APR_INCOMPLETE;
+        }
+
+        rv = apr_bucket_read(e, &str, &len, block);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+
+        /* bytes at the front of bbIn that may still be part of a boundary */
+        inbytes = len;
+
+        /*
+         * Fast path.
+         *
+         * If we have at least one boundary worth of data, do an optimised
+         * substring search for the boundary, and split quickly if found.
+         */
+        if (len >= boundary_len) {
+            apr_size_t off;
+            apr_size_t leftover;
+
+            pos = brigade_find_bytes(str, len, boundary, boundary_len);
+
+            /* definitely found it, we leave */
+            if (pos != NULL) {
+                off = (apr_size_t)(pos - str);
+
+                /* everything up to the boundary */
+                if (off) {
+                    apr_bucket_split(e, off);
+                    APR_BUCKET_REMOVE(e);
+                    APR_BRIGADE_INSERT_TAIL(bbOut, e);
+                    e = APR_BRIGADE_FIRST(bbIn);
+                }
+
+                /* cut out the boundary */
+                apr_bucket_split(e, boundary_len);
+                apr_bucket_delete(e);
+                return APR_SUCCESS;
+            }
+
+            /* any partial matches at the end? */
+            leftover = boundary_len - 1;
+            off = (len - leftover);
+
+            while (leftover) {
+                if (!memcmp(str + off, boundary, leftover)) {
+                    if (off) {
+                        apr_bucket_split(e, off);
+                        APR_BUCKET_REMOVE(e);
+                        APR_BRIGADE_INSERT_TAIL(bbOut, e);
+                        e = APR_BRIGADE_FIRST(bbIn);
+                    }
+                    outbytes += off;
+                    inbytes -= off;
+                    goto skip;
+                }
+                off++;
+                leftover--;
+            }
+
+            APR_BUCKET_REMOVE(e);
+            APR_BRIGADE_INSERT_TAIL(bbOut, e);
+            outbytes += len;
+            continue;
+        }
+
+        /*
+         * Slow path.
+         *
+         * We need to read ahead at least one boundary worth of data so
+         * we can search across the bucket edges.
+         */
+        else {
+            apr_size_t off = 0;
+
+            /* find all definite non matches */
+            while (len) {
+                if (!memcmp(str + off, boundary, len)) {
+                    if (off) {
+                        apr_bucket_split(e, off);
+                        APR_BUCKET_REMOVE(e);
+                        APR_BRIGADE_INSERT_TAIL(bbOut, e);
+                        e = APR_BRIGADE_FIRST(bbIn);
+                    }
+                    /* the bytes passed on count towards maxbytes too */
+                    outbytes += off;
+                    inbytes -= off;
+                    goto skip;
+                }
+                off++;
+                len--;
+            }
+
+            /* len was counted down to zero, off is now the bucket length */
+            APR_BUCKET_REMOVE(e);
+            APR_BRIGADE_INSERT_TAIL(bbOut, e);
+            outbytes += off;
+            continue;
+        }
+
+        /*
+         * If we reach skip, it means the bucket in e is:
+         *
+         * - shorter than the boundary
+         * - matches the boundary up to the bucket length
+         * - might match more buckets
+         *
+         * Read further buckets and check whether the boundary matches all
+         * the way to the end. If so, we have a match. If no match, shave off
+         * one byte and continue round to try again.
+         */
+skip:
+        for (next = APR_BUCKET_NEXT(e);
+             inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn);
+             next = APR_BUCKET_NEXT(next)) {
+            const char *nstr;
+            apr_size_t nlen;
+            apr_size_t need;
+
+            /* a boundary never spans a metadata bucket: not a match */
+            if (APR_BUCKET_IS_METADATA(next)) {
+                break;
+            }
+
+            rv = apr_bucket_read(next, &nstr, &nlen, block);
+            if (rv != APR_SUCCESS) {
+                return rv;
+            }
+
+            /* bytes of the boundary not yet matched */
+            need = boundary_len - inbytes;
+
+            if (nlen >= need) {
+                /* not a match, bail out */
+                if (memcmp(nstr, boundary + inbytes, need)) {
+                    break;
+                }
+
+                /* a match! keep whatever follows the boundary in next */
+                if (nlen > need) {
+                    apr_bucket_split(next, need);
+                }
+
+                /* remove the boundary: every bucket up to and including next */
+                e = APR_BUCKET_NEXT(next);
+                for (prev = APR_BRIGADE_FIRST(bbIn);
+                     prev != e;
+                     prev = APR_BRIGADE_FIRST(bbIn)) {
+                    apr_bucket_delete(prev);
+                }
+                return APR_SUCCESS;
+            }
+            else if (nlen) {
+                /* not a match, bail out */
+                if (memcmp(nstr, boundary + inbytes, nlen)) {
+                    break;
+                }
+
+                /* still hope for a match */
+                inbytes += nlen;
+            }
+        }
+
+        /*
+         * If we reach this point, the bucket e did not match the boundary
+         * in the subsequent buckets.
+         *
+         * Bump one byte off, and loop round to search again.
+         */
+        apr_bucket_split(e, 1);
+        APR_BUCKET_REMOVE(e);
+        APR_BRIGADE_INSERT_TAIL(bbOut, e);
+        outbytes++;
+    }
+
+    return APR_INCOMPLETE;
+}
+
 APR_DECLARE(apr_status_t) apr_brigade_to_iovec(apr_bucket_brigade *b,
                                                struct iovec *vec, int *nvec)
diff --git a/include/apr_buckets.h b/include/apr_buckets.h
index 0725c6cd9..065058316 100644
--- a/include/apr_buckets.h
+++ b/include/apr_buckets.h
@@ -53,6 +53,11 @@ extern "C" {
 /** default bucket buffer size - 8KB minus room for memory allocator headers */
 #define APR_BUCKET_BUFF_SIZE 8000
 
+/** if passed to apr_brigade_split_boundary() as boundary_len, the length of
+ * the boundary string will be calculated
+ */
+#define APR_BUCKETS_STRING (-1)
+
 /** Determines how a bucket or brigade should be read */
 typedef enum {
     APR_BLOCK_READ,   /**< block until data becomes available */
@@ -791,6 +796,38 @@ APR_DECLARE(apr_status_t) apr_brigade_split_line(apr_bucket_brigade *bbOut,
                                                  __attribute__((nonnull(1,2)));
 
 /**
+ * Split a brigade based on the provided boundary, or metadata buckets,
+ * whichever are encountered first.
+ *
+ * If the boundary is found, all buckets prior to the boundary are passed
+ * into bbOut, the boundary is removed, and APR_SUCCESS is returned.
+ *
+ * If a metadata bucket is found, or if the boundary is not found within
+ * the limit specified by maxbytes, all prior buckets are passed into bbOut,
+ * and APR_INCOMPLETE is returned.
+ *
+ * A partial match at the end of a bucket is held back until the following
+ * buckets are read. A NULL or empty boundary yields APR_EINVAL.
+ *
+ * If an error is encountered, the APR error code will be returned.
+ *
+ * @param bbOut The bucket brigade that receives the data before the boundary.
+ * @param bbIn The input bucket brigade to search for the boundary.
+ * @param block The blocking mode to be used when reading buckets.
+ * @param boundary The boundary string.
+ * @param boundary_len The length of the boundary string. If set to
+ *   APR_BUCKETS_STRING, the length will be calculated.
+ * @param maxbytes The limit on bytes passed into bbOut; checked per bucket.
+ */
+APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade *bbOut,
+                                                     apr_bucket_brigade *bbIn,
+                                                     apr_read_type_e block,
+                                                     const char *boundary,
+                                                     apr_size_t boundary_len,
+                                                     apr_off_t maxbytes)
+                                                     __attribute__((nonnull(1,2)));
+
+/**
  * Create an iovec of the elements in a bucket_brigade... return number
  * of elements used. This is useful for writing to a file or to the
  * network efficiently.
diff --git a/test/testbuckets.c b/test/testbuckets.c
index 31bed0c1b..2b789f1a0 100644
--- a/test/testbuckets.c
+++ b/test/testbuckets.c
@@ -209,6 +209,62 @@ static void test_splitline(abts_case *tc, void *data)
     apr_bucket_alloc_destroy(ba);
 }
 
+static void test_splitboundary(abts_case *tc, void *data)
+{
+    apr_bucket_alloc_t *ba = apr_bucket_alloc_create(p);
+    apr_bucket_brigade *bin, *bout;
+
+    /* fast path: the boundary lies entirely within the first bucket */
+    bin = make_simple_brigade(ba, "quick brown fox",
+                              " jumped over the lazy dog");
+    bout = apr_brigade_create(p, ba);
+
+    APR_ASSERT_SUCCESS(tc, "split boundary",
+                       apr_brigade_split_boundary(bout, bin,
+                                                  APR_BLOCK_READ, "brown",
+                                                  APR_BUCKETS_STRING, 100));
+
+    flatten_match(tc, "split boundary", bout, "quick ");
+    flatten_match(tc, "remainder", bin, " fox jumped over the lazy dog");
+
+    apr_brigade_destroy(bout);
+    apr_brigade_destroy(bin);
+
+    /* slow path: the boundary straddles the edge between the two buckets */
+    bin = make_simple_brigade(ba, "quick brown fox jum",
+                              "ped over the lazy dog");
+    bout = apr_brigade_create(p, ba);
+
+    APR_ASSERT_SUCCESS(tc, "split boundary",
+                       apr_brigade_split_boundary(bout, bin,
+                                                  APR_BLOCK_READ, "jumped",
+                                                  APR_BUCKETS_STRING, 100));
+
+    flatten_match(tc, "split boundary", bout, "quick brown fox ");
+    flatten_match(tc, "remainder", bin, " over the lazy dog");
+
+    apr_brigade_destroy(bout);
+    apr_brigade_destroy(bin);
+
+    /* not found: all data passes to bout and APR_INCOMPLETE is returned */
+    bin = make_simple_brigade(ba, "quick brown fox jum",
+                              "ped over the lazy dog");
+    bout = apr_brigade_create(p, ba);
+
+    ABTS_ASSERT(tc, "split boundary",
+                apr_brigade_split_boundary(bout, bin,
+                                           APR_BLOCK_READ, "jumping",
+                                           APR_BUCKETS_STRING, 100) == APR_INCOMPLETE);
+
+    flatten_match(tc, "split boundary", bout, "quick brown fox jumped over the lazy dog");
+    flatten_match(tc, "remainder", bin, "");
+
+    apr_brigade_destroy(bout);
+    apr_brigade_destroy(bin);
+
+    apr_bucket_alloc_destroy(ba);
+}
+
 /* Test that bucket E has content EDATA of length ELEN. */
 static void test_bucket_content(abts_case *tc,
                                 apr_bucket *e,
@@ -521,6 +577,7 @@ abts_suite *testbuckets(abts_suite *suite)
     abts_run_test(suite, test_split, NULL);
     abts_run_test(suite, test_bwrite, NULL);
     abts_run_test(suite, test_splitline, NULL);
+    abts_run_test(suite, test_splitboundary, NULL);
     abts_run_test(suite, test_splits, NULL);
     abts_run_test(suite, test_insertfile, NULL);
     abts_run_test(suite, test_manyfile, NULL); |