From f65813689e5a0061e65eca5d626ebe11d088b87a Mon Sep 17 00:00:00 2001 From: Graham Leggett Date: Tue, 19 Oct 2021 14:30:37 +0000 Subject: apr_brigades: Add apr_brigade_split_boundary(), allowing us to split brigades on boundaries of arbitrary length. git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1894380 13f79535-47bb-0310-9956-ffa450edef68 --- buckets/apr_brigade.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) (limited to 'buckets') diff --git a/buckets/apr_brigade.c b/buckets/apr_brigade.c index c81d29a6c..27bcffd96 100644 --- a/buckets/apr_brigade.c +++ b/buckets/apr_brigade.c @@ -387,6 +387,269 @@ APR_DECLARE(apr_status_t) apr_brigade_split_line(apr_bucket_brigade *bbOut, return APR_SUCCESS; } +APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade *bbOut, + apr_bucket_brigade *bbIn, + apr_read_type_e block, + const char *boundary, + apr_size_t boundary_len, + apr_off_t maxbytes) +{ + apr_off_t outbytes = 0; + + if (!boundary || !boundary[0]) { + return APR_EINVAL; + } + + if (APR_BUCKETS_STRING == boundary_len) { + boundary_len = strlen(boundary); + } + + /* + * While the call describes itself as searching for a boundary string, + * what we actually do is search for anything that is definitely not + * a boundary string, and allow that not-boundary data to pass through. + * + * If we find data that might be a boundary, we try read more data in + * until we know for sure. + */ + while (!APR_BRIGADE_EMPTY(bbIn)) { + + const char *pos; + const char *str; + apr_bucket *e, *next, *prev; + apr_off_t inbytes = 0; + apr_size_t len; + apr_status_t rv; + + /* We didn't find a boundary within the maximum line length. */ + if (outbytes >= maxbytes) { + return APR_INCOMPLETE; + } + + e = APR_BRIGADE_FIRST(bbIn); + + /* We hit a metadata bucket, stop and let the caller handle it */ + if (APR_BUCKET_IS_METADATA(e)) { + return APR_INCOMPLETE; + } + + rv = apr_bucket_read(e, &str, &len, block); + + if (rv != APR_SUCCESS) { + return rv; + } + + inbytes += len; + + /* + * Fast path. + * + * If we have at least one boundary worth of data, do an optimised + * substring search for the boundary, and split quickly if found. + */ + if (len >= boundary_len) { + + apr_size_t off; + apr_size_t leftover; + + pos = strnstr(str, boundary, len); + + /* definitely found it, we leave */ + if (pos != NULL) { + + off = pos - str; + + /* everything up to the boundary */ + if (off) { + + apr_bucket_split(e, off); + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + + e = APR_BRIGADE_FIRST(bbIn); + } + + /* cut out the boundary */ + apr_bucket_split(e, boundary_len); + apr_bucket_delete(e); + + return APR_SUCCESS; + } + + /* any partial matches at the end? */ + leftover = boundary_len - 1; + off = (len - leftover); + + while (leftover) { + if (!strncmp(str + off, boundary, leftover)) { + + if (off) { + + apr_bucket_split(e, off); + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + + e = APR_BRIGADE_FIRST(bbIn); + } + + outbytes += off; + inbytes -= off; + + goto skip; + } + off++; + leftover--; + } + + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + + outbytes += len; + + continue; + + } + + /* + * Slow path. + * + * We need to read ahead at least one boundary worth of data so + * we can search across the bucket edges. + */ + else { + + apr_size_t off = 0; + + /* find all definite non matches */ + while (len) { + if (!strncmp(str + off, boundary, len)) { + + if (off) { + + apr_bucket_split(e, off); + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + + e = APR_BRIGADE_FIRST(bbIn); + } + + inbytes -= off; + + goto skip; + } + off++; + len--; + } + + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + continue; + + } + + /* + * If we reach skip, it means the bucket in e is: + * + * - shorter than the boundary + * - matches the boundary up to the bucket length + * - might match more buckets + * + * Read further buckets and check whether the boundary matches all + * the way to the end. If so, we have a match. If no match, shave off + * one byte and continue round to try again. + */ +skip: + + for (next = APR_BUCKET_NEXT(e); + inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn); + next = APR_BUCKET_NEXT(next)) { + + const char *str; + apr_size_t off; + apr_size_t len; + + rv = apr_bucket_read(next, &str, &len, block); + + if (rv != APR_SUCCESS) { + return rv; + } + + off = boundary_len - inbytes; + + if (len > off) { + + /* not a match, bail out */ + if (strncmp(str, boundary + inbytes, off)) { + break; + } + + /* a match! remove the boundary and return */ + apr_bucket_split(next, off); + + e = APR_BUCKET_NEXT(next); + + for (prev = APR_BRIGADE_FIRST(bbIn); + prev != e; + prev = APR_BRIGADE_FIRST(bbIn)) { + + apr_bucket_delete(prev); + + } + + return APR_SUCCESS; + + } + if (len == off) { + + /* not a match, bail out */ + if (strncmp(str, boundary + inbytes, off)) { + break; + } + + /* a match! remove the boundary and return */ + e = APR_BUCKET_NEXT(next); + + for (prev = APR_BRIGADE_FIRST(bbIn); + prev != e; + prev = APR_BRIGADE_FIRST(bbIn)) { + + apr_bucket_delete(prev); + + } + + return APR_SUCCESS; + + } + else if (len) { + + /* not a match, bail out */ + if (strncmp(str, boundary + inbytes, len)) { + break; + } + + /* still hope for a match */ + inbytes += len; + } + + } + + /* + * If we reach this point, the bucket e did not match the boundary + * in the subsequent buckets. + * + * Bump one byte off, and loop round to search again. + */ + apr_bucket_split(e, 1); + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(bbOut, e); + + outbytes++; + + } + + return APR_INCOMPLETE; +} + APR_DECLARE(apr_status_t) apr_brigade_to_iovec(apr_bucket_brigade *b, struct iovec *vec, int *nvec) -- cgit v1.2.1