summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraham Leggett <minfrin@apache.org>2021-10-19 14:30:37 +0000
committerGraham Leggett <minfrin@apache.org>2021-10-19 14:30:37 +0000
commitf65813689e5a0061e65eca5d626ebe11d088b87a (patch)
treef9d88d1e5f251da447c3c9404d21aabb8ac28ed7
parent424e4ecebab9e3df6fe25c1507cc4093bc00f715 (diff)
downloadapr-f65813689e5a0061e65eca5d626ebe11d088b87a.tar.gz
apr_brigades: Add apr_brigade_split_boundary(), allowing us to split
brigades on boundaries of arbitrary length. git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1894380 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--buckets/apr_brigade.c263
-rw-r--r--include/apr_buckets.h37
-rw-r--r--test/testbuckets.c57
3 files changed, 357 insertions, 0 deletions
diff --git a/buckets/apr_brigade.c b/buckets/apr_brigade.c
index c81d29a6c..27bcffd96 100644
--- a/buckets/apr_brigade.c
+++ b/buckets/apr_brigade.c
@@ -387,6 +387,269 @@ APR_DECLARE(apr_status_t) apr_brigade_split_line(apr_bucket_brigade *bbOut,
return APR_SUCCESS;
}
+/*
+ * Binary-safe search for the needle_len bytes at needle within the first
+ * haystack_len bytes at haystack.
+ *
+ * Bucket data is not NUL terminated and may contain NUL bytes, so the
+ * str*() family cannot be used here; only memchr() and memcmp() are relied
+ * upon, and the caller supplied needle length is honoured.
+ *
+ * Returns a pointer to the first match, or NULL if there is none.
+ */
+static const char *split_boundary_find(const char *haystack,
+                                       apr_size_t haystack_len,
+                                       const char *needle,
+                                       apr_size_t needle_len)
+{
+    if (!needle_len || haystack_len < needle_len) {
+        return NULL;
+    }
+
+    while (haystack_len >= needle_len) {
+
+        /* only start positions that leave room for the whole needle count */
+        apr_size_t candidates = haystack_len - needle_len + 1;
+        const char *hit = memchr(haystack, needle[0], candidates);
+
+        if (hit == NULL) {
+            return NULL;
+        }
+
+        if (!memcmp(hit, needle, needle_len)) {
+            return hit;
+        }
+
+        /* false start, resume the scan one byte further on */
+        haystack_len -= (apr_size_t)(hit - haystack) + 1;
+        haystack = hit + 1;
+    }
+
+    return NULL;
+}
+
+/*
+ * Split bucket e at prefix_len and move the leading part to the tail of
+ * bbOut. The trailing part stays behind in the brigade e came from.
+ *
+ * Returns the status of apr_bucket_split(); on failure nothing is moved.
+ */
+static apr_status_t split_boundary_pass(apr_bucket_brigade *bbOut,
+                                        apr_bucket *e,
+                                        apr_size_t prefix_len)
+{
+    apr_status_t rv = apr_bucket_split(e, prefix_len);
+
+    if (rv != APR_SUCCESS) {
+        return rv;
+    }
+
+    APR_BUCKET_REMOVE(e);
+    APR_BRIGADE_INSERT_TAIL(bbOut, e);
+
+    return APR_SUCCESS;
+}
+
+/*
+ * Move data from bbIn to bbOut until the boundary is found, removing the
+ * boundary itself from bbIn.
+ *
+ * Returns APR_SUCCESS when the boundary was found and removed,
+ * APR_INCOMPLETE when a metadata bucket is first in bbIn, when maxbytes
+ * have been passed to bbOut, or when bbIn is exhausted, APR_EINVAL when
+ * the boundary is NULL or empty, or the error returned while reading or
+ * splitting a bucket.
+ */
+APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade *bbOut,
+                                                     apr_bucket_brigade *bbIn,
+                                                     apr_read_type_e block,
+                                                     const char *boundary,
+                                                     apr_size_t boundary_len,
+                                                     apr_off_t maxbytes)
+{
+    apr_off_t outbytes = 0;
+
+    if (!boundary || !boundary[0]) {
+        return APR_EINVAL;
+    }
+
+    if (APR_BUCKETS_STRING == boundary_len) {
+        boundary_len = strlen(boundary);
+    }
+
+    /*
+     * An explicit length of zero is an empty boundary too, and would
+     * underflow the partial match search below.
+     */
+    if (!boundary_len) {
+        return APR_EINVAL;
+    }
+
+    /*
+     * While the call describes itself as searching for a boundary string,
+     * what we actually do is search for anything that is definitely not
+     * a boundary string, and allow that not-boundary data to pass through.
+     *
+     * If we find data that might be a boundary, we try to read more data
+     * in until we know for sure.
+     */
+    while (!APR_BRIGADE_EMPTY(bbIn)) {
+
+        const char *pos;
+        const char *str;
+        apr_bucket *e, *next, *prev;
+        apr_size_t inbytes;
+        apr_size_t len;
+        apr_status_t rv;
+
+        /* We didn't find a boundary within the maximum line length. */
+        if (outbytes >= maxbytes) {
+            return APR_INCOMPLETE;
+        }
+
+        e = APR_BRIGADE_FIRST(bbIn);
+
+        /* We hit a metadata bucket, stop and let the caller handle it */
+        if (APR_BUCKET_IS_METADATA(e)) {
+            return APR_INCOMPLETE;
+        }
+
+        rv = apr_bucket_read(e, &str, &len, block);
+
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+
+        /* bytes of e that could still turn out to be part of a boundary */
+        inbytes = len;
+
+        /*
+         * Fast path.
+         *
+         * If we have at least one boundary worth of data, do a substring
+         * search for the boundary, and split quickly if found.
+         */
+        if (len >= boundary_len) {
+
+            apr_size_t off;
+            apr_size_t leftover;
+
+            pos = split_boundary_find(str, len, boundary, boundary_len);
+
+            /* definitely found it, we leave */
+            if (pos != NULL) {
+
+                off = (apr_size_t)(pos - str);
+
+                /* everything up to the boundary */
+                if (off) {
+
+                    rv = split_boundary_pass(bbOut, e, off);
+                    if (rv != APR_SUCCESS) {
+                        return rv;
+                    }
+
+                    e = APR_BRIGADE_FIRST(bbIn);
+                }
+
+                /* cut out the boundary */
+                rv = apr_bucket_split(e, boundary_len);
+                if (rv != APR_SUCCESS) {
+                    return rv;
+                }
+                apr_bucket_delete(e);
+
+                return APR_SUCCESS;
+            }
+
+            /*
+             * Any partial matches at the end? Try the longest possible
+             * prefix of the boundary first, then ever shorter ones.
+             */
+            leftover = boundary_len - 1;
+            off = (len - leftover);
+
+            while (leftover) {
+                if (!memcmp(str + off, boundary, leftover)) {
+
+                    if (off) {
+
+                        rv = split_boundary_pass(bbOut, e, off);
+                        if (rv != APR_SUCCESS) {
+                            return rv;
+                        }
+
+                        e = APR_BRIGADE_FIRST(bbIn);
+                    }
+
+                    outbytes += off;
+                    inbytes -= off;
+
+                    goto skip;
+                }
+                off++;
+                leftover--;
+            }
+
+            /* no trace of the boundary, pass the whole bucket through */
+            APR_BUCKET_REMOVE(e);
+            APR_BRIGADE_INSERT_TAIL(bbOut, e);
+
+            outbytes += len;
+
+            continue;
+
+        }
+
+        /*
+         * Slow path.
+         *
+         * We need to read ahead at least one boundary worth of data so
+         * we can search across the bucket edges.
+         */
+        else {
+
+            apr_size_t off = 0;
+
+            /* find all definite non matches */
+            while (len) {
+                if (!memcmp(str + off, boundary, len)) {
+
+                    if (off) {
+
+                        rv = split_boundary_pass(bbOut, e, off);
+                        if (rv != APR_SUCCESS) {
+                            return rv;
+                        }
+
+                        e = APR_BRIGADE_FIRST(bbIn);
+                    }
+
+                    /* count what was passed on, as the fast path does */
+                    outbytes += off;
+                    inbytes -= off;
+
+                    goto skip;
+                }
+                off++;
+                len--;
+            }
+
+            APR_BUCKET_REMOVE(e);
+            APR_BRIGADE_INSERT_TAIL(bbOut, e);
+
+            /* len has been counted down to zero, off holds the length */
+            outbytes += off;
+
+            continue;
+
+        }
+
+        /*
+         * If we reach skip, it means the bucket in e is:
+         *
+         * - shorter than the boundary
+         * - matches the boundary up to the bucket length
+         * - might match more buckets
+         *
+         * Read further buckets and check whether the boundary matches all
+         * the way to the end. If so, we have a match. If no match, shave off
+         * one byte and continue round to try again.
+         *
+         * NOTE(review): buckets read here are not checked with
+         * APR_BUCKET_IS_METADATA(); a bucket that reads as zero bytes is
+         * stepped over, and is deleted along with the boundary if the
+         * match completes further on - confirm this is intended.
+         */
+skip:
+
+        for (next = APR_BUCKET_NEXT(e);
+             inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn);
+             next = APR_BUCKET_NEXT(next)) {
+
+            const char *nstr;
+            apr_size_t nlen;
+            apr_size_t need;
+
+            rv = apr_bucket_read(next, &nstr, &nlen, block);
+
+            if (rv != APR_SUCCESS) {
+                return rv;
+            }
+
+            /* bytes of the boundary that are still unmatched */
+            need = boundary_len - inbytes;
+
+            if (nlen >= need) {
+
+                /* not a match, bail out */
+                if (memcmp(nstr, boundary + inbytes, need)) {
+                    break;
+                }
+
+                /* a match! keep whatever follows the boundary */
+                if (nlen > need) {
+                    rv = apr_bucket_split(next, need);
+                    if (rv != APR_SUCCESS) {
+                        return rv;
+                    }
+                }
+
+                /* remove the boundary and return */
+                e = APR_BUCKET_NEXT(next);
+
+                for (prev = APR_BRIGADE_FIRST(bbIn);
+                     prev != e;
+                     prev = APR_BRIGADE_FIRST(bbIn)) {
+
+                    apr_bucket_delete(prev);
+
+                }
+
+                return APR_SUCCESS;
+
+            }
+            else if (nlen) {
+
+                /* not a match, bail out */
+                if (memcmp(nstr, boundary + inbytes, nlen)) {
+                    break;
+                }
+
+                /* still hope for a match */
+                inbytes += nlen;
+            }
+
+        }
+
+        /*
+         * If we reach this point, the bucket e did not match the boundary
+         * in the subsequent buckets.
+         *
+         * Bump one byte off, and loop round to search again.
+         */
+        rv = split_boundary_pass(bbOut, e, 1);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+
+        outbytes++;
+
+    }
+
+    return APR_INCOMPLETE;
+}
+
APR_DECLARE(apr_status_t) apr_brigade_to_iovec(apr_bucket_brigade *b,
struct iovec *vec, int *nvec)
diff --git a/include/apr_buckets.h b/include/apr_buckets.h
index 0725c6cd9..065058316 100644
--- a/include/apr_buckets.h
+++ b/include/apr_buckets.h
@@ -53,6 +53,11 @@ extern "C" {
/** default bucket buffer size - 8KB minus room for memory allocator headers */
#define APR_BUCKET_BUFF_SIZE 8000
+/** if passed to apr_brigade_split_boundary() as the boundary length, the
+ * length of the boundary string will be calculated with strlen()
+ */
+#define APR_BUCKETS_STRING (-1)
+
/** Determines how a bucket or brigade should be read */
typedef enum {
APR_BLOCK_READ, /**< block until data becomes available */
@@ -791,6 +796,38 @@ APR_DECLARE(apr_status_t) apr_brigade_split_line(apr_bucket_brigade *bbOut,
__attribute__((nonnull(1,2)));
/**
+ * Split a brigade based on the provided boundary, or metadata buckets,
+ * whichever are encountered first.
+ *
+ * If the boundary is found, all buckets prior to the boundary are passed
+ * into bbOut, the boundary itself is removed from bbIn, and APR_SUCCESS
+ * is returned.
+ *
+ * If a metadata bucket is found, or if the boundary is not found within
+ * the limit specified by maxbytes, all prior buckets are passed into bbOut,
+ * and APR_INCOMPLETE is returned. APR_INCOMPLETE is also returned when
+ * bbIn runs out of buckets before the boundary is found.
+ *
+ * Any partial matches at the end of a bucket will be held back until the
+ * buckets that follow show whether the boundary continues.
+ *
+ * If the boundary is NULL or the empty string, APR_EINVAL is returned.
+ *
+ * If an error is encountered, the APR error code will be returned.
+ *
+ * @param bbOut The bucket brigade that receives the data found before the
+ *              boundary.
+ * @param bbIn The input bucket brigade to search for the boundary.
+ * @param block The blocking mode used when reading buckets from bbIn.
+ * @param boundary The boundary string.
+ * @param boundary_len The length of the boundary string. If set to
+ *                     APR_BUCKETS_STRING, the length will be calculated.
+ * @param maxbytes The maximum bytes to read. The limit is checked before
+ *                 each bucket is examined.
+ * @return APR_SUCCESS if the boundary was found, APR_INCOMPLETE if it was
+ *         not, APR_EINVAL for a NULL or empty boundary, or the error
+ *         returned while reading a bucket.
+ */
+APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade *bbOut,
+ apr_bucket_brigade *bbIn,
+ apr_read_type_e block,
+ const char *boundary,
+ apr_size_t boundary_len,
+ apr_off_t maxbytes)
+ __attribute__((nonnull(1,2)));
+
+/**
* Create an iovec of the elements in a bucket_brigade... return number
* of elements used. This is useful for writing to a file or to the
* network efficiently.
diff --git a/test/testbuckets.c b/test/testbuckets.c
index 31bed0c1b..2b789f1a0 100644
--- a/test/testbuckets.c
+++ b/test/testbuckets.c
@@ -209,6 +209,62 @@ static void test_splitline(abts_case *tc, void *data)
apr_bucket_alloc_destroy(ba);
}
+/*
+ * Exercise apr_brigade_split_boundary() against two-bucket brigades: a
+ * boundary inside the first bucket, a boundary straddling both buckets,
+ * and a boundary that never appears.
+ */
+static void test_splitboundary(abts_case *tc, void *data)
+{
+    apr_bucket_alloc_t *alloc = apr_bucket_alloc_create(p);
+    apr_bucket_brigade *input;
+    apr_bucket_brigade *output;
+    apr_status_t rv;
+
+    /* fast path: "brown" sits wholly within the first bucket */
+    input = make_simple_brigade(alloc, "quick brown fox",
+                                " jumped over the lazy dog");
+    output = apr_brigade_create(p, alloc);
+
+    rv = apr_brigade_split_boundary(output, input, APR_BLOCK_READ,
+                                    "brown", APR_BUCKETS_STRING, 100);
+    APR_ASSERT_SUCCESS(tc, "split boundary", rv);
+
+    flatten_match(tc, "split boundary", output, "quick ");
+    flatten_match(tc, "remainder", input, " fox jumped over the lazy dog");
+
+    apr_brigade_destroy(output);
+    apr_brigade_destroy(input);
+
+    /* slow path: "jumped" begins in one bucket and ends in the next */
+    input = make_simple_brigade(alloc, "quick brown fox jum",
+                                "ped over the lazy dog");
+    output = apr_brigade_create(p, alloc);
+
+    rv = apr_brigade_split_boundary(output, input, APR_BLOCK_READ,
+                                    "jumped", APR_BUCKETS_STRING, 100);
+    APR_ASSERT_SUCCESS(tc, "split boundary", rv);
+
+    flatten_match(tc, "split boundary", output, "quick brown fox ");
+    flatten_match(tc, "remainder", input, " over the lazy dog");
+
+    apr_brigade_destroy(output);
+    apr_brigade_destroy(input);
+
+    /* not found: all of the data moves across and nothing is left behind */
+    input = make_simple_brigade(alloc, "quick brown fox jum",
+                                "ped over the lazy dog");
+    output = apr_brigade_create(p, alloc);
+
+    rv = apr_brigade_split_boundary(output, input, APR_BLOCK_READ,
+                                    "jumping", APR_BUCKETS_STRING, 100);
+    ABTS_ASSERT(tc, "split boundary", rv == APR_INCOMPLETE);
+
+    flatten_match(tc, "split boundary", output, "quick brown fox jumped over the lazy dog");
+    flatten_match(tc, "remainder", input, "");
+
+    apr_brigade_destroy(output);
+    apr_brigade_destroy(input);
+
+    apr_bucket_alloc_destroy(alloc);
+}
+
/* Test that bucket E has content EDATA of length ELEN. */
static void test_bucket_content(abts_case *tc,
apr_bucket *e,
@@ -521,6 +577,7 @@ abts_suite *testbuckets(abts_suite *suite)
abts_run_test(suite, test_split, NULL);
abts_run_test(suite, test_bwrite, NULL);
abts_run_test(suite, test_splitline, NULL);
+ abts_run_test(suite, test_splitboundary, NULL);
abts_run_test(suite, test_splits, NULL);
abts_run_test(suite, test_insertfile, NULL);
abts_run_test(suite, test_manyfile, NULL);