From 0b705f55ddccb88e0a14ad4e0fa904637f0f9bf9 Mon Sep 17 00:00:00 2001 From: Ivan Zhakov Date: Thu, 30 Jun 2022 17:45:14 +0000 Subject: On 1.8.x branch: Merge r1828509, r1902371 from trunk: *) apr_file_read: Optimize large reads from buffered files on Windows. git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.8.x@1902379 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES | 3 + file_io/win32/readwrite.c | 80 +++++++--- test/testfile.c | 365 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 425 insertions(+), 23 deletions(-) diff --git a/CHANGES b/CHANGES index adc000fcd..5e12ceb99 100644 --- a/CHANGES +++ b/CHANGES @@ -26,6 +26,9 @@ Changes for APR 1.8.0 *) apr_file_write: Optimize large writes to buffered files on Windows. [Evgeny Kotkov ] + *) apr_file_read: Optimize large reads from buffered files on Windows. + [Evgeny Kotkov] + Changes for APR 1.7.1 *) SECURITY: CVE-2021-35940 (cve.mitre.org) diff --git a/file_io/win32/readwrite.c b/file_io/win32/readwrite.c index b13b230e9..027ef988d 100644 --- a/file_io/win32/readwrite.c +++ b/file_io/win32/readwrite.c @@ -145,8 +145,9 @@ static apr_status_t read_buffered(apr_file_t *thefile, void *buf, apr_size_t *le { apr_status_t rv; char *pos = (char *)buf; - apr_size_t blocksize; - apr_size_t size = *len; + apr_size_t bytes_read; + apr_size_t size; + apr_size_t remaining = *len; if (thefile->direction == 1) { rv = apr_file_flush(thefile); @@ -158,29 +159,62 @@ static apr_status_t read_buffered(apr_file_t *thefile, void *buf, apr_size_t *le thefile->dataRead = 0; } - rv = 0; - while (rv == 0 && size > 0) { - if (thefile->bufpos >= thefile->dataRead) { - apr_size_t read; - rv = read_with_timeout(thefile, thefile->buffer, - thefile->bufsize, &read); - if (read == 0) { - if (rv == APR_EOF) - thefile->eof_hit = TRUE; - break; - } - else { - thefile->dataRead = read; - thefile->filePtr += thefile->dataRead; - thefile->bufpos = 0; - } + /* Copy the data we have in the buffer. */ + size = thefile->dataRead - thefile->bufpos; + if (size > remaining) { + size = remaining; + } + memcpy(pos, thefile->buffer + thefile->bufpos, size); + pos += size; + remaining -= size; + thefile->bufpos += size; + + if (remaining == 0) { + /* Nothing to do more, keep *LEN unchanged and return. */ + return APR_SUCCESS; + } + /* The buffer is empty, but the caller wants more. + * Decide on the most appropriate way to read from the file: + */ + if (remaining > thefile->bufsize) { + /* If the remaining chunk won't fit into the buffer, read it into + * the destination buffer with a single syscall. + */ + rv = read_with_timeout(thefile, pos, remaining, &bytes_read); + thefile->filePtr += bytes_read; + pos += bytes_read; + /* Also, copy the last BUFSIZE (or less in case of a short read) bytes + * from the chunk to our buffer so that seeking backwards and reading + * would work from the buffer. + */ + size = thefile->bufsize; + if (size > bytes_read) { + size = bytes_read; } + memcpy(thefile->buffer, pos - size, size); + thefile->bufpos = size; + thefile->dataRead = size; + } + else { + /* The remaining chunk fits into the buffer. Read up to BUFSIZE bytes + * from the file to our internal buffer. + */ + rv = read_with_timeout(thefile, thefile->buffer, thefile->bufsize, &bytes_read); + thefile->filePtr += bytes_read; + thefile->bufpos = 0; + thefile->dataRead = bytes_read; + /* Copy the required part to the caller. */ + size = remaining; + if (size > bytes_read) { + size = bytes_read; + } + memcpy(pos, thefile->buffer, size); + pos += size; + thefile->bufpos += size; + } - blocksize = size > thefile->dataRead - thefile->bufpos ? thefile->dataRead - thefile->bufpos : size; - memcpy(pos, thefile->buffer + thefile->bufpos, blocksize); - thefile->bufpos += blocksize; - pos += blocksize; - size -= blocksize; + if (bytes_read == 0 && rv == APR_EOF) { + thefile->eof_hit = TRUE; } *len = pos - (char *)buf; diff --git a/test/testfile.c b/test/testfile.c index 692085c80..660e3d27d 100644 --- a/test/testfile.c +++ b/test/testfile.c @@ -1943,6 +1943,364 @@ static void test_append_read(abts_case *tc, void *data) apr_file_remove(fname, p); } +static void test_empty_read_buffered(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testempty_read_buffered.dat"; + apr_size_t len; + apr_size_t bytes_read; + char buf[64]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "create empty test file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Test an empty read. */ + len = 1; + rv = apr_file_read_full(f, buf, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_large_read_buffered(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testlarge_read_buffered.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char *buf; + char *buf2; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + len = 80000; + buf = apr_palloc(p, len); + memset(buf, 'a', len); + rv = apr_file_write_full(f, buf, len, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Test a single large read. */ + buf2 = apr_palloc(p, len); + rv = apr_file_read_full(f, buf2, len, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, (int)len, (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + /* Test that we receive an EOF. */ + len = 1; + rv = apr_file_read_full(f, buf2, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_two_large_reads_buffered(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testtwo_large_reads_buffered.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char *buf; + char *buf2; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + len = 80000; + buf = apr_palloc(p, len); + memset(buf, 'a', len); + rv = apr_file_write_full(f, buf, len, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Test two consecutive large reads. */ + buf2 = apr_palloc(p, len); + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, len / 2, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, (int)(len / 2), (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, len / 2, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, (int)(len / 2), (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf + len / 2, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + /* Test that we receive an EOF. */ + len = 1; + rv = apr_file_read_full(f, buf2, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_small_and_large_reads_buffered(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testtwo_large_reads_buffered.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char *buf; + char *buf2; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + len = 80000; + buf = apr_palloc(p, len); + memset(buf, 'a', len); + rv = apr_file_write_full(f, buf, len, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Test small read followed by a large read. */ + buf2 = apr_palloc(p, len); + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, 5, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, 5, (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, len - 5, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, (int)(len - 5), (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf + 5, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + /* Test that we receive an EOF. */ + len = 1; + rv = apr_file_read_full(f, buf2, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_read_buffered_spanning_over_bufsize(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testread_buffered_spanning_over_bufsize.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char *buf; + char *buf2; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + len = APR_BUFFERSIZE + 1; + buf = apr_palloc(p, len); + memset(buf, 'a', len); + rv = apr_file_write_full(f, buf, len, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Test reads than span over the default buffer size. */ + buf2 = apr_palloc(p, len); + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, APR_BUFFERSIZE - 1, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, APR_BUFFERSIZE - 1, (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + memset(buf2, 0, len); + rv = apr_file_read_full(f, buf2, 2, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, 2, (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, buf2, bytes_read) == 0); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + /* Test that we receive an EOF. */ + len = 1; + rv = apr_file_read_full(f, buf2, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_single_byte_reads_buffered(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testsingle_byte_reads_buffered.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char *buf; + apr_size_t total; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + len = 40000; + buf = apr_palloc(p, len); + memset(buf, 'a', len); + rv = apr_file_write_full(f, buf, len, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + total = 0; + while (1) { + memset(buf, 0, len); + rv = apr_file_read_full(f, buf, 1, &bytes_read); + if (rv == APR_EOF) { + break; + } + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, 1, (int)bytes_read); + ABTS_INT_EQUAL(tc, 'a', buf[0]); + total += bytes_read; + } + ABTS_INT_EQUAL(tc, (int)len, (int)total); + + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + +static void test_read_buffered_seek(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testtest_read_buffered_seek.dat"; + apr_size_t len; + apr_size_t bytes_written; + apr_size_t bytes_read; + char buf[64]; + apr_off_t off; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for writing", rv); + rv = apr_file_write_full(f, "abcdef", 6, &bytes_written); + APR_ASSERT_SUCCESS(tc, "write to file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file for reading", rv); + + /* Read one byte. */ + memset(buf, 0, sizeof(buf)); + rv = apr_file_read_full(f, buf, 1, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, 1, (int)bytes_read); + ABTS_INT_EQUAL(tc, 'a', buf[0]); + + /* Seek into the middle of the file. */ + off = 3; + rv = apr_file_seek(f, APR_SET, &off); + APR_ASSERT_SUCCESS(tc, "change file read offset", rv); + ABTS_INT_EQUAL(tc, 3, (int)off); + + /* Read three bytes. */ + memset(buf, 0, sizeof(buf)); + rv = apr_file_read_full(f, buf, 3, &bytes_read); + APR_ASSERT_SUCCESS(tc, "read from file", rv); + ABTS_INT_EQUAL(tc, 3, (int)bytes_read); + ABTS_TRUE(tc, memcmp(buf, "def", bytes_read) == 0); + + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_SUCCESS, rv); + + /* Test that we receive an EOF. */ + len = 1; + rv = apr_file_read_full(f, buf, len, &bytes_read); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_INT_EQUAL(tc, 0, (int)bytes_read); + rv = apr_file_eof(f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + apr_file_close(f); + + apr_file_remove(fname, p); +} + abts_suite *testfile(abts_suite *suite) { suite = ADD_SUITE(suite) @@ -2002,6 +2360,13 @@ abts_suite *testfile(abts_suite *suite) abts_run_test(suite, test_atomic_append, NULL); abts_run_test(suite, test_append_locked, NULL); abts_run_test(suite, test_append_read, NULL); + abts_run_test(suite, test_empty_read_buffered, NULL); + abts_run_test(suite, test_large_read_buffered, NULL); + abts_run_test(suite, test_two_large_reads_buffered, NULL); + abts_run_test(suite, test_small_and_large_reads_buffered, NULL); + abts_run_test(suite, test_read_buffered_spanning_over_bufsize, NULL); + abts_run_test(suite, test_single_byte_reads_buffered, NULL); + abts_run_test(suite, test_read_buffered_seek, NULL); return suite; } -- cgit v1.2.1