From 4ab7c270ecf17e38be34bed4fcb195f949b451df Mon Sep 17 00:00:00 2001 From: Evgeny Kotkov Date: Tue, 7 Feb 2023 10:44:12 +0000 Subject: On 1.7.x branch: Merge r1785072, r1788930 from trunk: apr_file_gets: Optimize for buffered files on Windows. git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.7.x@1907495 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES | 3 + file_io/win32/readwrite.c | 210 ++++++++++++++++++++++++++++++-------------- test/testfile.c | 217 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 365 insertions(+), 65 deletions(-) diff --git a/CHANGES b/CHANGES index 55037db45..e64568205 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,9 @@ Changes for APR 1.7.2 *) Correct a packaging issue in 1.7.1. The contents of the release were correct, but the top level directory was misnamed. + *) apr_file_gets: Optimize for buffered files on Windows. + [Evgeny Kotkov ] + Changes for APR 1.7.1 *) SECURITY: CVE-2022-24963 (cve.mitre.org) diff --git a/file_io/win32/readwrite.c b/file_io/win32/readwrite.c index 701bec75b..f8c03ee42 100644 --- a/file_io/win32/readwrite.c +++ b/file_io/win32/readwrite.c @@ -140,6 +140,56 @@ static apr_status_t read_with_timeout(apr_file_t *file, void *buf, apr_size_t le return rv; } +static apr_status_t read_buffered(apr_file_t *thefile, void *buf, apr_size_t *len) +{ + apr_status_t rv; + char *pos = (char *)buf; + apr_size_t blocksize; + apr_size_t size = *len; + + if (thefile->direction == 1) { + rv = apr_file_flush(thefile); + if (rv != APR_SUCCESS) { + return rv; + } + thefile->bufpos = 0; + thefile->direction = 0; + thefile->dataRead = 0; + } + + rv = 0; + while (rv == 0 && size > 0) { + if (thefile->bufpos >= thefile->dataRead) { + apr_size_t read; + rv = read_with_timeout(thefile, thefile->buffer, + thefile->bufsize, &read); + if (read == 0) { + if (rv == APR_EOF) + thefile->eof_hit = TRUE; + break; + } + else { + thefile->dataRead = read; + thefile->filePtr += thefile->dataRead; + thefile->bufpos = 0; + } + } + + blocksize = size > thefile->dataRead - thefile->bufpos ? thefile->dataRead - thefile->bufpos : size; + memcpy(pos, thefile->buffer + thefile->bufpos, blocksize); + thefile->bufpos += blocksize; + pos += blocksize; + size -= blocksize; + } + + *len = pos - (char *)buf; + if (*len) { + rv = APR_SUCCESS; + } + + return rv; +} + APR_DECLARE(apr_status_t) apr_file_read(apr_file_t *thefile, void *buf, apr_size_t *len) { apr_status_t rv; @@ -177,57 +227,10 @@ APR_DECLARE(apr_status_t) apr_file_read(apr_file_t *thefile, void *buf, apr_size } } if (thefile->buffered) { - char *pos = (char *)buf; - apr_size_t blocksize; - apr_size_t size = *len; - if (thefile->flags & APR_FOPEN_XTHREAD) { apr_thread_mutex_lock(thefile->mutex); } - - if (thefile->direction == 1) { - rv = apr_file_flush(thefile); - if (rv != APR_SUCCESS) { - if (thefile->flags & APR_FOPEN_XTHREAD) { - apr_thread_mutex_unlock(thefile->mutex); - } - return rv; - } - thefile->bufpos = 0; - thefile->direction = 0; - thefile->dataRead = 0; - } - - rv = 0; - while (rv == 0 && size > 0) { - if (thefile->bufpos >= thefile->dataRead) { - apr_size_t read; - rv = read_with_timeout(thefile, thefile->buffer, - thefile->bufsize, &read); - if (read == 0) { - if (rv == APR_EOF) - thefile->eof_hit = TRUE; - break; - } - else { - thefile->dataRead = read; - thefile->filePtr += thefile->dataRead; - thefile->bufpos = 0; - } - } - - blocksize = size > thefile->dataRead - thefile->bufpos ? thefile->dataRead - thefile->bufpos : size; - memcpy(pos, thefile->buffer + thefile->bufpos, blocksize); - thefile->bufpos += blocksize; - pos += blocksize; - size -= blocksize; - } - - *len = pos - (char *)buf; - if (*len) { - rv = APR_SUCCESS; - } - + rv = read_buffered(thefile, buf, len); if (thefile->flags & APR_FOPEN_XTHREAD) { apr_thread_mutex_unlock(thefile->mutex); } @@ -455,30 +458,107 @@ APR_DECLARE(apr_status_t) apr_file_puts(const char *str, apr_file_t *thefile) APR_DECLARE(apr_status_t) apr_file_gets(char *str, int len, apr_file_t *thefile) { - apr_size_t readlen; apr_status_t rv = APR_SUCCESS; - int i; - - for (i = 0; i < len-1; i++) { - readlen = 1; - rv = apr_file_read(thefile, str+i, &readlen); + apr_size_t nbytes; + const char *str_start = str; + char *final = str + len - 1; - if (rv != APR_SUCCESS && rv != APR_EOF) + /* If the file is open for xthread support, allocate and + * initialize the overlapped and io completion event (hEvent). + * Threads should NOT share an apr_file_t or its hEvent. + */ + if ((thefile->flags & APR_FOPEN_XTHREAD) && !thefile->pOverlapped) { + thefile->pOverlapped = (OVERLAPPED*) apr_pcalloc(thefile->pool, + sizeof(OVERLAPPED)); + thefile->pOverlapped->hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!thefile->pOverlapped->hEvent) { + rv = apr_get_os_error(); return rv; + } + } - if (readlen == 0) { - /* If we have bytes, defer APR_EOF to the next call */ - if (i > 0) - rv = APR_SUCCESS; - break; + /* Handle the ungetchar if there is one. */ + if (thefile->ungetchar != -1 && str < final) { + *str = thefile->ungetchar; + thefile->ungetchar = -1; + if (*str == '\n') { + *(++str) = '\0'; + return APR_SUCCESS; } - - if (str[i] == '\n') { - i++; /* don't clobber this char below */ - break; + ++str; + } + + /* If we have an underlying buffer, we can be *much* more efficient + * and skip over the read_with_timeout() calls. + */ + if (thefile->buffered) { + if (thefile->flags & APR_FOPEN_XTHREAD) { + apr_thread_mutex_lock(thefile->mutex); + } + + if (thefile->direction == 1) { + rv = apr_file_flush(thefile); + if (rv) { + if (thefile->flags & APR_FOPEN_XTHREAD) { + apr_thread_mutex_unlock(thefile->mutex); + } + return rv; + } + + thefile->direction = 0; + thefile->bufpos = 0; + thefile->dataRead = 0; + } + + while (str < final) { /* leave room for trailing '\0' */ + if (thefile->bufpos < thefile->dataRead) { + *str = thefile->buffer[thefile->bufpos++]; + } + else { + nbytes = 1; + rv = read_buffered(thefile, str, &nbytes); + if (rv != APR_SUCCESS) { + break; + } + } + if (*str == '\n') { + ++str; + break; + } + ++str; + } + if (thefile->flags & APR_FOPEN_XTHREAD) { + apr_thread_mutex_unlock(thefile->mutex); + } + } + else { + while (str < final) { /* leave room for trailing '\0' */ + nbytes = 1; + rv = read_with_timeout(thefile, str, nbytes, &nbytes); + if (rv == APR_EOF) + thefile->eof_hit = TRUE; + + if (rv != APR_SUCCESS) { + break; + } + if (*str == '\n') { + ++str; + break; + } + ++str; } } - str[i] = 0; + + /* We must store a terminating '\0' if we've stored any chars. We can + * get away with storing it if we hit an error first. + */ + *str = '\0'; + if (str > str_start) { + /* We stored chars; don't report EOF or any other errors; + * the app will find out about that on the next call. + */ + return APR_SUCCESS; + } return rv; } diff --git a/test/testfile.c b/test/testfile.c index b1e9c554b..01f886d54 100644 --- a/test/testfile.c +++ b/test/testfile.c @@ -430,6 +430,10 @@ static void test_gets(abts_case *tc, void *data) rv = apr_file_gets(str, 256, f); ABTS_INT_EQUAL(tc, APR_EOF, rv); ABTS_STR_EQUAL(tc, "", str); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(str, 256, f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", str); apr_file_close(f); } @@ -453,6 +457,214 @@ static void test_gets_buffered(abts_case *tc, void *data) rv = apr_file_gets(str, 256, f); ABTS_INT_EQUAL(tc, APR_EOF, rv); ABTS_STR_EQUAL(tc, "", str); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(str, 256, f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", str); + apr_file_close(f); +} + +static void test_gets_empty(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testgets_empty.txt"; + char buf[256]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ, APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "re-open test file", rv); + + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + apr_file_close(f); +} + +static void test_gets_multiline(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testgets_multiline.txt"; + char buf[256]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file", rv); + rv = apr_file_puts("a\nb\n", f); + APR_ASSERT_SUCCESS(tc, "write test data", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ, APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "re-open test file", rv); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read first line", rv); + ABTS_STR_EQUAL(tc, "a\n", buf); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read second line", rv); + ABTS_STR_EQUAL(tc, "b\n", buf); + + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + apr_file_close(f); +} + +static void test_gets_small_buf(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testgets_small_buf.txt"; + char buf[2]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file", rv); + rv = apr_file_puts("ab\n", f); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ, APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "re-open test file", rv); + /* Buffer is too small to hold the full line, test that gets properly + * returns the line content character by character. + */ + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read first chunk", rv); + ABTS_STR_EQUAL(tc, "a", buf); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read second chunk", rv); + ABTS_STR_EQUAL(tc, "b", buf); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read third chunk", rv); + ABTS_STR_EQUAL(tc, "\n", buf); + + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + apr_file_close(f); +} + +static void test_gets_ungetc(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testgets_ungetc.txt"; + char buf[256]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file", rv); + rv = apr_file_puts("a\n", f); + APR_ASSERT_SUCCESS(tc, "write test data", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ, APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "re-open test file", rv); + + rv = apr_file_ungetc('b', f); + APR_ASSERT_SUCCESS(tc, "call ungetc", rv); + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read line", rv); + ABTS_STR_EQUAL(tc, "ba\n", buf); + + rv = apr_file_ungetc('\n', f); + APR_ASSERT_SUCCESS(tc, "call ungetc with EOL", rv); + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read line", rv); + ABTS_STR_EQUAL(tc, "\n", buf); + + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + apr_file_close(f); +} + +static void test_gets_buffered_big(abts_case *tc, void *data) +{ + apr_status_t rv; + apr_file_t *f; + const char *fname = "data/testgets_buffered_big.txt"; + char hugestr[APR_BUFFERSIZE + 2]; + char buf[APR_BUFFERSIZE + 2]; + + apr_file_remove(fname, p); + + rv = apr_file_open(&f, fname, APR_FOPEN_CREATE | APR_FOPEN_WRITE, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "open test file", rv); + /* Test an edge case with a buffered file and the line that exceeds + * the default buffer size by 1 (the line itself fits into the buffer, + * but the line + EOL does not). + */ + memset(hugestr, 'a', sizeof(hugestr)); + hugestr[sizeof(hugestr) - 2] = '\n'; + hugestr[sizeof(hugestr) - 1] = '\0'; + rv = apr_file_puts(hugestr, f); + APR_ASSERT_SUCCESS(tc, "write first line", rv); + rv = apr_file_puts("b\n", f); + APR_ASSERT_SUCCESS(tc, "write second line", rv); + apr_file_close(f); + + rv = apr_file_open(&f, fname, APR_FOPEN_READ | APR_FOPEN_BUFFERED, + APR_FPROT_OS_DEFAULT, p); + APR_ASSERT_SUCCESS(tc, "re-open test file", rv); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read first line", rv); + ABTS_STR_EQUAL(tc, hugestr, buf); + + memset(buf, 0, sizeof(buf)); + rv = apr_file_gets(buf, sizeof(buf), f); + APR_ASSERT_SUCCESS(tc, "read second line", rv); + ABTS_STR_EQUAL(tc, "b\n", buf); + + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); + /* Calling gets after EOF should return EOF. */ + rv = apr_file_gets(buf, sizeof(buf), f); + ABTS_INT_EQUAL(tc, APR_EOF, rv); + ABTS_STR_EQUAL(tc, "", buf); apr_file_close(f); } @@ -1286,6 +1498,11 @@ abts_suite *testfile(abts_suite *suite) abts_run_test(suite, test_ungetc, NULL); abts_run_test(suite, test_gets, NULL); abts_run_test(suite, test_gets_buffered, NULL); + abts_run_test(suite, test_gets_empty, NULL); + abts_run_test(suite, test_gets_multiline, NULL); + abts_run_test(suite, test_gets_small_buf, NULL); + abts_run_test(suite, test_gets_ungetc, NULL); + abts_run_test(suite, test_gets_buffered_big, NULL); abts_run_test(suite, test_puts, NULL); abts_run_test(suite, test_writev, NULL); abts_run_test(suite, test_writev_full, NULL); -- cgit v1.2.1