diff options
author | Sebastian Pipping <sebastian@pipping.org> | 2022-02-08 04:06:21 +0100 |
---|---|---|
committer | Sebastian Pipping <sebastian@pipping.org> | 2022-02-18 18:02:19 +0100 |
commit | 6a5510bc6b7efe743356296724e0b38300f05379 (patch) | |
tree | 69a583ebb4ae939530399e7cd0b15e39aef4ca9d /expat | |
parent | c85a3025e7a1be086dc34e7559fbc543914d047f (diff) | |
download | libexpat-git-6a5510bc6b7efe743356296724e0b38300f05379.tar.gz |
tests: Cover missing validation of encoding (CVE-2022-25235)
Diffstat (limited to 'expat')
-rw-r--r-- | expat/tests/runtests.c | 109 |
1 files changed, 109 insertions, 0 deletions
```diff
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c
index bc5344b1..9b155b82 100644
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
 }
 END_TEST
 
+START_TEST(test_utf8_in_start_tags) {
+  struct test_case {
+    bool goodName;
+    bool goodNameStart;
+    const char *tagName;
+  };
+
+  // The idea with the tests below is this:
+  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
+  // go to isNever and are hence not a concern.
+  //
+  // We start with a character that is a valid name character
+  // (or even name-start character, see XML 1.0r4 spec) and then we flip
+  // single bits at places where (1) the result leaves the UTF-8 encoding space
+  // and (2) we stay in the same n-byte sequence family.
+  //
+  // The flipped bits are highlighted in angle brackets in comments,
+  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
+  // the most significant bit to 1 to leave UTF-8 encoding space.
+  struct test_case cases[] = {
+      // 1-byte UTF-8: [0xxx xxxx]
+      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
+      {false, false, "\xBA"}, // [<1>011 1010]
+      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
+      {false, false, "\xB9"}, // [<1>011 1001]
+
+      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
+      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
+                                  // Arabic small waw U+06E5
+      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
+      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
+      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
+      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
+                                  // combining char U+0301
+      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
+      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
+      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
+
+      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
+      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
+                                      // Devanagari Letter A U+0905
+      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
+      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
+      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
+      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
+      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
+      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
+                                      // combining char U+0901
+      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
+      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
+      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
+      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
+      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
+  };
+  const bool atNameStart[] = {true, false};
+
+  size_t i = 0;
+  char doc[1024];
+  size_t failCount = 0;
+
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    size_t j = 0;
+    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
+      const bool expectedSuccess
+          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
+      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
+      XML_Parser parser = XML_ParserCreate(NULL);
+
+      const enum XML_Status status
+          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
+
+      bool success = true;
+      if ((status == XML_STATUS_OK) != expectedSuccess) {
+        success = false;
+      }
+      if ((status == XML_STATUS_ERROR)
+          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
+        success = false;
+      }
+
+      if (! success) {
+        fprintf(
+            stderr,
+            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
+            (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
+            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
+        failCount++;
+      }
+
+      XML_ParserFree(parser);
+    }
+  }
+
+  if (failCount > 0) {
+    fail("UTF-8 regression detected");
+  }
+}
+END_TEST
+
 /* Test trailing spaces in elements are accepted */
 static void XMLCALL
 record_element_end_handler(void *userData, const XML_Char *name) {
@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
 }
 END_TEST
 
+START_TEST(test_bad_doctype_utf8) {
+  const char *text = "<!DOCTYPE \xDB\x25"
+                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
+  expect_failure(text, XML_ERROR_INVALID_TOKEN,
+                 "Invalid UTF-8 in DOCTYPE not faulted");
+}
+END_TEST
+
 START_TEST(test_bad_doctype_utf16) {
   const char text[] =
       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
@@ -11870,6 +11977,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
+  tcase_add_test(tc_basic, test_utf8_in_start_tags);
   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
   tcase_add_test(tc_basic, test_utf16_attribute);
   tcase_add_test(tc_basic, test_utf16_second_attr);
@@ -11878,6 +11986,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
   tcase_add_test(tc_basic, test_bad_doctype);
+  tcase_add_test(tc_basic, test_bad_doctype_utf8);
   tcase_add_test(tc_basic, test_bad_doctype_utf16);
   tcase_add_test(tc_basic, test_bad_doctype_plus);
   tcase_add_test(tc_basic, test_bad_doctype_star);
```