summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pipping <sebastian@pipping.org>2022-02-18 17:59:50 +0100
committerGitHub <noreply@github.com>2022-02-18 17:59:50 +0100
commitbbdfcfef4747d2d66e81c19f4a55e29e291aa171 (patch)
treeec11cd2e7db908b27e71463c9d4d79946ad1b900
parentf1a444ef64680ebd4ff89091a2c388cd046ece2d (diff)
parent9b4ce651b26557f16103c3a366c91934ecd439ab (diff)
downloadlibexpat-git-bbdfcfef4747d2d66e81c19f4a55e29e291aa171.tar.gz
Merge pull request #558 from ferivoz/model
[CVE-2022-25313] lib: Prevent stack exhaustion in build_model
-rw-r--r--expat/lib/xmlparse.c116
1 files changed, 79 insertions, 37 deletions
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
index 902895d5..ee9a114c 100644
--- a/expat/lib/xmlparse.c
+++ b/expat/lib/xmlparse.c
@@ -7317,44 +7317,15 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}
-static void
-build_node(XML_Parser parser, int src_node, XML_Content *dest,
- XML_Content **contpos, XML_Char **strpos) {
- DTD *const dtd = parser->m_dtd; /* save one level of indirection */
- dest->type = dtd->scaffold[src_node].type;
- dest->quant = dtd->scaffold[src_node].quant;
- if (dest->type == XML_CTYPE_NAME) {
- const XML_Char *src;
- dest->name = *strpos;
- src = dtd->scaffold[src_node].name;
- for (;;) {
- *(*strpos)++ = *src;
- if (! *src)
- break;
- src++;
- }
- dest->numchildren = 0;
- dest->children = NULL;
- } else {
- unsigned int i;
- int cn;
- dest->numchildren = dtd->scaffold[src_node].childcnt;
- dest->children = *contpos;
- *contpos += dest->numchildren;
- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
- i++, cn = dtd->scaffold[cn].nextsib) {
- build_node(parser, cn, &(dest->children[i]), contpos, strpos);
- }
- dest->name = NULL;
- }
-}
-
static XML_Content *
build_model(XML_Parser parser) {
+ /* Function build_model transforms the existing parser->m_dtd->scaffold
+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
+ * XML_Content tree nodes followed by a gapless list of zero-terminated
+ * strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
- XML_Content *cpos;
- XML_Char *str;
+ XML_Char *str; /* the current string writing location */
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
@@ -7380,10 +7351,81 @@ build_model(XML_Parser parser) {
if (! ret)
return NULL;
- str = (XML_Char *)(&ret[dtd->scaffCount]);
- cpos = &ret[1];
+ /* What follows is an iterative implementation (of what was previously done
+ * recursively in a dedicated function called "build_node". The old recursive
+ * build_node could be forced into stack exhaustion from input as small as a
+ * few megabyte, and so that was a security issue. Hence, a function call
+ * stack is avoided now by resolving recursion.)
+ *
+ * The iterative approach works as follows:
+ *
+ * - We use space in the target array for building a temporary stack structure
+ * while that space is still unused.
+ * The stack grows from the array's end downwards and the "actual data"
+ * grows from the start upwards, sequentially.
+ * (Because stack grows downwards, pushing onto the stack is a decrement
+ * while popping off the stack is an increment.)
+ *
+ * - A stack element appears as a regular XML_Content node on the outside,
+ * but only uses a single field -- numchildren -- to store the source
+ * tree node array index. These are the breadcrumbs leading the way back
+ * during pre-order (node first) depth-first traversal.
+ *
+ * - The reason we know the stack will never grow into (or overlap with)
+ * the area with data of value at the start of the array is because
+ * the overall number of elements to process matches the size of the array,
+ * and the sum of fully processed nodes and yet-to-be processed nodes
+ * on the stack, cannot be more than the total number of nodes.
+ * It is possible for the top of the stack and the about-to-write node
+ * to meet, but that is safe because we get the source index out
+ * before doing any writes on that node.
+ */
+ XML_Content *dest = ret; /* tree node writing location, moves upwards */
+ XML_Content *const destLimit = &ret[dtd->scaffCount];
+ XML_Content *const stackBottom = &ret[dtd->scaffCount];
+ XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+ str = (XML_Char *)&ret[dtd->scaffCount];
+
+ /* Push source tree root node index onto the stack */
+ (--stackTop)->numchildren = 0;
+
+ for (; dest < destLimit; dest++) {
+ /* Pop source tree node index off the stack */
+ const int src_node = (int)(stackTop++)->numchildren;
+
+ /* Convert item */
+ dest->type = dtd->scaffold[src_node].type;
+ dest->quant = dtd->scaffold[src_node].quant;
+ if (dest->type == XML_CTYPE_NAME) {
+ const XML_Char *src;
+ dest->name = str;
+ src = dtd->scaffold[src_node].name;
+ for (;;) {
+ *str++ = *src;
+ if (! *src)
+ break;
+ src++;
+ }
+ dest->numchildren = 0;
+ dest->children = NULL;
+ } else {
+ unsigned int i;
+ int cn;
+ dest->name = NULL;
+ dest->numchildren = dtd->scaffold[src_node].childcnt;
+ dest->children = &dest[1];
+
+ /* Push children to the stack
+ * in a way where the first child ends up at the top of the
+ * (downwards growing) stack, in order to be processed first. */
+ stackTop -= dest->numchildren;
+ for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+ (stackTop + i)->numchildren = (unsigned int)cn;
+ }
+ }
+ }
- build_node(parser, 0, ret, &cpos, &str);
return ret;
}