diff options
author | Nick Wellnhofer <wellnhofer@aevum.de> | 2023-03-17 13:15:35 +0100 |
---|---|---|
committer | Nick Wellnhofer <wellnhofer@aevum.de> | 2023-03-17 14:02:23 +0100 |
commit | 3e85d7b7ab511b67d54ccf5dfd2f5953050eec83 (patch) | |
tree | 61342892eca4f399adc350c5868cc57bde5304a8 | |
parent | 62f199ed7d1c99999030810495bd12fd5b86fee1 (diff) | |
download | libxml2-3e85d7b7ab511b67d54ccf5dfd2f5953050eec83.tar.gz |
parser: Rely on CUR_CHAR/NEXT to grow the input buffer
The input buffer is now grown reliably when calling CUR_CHAR
(xmlCurrentChar) or NEXT (xmlNextChar). This allows to remove many
other invocations of GROW.
-rw-r--r-- | parser.c | 246 |
1 files changed, 57 insertions, 189 deletions
@@ -3201,7 +3201,6 @@ static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3213,9 +3212,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { /* * Handler for more complex cases */ - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); c = CUR_CHAR(l); if ((ctxt->options & XML_PARSE_OLD10) == 0) { /* @@ -3264,12 +3260,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { ((c >= 0xFDF0) && (c <= 0xFFFD)) || ((c >= 0x10000) && (c <= 0xEFFFF)) )) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); @@ -3291,18 +3281,14 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); c = CUR_CHAR(l); } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); return(NULL); @@ -3393,7 +3379,6 @@ static const xmlChar * xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3406,9 +3391,6 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { /* * Handler for more complex cases */ - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); startPosition = CUR_PTR - BASE_PTR; c = CUR_CHAR(l); if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ @@ -3418,31 +3400,13 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); c = CUR_CHAR(l); - if (c == 0) { - count = 0; - /* - * when shrinking to extend the buffer we really need to preserve - * the part of the name we already parsed. Hence rolling back - * by current length. - */ - ctxt->input->cur -= l; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - ctxt->input->cur += l; - c = CUR_CHAR(l); - } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); return(NULL); @@ -3668,7 +3632,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3677,28 +3640,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { nbParseNmToken++; #endif - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); c = CUR_CHAR(l); while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); - if (c == 0) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -3714,14 +3661,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } memcpy(buffer, buf, len); while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buffer); - return(NULL); - } - } if (len + 10 > max) { xmlChar *tmp; @@ -3735,18 +3674,22 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { buffer = tmp; } COPY_BUF(l,buffer,len,c); - NEXTL(l); - c = CUR_CHAR(l); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); xmlFree(buffer); return(NULL); } + NEXTL(l); + c = CUR_CHAR(l); } buffer[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); return(buffer); } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len == 0) return(NULL); if (len > maxLength) { @@ -4237,7 +4180,6 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { XML_MAX_NAME_LENGTH; xmlChar stop; int state = ctxt->instate; - int count = 0; SHRINK; if (RAW == '"') { @@ -4272,32 +4214,21 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { } buf = tmp; } - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } COPY_BUF(l,buf,len,cur); - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - GROW; - SHRINK; - cur = CUR_CHAR(l); - } if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); xmlFree(buf); ctxt->instate = (xmlParserInputState) state; return(NULL); } + NEXTL(l); + cur = CUR_CHAR(l); } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } ctxt->instate = (xmlParserInputState) state; if (!IS_CHAR(cur)) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); @@ -4330,7 +4261,6 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { XML_MAX_NAME_LENGTH; xmlChar cur; xmlChar stop; - int count = 0; xmlParserInputState oldstate = ctxt->instate; SHRINK; @@ -4365,34 +4295,23 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { buf = tmp; } buf[len++] = cur; - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } - NEXT; - cur = CUR; - if (cur == 0) { - GROW; - SHRINK; - cur = CUR; - } if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); xmlFree(buf); return(NULL); } + NEXT; + cur = CUR; } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } if (cur != stop) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); } else { - NEXT; + NEXTL(1); } ctxt->instate = oldstate; return(buf); @@ -4611,10 +4530,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; int nbchar = 0; int cur, l; - int count = 0; SHRINK; - GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ (cur != '&') && @@ -4625,7 +4542,6 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { COPY_BUF(l,buf,nbchar,cur); /* move current position before possible calling of ctxt->sax->characters */ NEXTL(l); - cur = CUR_CHAR(l); if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { buf[nbchar] = 0; @@ -4650,16 +4566,12 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { /* something really bad happened in the SAX callback */ if (ctxt->instate != XML_PARSER_CONTENT) return; + SHRINK; } - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) - return; - } + cur = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + return; if (nbchar != 0) { buf[nbchar] = 0; /* @@ -4784,7 +4696,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int q, ql; int r, rl; int cur, l; - size_t count = 0; size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_HUGE_LENGTH : XML_MAX_TEXT_LENGTH; @@ -4801,7 +4712,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, return; } } - GROW; /* Assure there's enough input data */ q = CUR_CHAR(ql); if (q == 0) goto not_terminated; @@ -4848,37 +4758,27 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, size = new_size; } COPY_BUF(ql,buf,len,q); + if (len > maxLength) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } + q = r; ql = rl; r = cur; rl = l; - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } - } NEXTL(l); cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } - if (len > maxLength) { - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment too big found", NULL); - xmlFree (buf); - return; - } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } if (cur == 0) { xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated \n<!--%.50s\n", buf); @@ -5222,7 +5122,6 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { int cur, l; const xmlChar *target; xmlParserInputState state; - int count = 0; if ((RAW == '<') && (NXT(1) == '?')) { int inputid = ctxt->input->id; @@ -5285,24 +5184,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { buf = tmp; size = new_size; } - count++; - if (count > 50) { - SHRINK; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } - count = 0; - } COPY_BUF(l,buf,len,cur); - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } if (len > maxLength) { xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, "PI %s too big found", target); @@ -5310,8 +5192,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { ctxt->instate = state; return; } + NEXTL(l); + cur = CUR_CHAR(l); } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } if (cur != '?') { xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, "ParsePI: PI %s never end ...\n", target); @@ -8143,7 +8031,6 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; xmlBufferPtr buf; int l, c; - int count = 0; if ((ctxt == NULL) || (entity == NULL) || ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && @@ -8189,25 +8076,12 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && (IS_CHAR(c))) { xmlBufferAdd(buf, ctxt->input->cur, l); - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlBufferFree(buf); - return(-1); - } - } NEXTL(l); c = CUR_CHAR(l); - if (c == 0) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlBufferFree(buf); - return(-1); - } - c = CUR_CHAR(l); - } + } + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); } if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { @@ -9850,7 +9724,6 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { int r, rl; int s, sl; int cur, l; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_HUGE_LENGTH : XML_MAX_TEXT_LENGTH; @@ -9896,28 +9769,23 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { size *= 2; } COPY_BUF(rl,buf,len,r); + if (len > maxLength) { + xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED, + "CData section too big found\n"); + goto out; + } r = s; rl = sl; s = cur; sl = l; - count++; - if (count > 50) { - SHRINK; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - goto out; - } - count = 0; - } NEXTL(l); cur = CUR_CHAR(l); - if (len > maxLength) { - xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED, - "CData section too big found\n"); - goto out; - } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } if (cur != '>') { xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, "CData section not finished\n%.50s\n", buf); |