summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pipping <sebastian@pipping.org>2022-02-27 16:58:08 +0100
committerSebastian Pipping <sebastian@pipping.org>2022-03-04 16:54:01 +0100
commit2ba6c76fca21397959145e18c5ef376201209020 (patch)
tree59d03e07bfe2b1a7870f1af7c40056f3316c94d1
parentc99e0e7f2b15b48848038992ecbb4480f957cfe9 (diff)
downloadlibexpat-git-2ba6c76fca21397959145e18c5ef376201209020.tar.gz
lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters
-rw-r--r--expat/lib/xmlparse.c139
1 files changed, 131 insertions, 8 deletions
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
index 59da19c8..6fe2cf1e 100644
--- a/expat/lib/xmlparse.c
+++ b/expat/lib/xmlparse.c
@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+ // For the RFC 3986 ANBF grammar see
+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+ switch (candidate) {
+ // From rule "ALPHA" (uppercase half)
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+
+ // From rule "ALPHA" (lowercase half)
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+
+ // From rule "DIGIT"
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+
+ // From rule "pct-encoded"
+ case '%':
+
+ // From rule "unreserved"
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+
+ // From rule "gen-delims"
+ case ':':
+ case '/':
+ case '?':
+ case '#':
+ case '[':
+ case ']':
+ case '@':
+
+ // From rule "sub-delims"
+ case '!':
+ case '$':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case ';':
+ case '=':
+ return XML_TRUE;
+
+ default:
+ return XML_FALSE;
+ }
+}
+
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
- // NOTE: While Expat does not validate namespace URIs against RFC 3986,
- // we have to at least make sure that the XML processor on top of
- // Expat (that is splitting tag names by namespace separator into
- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
- // by an attacker putting additional namespace separator characters
- // into namespace declarations. That would be ambiguous and not to
- // be expected.
- if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
+ // today (and is not REQUIRED to do so with regard to the XML 1.0
+ // namespaces specification) we have to at least make sure, that
+ // the application on top of Expat (that is likely splitting expanded
+ // element names ("qualified names") of form
+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+ // in its element handler code) cannot be confused by an attacker
+ // putting additional namespace separator characters into namespace
+ // declarations. That would be ambiguous and not to be expected.
+ //
+ // While the HTML API docs of function XML_ParserCreateNS have been
+ // advising against use of a namespace separator character that can
+ // appear in a URI for >20 years now, some widespread applications
+ // are using URI characters (':' (colon) in particular) for a
+ // namespace separator, in practice. To keep these applications
+ // functional, we only reject namespaces URIs containing the
+ // application-chosen namespace separator if the chosen separator
+ // is a non-URI character with regard to RFC 3986.
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+ && ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}