diff options
author | Sebastian Pipping <sebastian@pipping.org> | 2021-05-11 14:53:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-11 14:53:22 +0200 |
commit | 309cd4aa4b470a3e496a5d72014148dd8a583529 (patch) | |
tree | e6f79b145d532483d162ce9931fdfbf7e54be43f | |
parent | fb78bfdae55408af50eb0da558f288775fd92baf (diff) | |
parent | 3f2f8786623cc3e89a1f4384715b3ad178c5ee2c (diff) | |
download | libexpat-git-309cd4aa4b470a3e496a5d72014148dd8a583529.tar.gz |
Merge pull request #466 from libexpat/protect-against-billion-laughs-attacks
[CVE-2013-0340, CWE-776] Protect against billion laughs attacks (fixes #34)
-rw-r--r-- | .github/workflows/data/exported-symbols.txt | 2 | ||||
-rwxr-xr-x | .github/workflows/scripts/mass-cppcheck.sh | 1 | ||||
-rw-r--r-- | expat/CMakeLists.txt | 4 | ||||
-rw-r--r-- | expat/Changes | 34 | ||||
-rw-r--r-- | expat/configure.ac | 2 | ||||
-rw-r--r-- | expat/doc/reference.html | 99 | ||||
-rw-r--r-- | expat/doc/xmlwf.xml | 47 | ||||
-rw-r--r-- | expat/lib/expat.h | 21 | ||||
-rw-r--r-- | expat/lib/internal.h | 42 | ||||
-rw-r--r-- | expat/lib/libexpat.def | 3 | ||||
-rw-r--r-- | expat/lib/libexpatw.def | 3 | ||||
-rw-r--r-- | expat/lib/xmlparse.c | 1150 | ||||
-rw-r--r-- | expat/tests/runtests.c | 386 | ||||
-rw-r--r-- | expat/xmlwf/xmltchar.h | 6 | ||||
-rw-r--r-- | expat/xmlwf/xmlwf.c | 71 | ||||
-rwxr-xr-x | expat/xmlwf/xmlwf_helpgen.py | 8 |
16 files changed, 1817 insertions, 62 deletions
diff --git a/.github/workflows/data/exported-symbols.txt b/.github/workflows/data/exported-symbols.txt index 7d5983a2..8fa1cef2 100644 --- a/.github/workflows/data/exported-symbols.txt +++ b/.github/workflows/data/exported-symbols.txt @@ -29,6 +29,8 @@ XML_ParserReset XML_ResumeParser XML_SetAttlistDeclHandler XML_SetBase +XML_SetBillionLaughsAttackProtectionActivationThreshold +XML_SetBillionLaughsAttackProtectionMaximumAmplification XML_SetCdataSectionHandler XML_SetCharacterDataHandler XML_SetCommentHandler diff --git a/.github/workflows/scripts/mass-cppcheck.sh b/.github/workflows/scripts/mass-cppcheck.sh index 84ba45f9..582f8b36 100755 --- a/.github/workflows/scripts/mass-cppcheck.sh +++ b/.github/workflows/scripts/mass-cppcheck.sh @@ -44,6 +44,7 @@ cppcheck_args=( --error-exitcode=1 --force --suppress=objectIndex + --suppress=unknownMacro ) find_args=( diff --git a/expat/CMakeLists.txt b/expat/CMakeLists.txt index 951df0ed..96ac5daa 100644 --- a/expat/CMakeLists.txt +++ b/expat/CMakeLists.txt @@ -248,6 +248,10 @@ if(FLAG_VISIBILITY) add_definitions(-DXML_ENABLE_VISIBILITY=1) set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -fvisibility=hidden") endif() +if(MINGW) + # Without __USE_MINGW_ANSI_STDIO the compiler produces a false positive + set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -Wno-pedantic-ms-format") +endif() if (EXPAT_WARNINGS_AS_ERRORS) if(MSVC) add_definitions(/WX) diff --git a/expat/Changes b/expat/Changes index 69c39f37..770d3079 100644 --- a/expat/Changes +++ b/expat/Changes @@ -3,11 +3,40 @@ NOTE: We are looking for help with a few things: If you can help, please get in touch. Thanks! 
Release X.X.X XXX XXXXX XX XXXX + Security fixes: + #34 #466 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks + (denial-of-service; flavors targeting CPU time or RAM or both, + leveraging general entities or parameter entities or both) + by tracking and limiting the input amplification factor + (<amplification> := (<direct> + <indirect>) / <direct>). + By conservative default, amplification up to a factor of 100.0 + is tolerated and rejection only starts after 8 MiB of output bytes + (=<direct> + <indirect>) have been processed. + A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH signals + this condition. + Bug fixes: #332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake) or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault for UTF-16 payloads containing CDATA sections. + New features: + #34 #466 Add two new API functions to further tighten billion laughs + protection parameters when desired. + - XML_SetBillionLaughsAttackProtectionMaximumAmplification + - XML_SetBillionLaughsAttackProtectionActivationThreshold + Please see file "doc/reference.html" for more details. + If you ever need to increase the defaults for non-attack XML + payload, please file a bug report with libexpat. + #34 #466 Introduce environment switches EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) + and EXPAT_ENTITY_DEBUG=(0|1) for runtime debugging of accounting + and entity processing; specific behavior of these values may + change in the future. + #34 #466 xmlwf: Add arguments "-a FACTOR" and "-b BYTES" to further tighten + billion laughs protection parameters when desired. + If you ever need to increase the defaults for non-attack XML + payload, please file a bug report with libexpat. 
+ Other changes: #457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters #458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR @@ -16,6 +45,11 @@ Release X.X.X XXX XXXXX XX XXXX Special thanks to: Dimitry Andric + Nick Wellnhofer + Yury Gribov + and + Clang LeakSan + JetBrains Release 2.3.0 Thu March 25 2021 Bug fixes: diff --git a/expat/configure.ac b/expat/configure.ac index f26ce6cc..7d60a2c2 100644 --- a/expat/configure.ac +++ b/expat/configure.ac @@ -111,7 +111,7 @@ AS_IF([test "$GCC" = yes], AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS]) - AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wmisleading-indentation], [AM_CFLAGS])]) + AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])]) AC_LANG_PUSH([C++]) AC_PROG_CXX diff --git a/expat/doc/reference.html b/expat/doc/reference.html index 82868d5d..8e77a10c 100644 --- a/expat/doc/reference.html +++ b/expat/doc/reference.html @@ -148,6 +148,13 @@ interface.</p> <li><a href="#XML_GetInputContext">XML_GetInputContext</a></li> </ul> </li> + <li> + <a href="#billion-laughs">Billion Laughs Attack Protection</a> + <ul> + <li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li> + <li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li> + </ul> + </li> <li><a href="#miscellaneous">Miscellaneous Functions</a> <ul> <li><a href="#XML_SetUserData">XML_SetUserData</a></li> @@ -2073,6 +2080,98 @@ parse position may be before the beginning of the buffer.</p> return NULL.</p> </div> +<h3><a name="billion-laughs">Billion Laughs Attack 
Protection</a></h3> + +<p>The functions in this section configure the built-in + protection against various forms of + <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p> + +<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4> +<pre class="fcndec"> +/* Added in Expat 2.4.0. */ +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p, + float maximumAmplificationFactor); +</pre> +<div class="fcndef"> + <p> + Sets the maximum tolerated amplification factor + for protection against + <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> + (default: <code>100.0</code>) + of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and + returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. + </p> + + The amplification factor is calculated as .. + <pre> + amplification := (direct + indirect) / direct + </pre> + .. while parsing, whereas + <code>direct</code> is the number of bytes read from the primary document in parsing and + <code>indirect</code> is the number of bytes added by expanding entities and reading of external DTD files, combined. + + <p>For a call to <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to succeed:</p> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li> + <li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li> + </ul> + + <p> + <strong>Note:</strong> + If you ever need to increase this value for non-attack payload, + please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. 
+ </p> + + <p> + <strong>Note:</strong> + Peak amplifications + of factor 15,000 for the entire payload and + of factor 30,000 in the middle of parsing + have been observed with small benign files in practice. + + So if you do reduce the maximum allowed amplification, + please make sure that the activation threshold is still big enough + to not end up with undesired false positives (i.e. benign files being rejected). + </p> +</div> + +<h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</h4> +<pre class="fcndec"> +/* Added in Expat 2.4.0. */ +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, + unsigned long long activationThresholdBytes); +</pre> +<div class="fcndef"> + <p> + Sets the number of output bytes (including amplification from entity expansion and reading DTD files) + needed to activate protection against + <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> + (default: <code>8 MiB</code>) + of parser <code>p</code> to <code>activationThresholdBytes</code>, and + returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. + </p> + + <p>For a call to <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to succeed:</p> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li> + </ul> + + <p> + <strong>Note:</strong> + If you ever need to increase this value for non-attack payload, + please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. + </p> + + <p> + <strong>Note:</strong> + Activation thresholds below 4 MiB are known to break support for + <a href="https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> 1.3 payload + and are hence not recommended. 
+ </p> +</div> + <h3><a name="miscellaneous">Miscellaneous functions</a></h3> <p>The functions in this section either obtain state information from diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml index 1105bff4..c79041bc 100644 --- a/expat/doc/xmlwf.xml +++ b/expat/doc/xmlwf.xml @@ -4,7 +4,7 @@ <!ENTITY dhfirstname "<firstname>Scott</firstname>"> <!ENTITY dhsurname "<surname>Bronson</surname>"> <!-- Please adjust the date whenever revising the manpage. --> - <!ENTITY dhdate "<date>April 25, 2021</date>"> + <!ENTITY dhdate "<date>May 4, 2021</date>"> <!-- SECTION should be 1-8, maybe w/ subsection other parameters are allowed: see man(7), man(1). --> <!ENTITY dhsection "<manvolnum>1</manvolnum>"> @@ -132,6 +132,50 @@ supports both. <variablelist> <varlistentry> + <term><option>-a</option> <replaceable>factor</replaceable></term> + <listitem> + <para> + Sets the maximum tolerated amplification factor + for protection against billion laughs attacks (default: 100.0). + The amplification factor is calculated as .. + </para> + <literallayout> + amplification := (direct + indirect) / direct + </literallayout> + <para> + .. while parsing, whereas + <direct> is the number of bytes read + from the primary document in parsing and + <indirect> is the number of bytes + added by expanding entities and reading of external DTD files, + combined. + </para> + <para> + <emphasis>NOTE</emphasis>: + If you ever need to increase this value for non-attack payload, + please file a bug report. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>-b</option> <replaceable>bytes</replaceable></term> + <listitem> + <para> + Sets the number of output bytes (including amplification) + needed to activate protection against billion laughs attacks + (default: 8 MiB). + This can be thought of as an "activation threshold". + </para> + <para> + <emphasis>NOTE</emphasis>: + If you ever need to increase this value for non-attack payload, + please file a bug report. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-c</option></term> <listitem> <para> @@ -458,6 +502,7 @@ supports both. <literallayout> The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/ +Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack </literallayout> </para> diff --git a/expat/lib/expat.h b/expat/lib/expat.h index 13f540a1..30a064ad 100644 --- a/expat/lib/expat.h +++ b/expat/lib/expat.h @@ -124,7 +124,9 @@ enum XML_Error { /* Added in 2.2.1. */ XML_ERROR_INVALID_ARGUMENT, /* Added in 2.3.0. */ - XML_ERROR_NO_BUFFER + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ + XML_ERROR_AMPLIFICATION_LIMIT_BREACH }; enum XML_Content_Type { @@ -1006,7 +1008,10 @@ enum XML_FeatureEnum { XML_FEATURE_SIZEOF_XML_LCHAR, XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, - XML_FEATURE_ATTR_INFO + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. */ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT /* Additional features must be added to the end of this enum. */ }; @@ -1019,6 +1024,18 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); +#ifdef XML_DTD +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor); + +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +#endif + /* Expat follows the semantic versioning convention. See http://semver.org. */ diff --git a/expat/lib/internal.h b/expat/lib/internal.h index 66417ccf..444eba0f 100644 --- a/expat/lib/internal.h +++ b/expat/lib/internal.h @@ -105,10 +105,46 @@ # endif #endif +#include <limits.h> // ULONG_MAX + +#if defined(_WIN32) && ! 
defined(__USE_MINGW_ANSI_STDIO) +# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" +# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#else +# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" +# if ! defined(ULONG_MAX) +# error Compiler did not define ULONG_MAX for us +# elif ULONG_MAX == 18446744073709551615u // 2^64-1 +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#endif + #ifndef UNUSED_P # define UNUSED_P(p) (void)p #endif +/* NOTE BEGIN If you ever patch these defaults to greater values + for non-attack XML payload in your environment, + please file a bug report with libexpat. Thank you! 
+*/ +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ + 100.0f +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 +/* NOTE END */ + +#include "expat.h" // so we can use type XML_Parser below + #ifdef __cplusplus extern "C" { #endif @@ -116,6 +152,12 @@ extern "C" { void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef); +#if defined(XML_DTD) +unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); +unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); +const char *unsignedCharToPrintable(unsigned char c); +#endif + #ifdef __cplusplus } #endif diff --git a/expat/lib/libexpat.def b/expat/lib/libexpat.def index 5360c83d..5aebbd07 100644 --- a/expat/lib/libexpat.def +++ b/expat/lib/libexpat.def @@ -75,3 +75,6 @@ EXPORTS ; XML_GetAttributeInfo @66 XML_SetHashSalt @67 ; internal @68 removed with version 2.3.1 +; added with version 2.4.0 + XML_SetBillionLaughsAttackProtectionActivationThreshold @69 + XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 diff --git a/expat/lib/libexpatw.def b/expat/lib/libexpatw.def index 5360c83d..5aebbd07 100644 --- a/expat/lib/libexpatw.def +++ b/expat/lib/libexpatw.def @@ -75,3 +75,6 @@ EXPORTS ; XML_GetAttributeInfo @66 XML_SetHashSalt @67 ; internal @68 removed with version 2.3.1 +; added with version 2.4.0 + XML_SetBillionLaughsAttackProtectionActivationThreshold @69 + XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c index 97a4a4c1..a1aadd86 100644 --- a/expat/lib/xmlparse.c +++ b/expat/lib/xmlparse.c @@ -72,6 +72,7 @@ #include <stdio.h> /* fprintf */ #include <stdlib.h> /* getenv, rand_s */ #include <stdint.h> /* uintptr_t */ +#include <math.h> /* isnan */ #ifdef _WIN32 # define getpid GetCurrentProcessId @@ -396,6 +397,31 @@ typedef struct open_internal_entity { XML_Bool betweenDecl; /* WFC: PE 
Between Declarations */ } OPEN_INTERNAL_ENTITY; +enum XML_Account { + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity + expansion */ + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ +}; + +#ifdef XML_DTD +typedef unsigned long long XmlBigCount; +typedef struct accounting { + XmlBigCount countBytesDirect; + XmlBigCount countBytesIndirect; + int debugLevel; + float maximumAmplificationFactor; // >=1.0 + unsigned long long activationThresholdBytes; +} ACCOUNTING; + +typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; + unsigned int maximumDepthSeen; + int debugLevel; +} ENTITY_STATS; +#endif /* XML_DTD */ + typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); @@ -426,13 +452,14 @@ static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, - XML_Bool haveMore, XML_Bool allowClosingDoctype); + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, - XML_Bool haveMore); + XML_Bool haveMore, enum XML_Account account); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); @@ -445,7 +472,8 @@ static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, static void freeBindings(XML_Parser parser, BINDING *bindings); static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, - 
BINDING **bindingsPtr); + BINDING **bindingsPtr, + enum XML_Account account); static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); @@ -454,15 +482,18 @@ static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, XML_Parser parser); static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, - const char *, STRING_POOL *); + const char *, STRING_POOL *, + enum XML_Account account); static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, - const char *, STRING_POOL *); + const char *, STRING_POOL *, + enum XML_Account account); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end); + const char *start, const char *end, + enum XML_Account account); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, @@ -526,6 +557,34 @@ static XML_Parser parserCreate(const XML_Char *encodingName, static void parserInit(XML_Parser parser, const XML_Char *encodingName); +#ifdef XML_DTD +static float accountingGetCurrentAmplification(XML_Parser rootParser); +static void accountingReportStats(XML_Parser originParser, const char *epilog); +static void accountingOnAbort(XML_Parser originParser); +static void accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, + const char *before, const char *after, + ptrdiff_t bytesMore, int source_line, + enum XML_Account account); +static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, + const char *before, const char *after, 
+ int source_line, + enum XML_Account account); + +static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, + const char *action, int sourceLine); +static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, + int sourceLine); +static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, + int sourceLine); + +static XML_Parser getRootParserOf(XML_Parser parser, + unsigned int *outLevelDiff); +#endif /* XML_DTD */ + +static unsigned long getDebugLevel(const char *variableName, + unsigned long defaultDebugLevel); + #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) @@ -639,6 +698,10 @@ struct XML_ParserStruct { enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; +#ifdef XML_DTD + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; +#endif }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) @@ -823,9 +886,8 @@ gather_time_entropy(void) { static unsigned long ENTROPY_DEBUG(const char *label, unsigned long entropy) { - const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG"); - if (EXPAT_ENTROPY_DEBUG && ! 
strcmp(EXPAT_ENTROPY_DEBUG, "1")) { - fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); } return entropy; @@ -1087,6 +1149,18 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif parser->m_hash_secret_salt = 0; + +#ifdef XML_DTD + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); + parser->m_accounting.maximumAmplificationFactor + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_accounting.activationThresholdBytes + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; + + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); +#endif } /* moves list of bindings to m_freeBindingList */ @@ -2361,6 +2435,10 @@ XML_ErrorString(enum XML_Error code) { case XML_ERROR_NO_BUFFER: return XML_L( "a successful prior call to function XML_GetBuffer is required"); + /* Added in 2.4.0. 
*/ + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); } return NULL; } @@ -2397,41 +2475,75 @@ XML_ExpatVersionInfo(void) { const XML_Feature *XMLCALL XML_GetFeatureList(void) { - static const XML_Feature features[] - = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), - sizeof(XML_Char)}, - {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), - sizeof(XML_LChar)}, + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, #ifdef XML_UNICODE - {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T - {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD - {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES - {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), - XML_CONTEXT_BYTES}, + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE - {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, #endif #ifdef XML_NS - {XML_FEATURE_NS, XML_L("XML_NS"), 0}, + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif #ifdef XML_LARGE_SIZE - {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, #endif #ifdef XML_ATTR_INFO - {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif +#ifdef XML_DTD + /* Added in Expat 2.4.0. 
*/ + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_BLAP_MAX_AMP"), + (long int) + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, #endif - {XML_FEATURE_END, NULL, 0}}; + {XML_FEATURE_END, NULL, 0}}; return features; } +#ifdef XML_DTD +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} +#endif /* XML_DTD */ + /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was processed, and not yet closed, we need to store tag->rawName in a more @@ -2484,9 +2596,9 @@ storeRawNames(XML_Parser parser) { static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { - enum XML_Error result - = doContent(parser, 0, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + enum XML_Error result = doContent( + parser, 0, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { if (! 
storeRawNames(parser)) return XML_ERROR_NO_MEMORY; @@ -2511,6 +2623,14 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start, int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif /* XML_DTD */ + /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to doContent (by detecting XML_TOK_NONE) without processing any xml text @@ -2548,6 +2668,10 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start, const char *next = start; /* XmlContentTok doesn't always set the last arg */ parser->m_eventPtr = start; tok = XmlContentTok(parser->m_encoding, start, end, &next); + /* Note: These bytes are accounted later in: + - processXmlDecl + - externalEntityContentProcessor + */ parser->m_eventEndPtr = next; switch (tok) { @@ -2589,7 +2713,8 @@ externalEntityContentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (! 
storeRawNames(parser)) return XML_ERROR_NO_MEMORY; @@ -2600,7 +2725,7 @@ externalEntityContentProcessor(XML_Parser parser, const char *start, static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *s, const char *end, const char **nextPtr, - XML_Bool haveMore) { + XML_Bool haveMore, enum XML_Account account) { /* save one level of indirection */ DTD *const dtd = parser->m_dtd; @@ -2618,6 +2743,17 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); +#ifdef XML_DTD + const char *accountAfter + = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) + ? (haveMore ? s /* i.e. 0 bytes */ : end) + : next; + if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, + account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: @@ -2673,6 +2809,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. 
*/ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ if (parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); else if (parser->m_defaultHandler) @@ -2791,7 +2935,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } tag->name.str = (XML_Char *)tag->buf; *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); + result + = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); if (result) return result; if (parser->m_startElementHandler) @@ -2815,7 +2960,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, if (! name.str) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); + result = storeAtts(parser, enc, s, &name, &bindings, + XML_ACCOUNT_NONE /* token spans whole start tag */); if (result != XML_ERROR_NONE) { freeBindings(parser, bindings); return result; @@ -3079,7 +3225,8 @@ freeBindings(XML_Parser parser, BINDING *bindings) { */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr) { + TAG_NAME *tagNamePtr, BINDING **bindingsPtr, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ELEMENT_TYPE *elementType; int nDefaultAtts; @@ -3189,7 +3336,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, /* normalize the attribute value */ result = storeAttributeValue( parser, enc, isCdata, parser->m_atts[i].valuePtr, - parser->m_atts[i].valueEnd, &parser->m_tempPool); + parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; appAtts[attIndex] = poolStart(&parser->m_tempPool); @@ -3618,6 +3765,13 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, for (;;) { const char 
*next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: @@ -3762,6 +3916,13 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: @@ -3846,6 +4007,15 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *versionend; const XML_Char *storedversion = NULL; int standalone = -1; + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, &version, &versionend, &encodingName, &newEncoding, &standalone)) { @@ -3995,6 +4165,10 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, for (;;) { tok = XmlPrologTok(parser->m_encoding, start, end, &next); + /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: + - storeEntityValue + - processXmlDecl + */ parser->m_eventEndPtr = next; if (tok <= 0) { if (! 
parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { @@ -4013,7 +4187,8 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, parser->m_encoding, s, end); + return storeEntityValue(parser, parser->m_encoding, s, end, + XML_ACCOUNT_DIRECT); } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); @@ -4040,6 +4215,14 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, */ else if (tok == XML_TOK_BOM && next == end && ! parser->m_parsingStatus.finalBuffer) { +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif + *nextPtr = next; return XML_ERROR_NONE; } @@ -4082,16 +4265,24 @@ externalParEntProcessor(XML_Parser parser, const char *s, const char *end, } /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. However, when parsing an external subset, doProlog will not accept a BOM - as valid, and report a syntax error, so we have to skip the BOM + as valid, and report a syntax error, so we have to skip the BOM, and + account for the BOM bytes. */ else if (tok == XML_TOK_BOM) { + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + s = next; tok = XmlPrologTok(parser->m_encoding, s, end, &next); } parser->m_processor = prologProcessor; return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error PTRCALL @@ -4104,6 +4295,9 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, for (;;) { tok = XmlPrologTok(enc, start, end, &next); + /* Note: These bytes are accounted later in: + - storeEntityValue + */ if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; @@ -4121,7 +4315,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); } start = next; } @@ -4135,13 +4329,14 @@ prologProcessor(XML_Parser parser, const char *s, const char *end, const char *next = s; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, XML_Bool haveMore, - XML_Bool allowClosingDoctype) { + XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ @@ -4168,6 +4363,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; +#ifndef XML_DTD + UNUSED_P(account); +#endif + /* save one level of indirection */ DTD *const dtd = parser->m_dtd; @@ -4232,6 +4431,19 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } } role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); +#ifdef XML_DTD + switch (role) { + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl + break; + default: + if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + } +#endif switch (role) { case XML_ROLE_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 0, s, next); @@ -4507,7 +4719,8 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, const XML_Char *attVal; enum XML_Error result = storeAttributeValue( parser, enc, parser->m_declAttributeIsCdata, - s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool); + s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, + XML_ACCOUNT_NONE); if (result) return result; attVal = poolStart(&dtd->pool); @@ -4540,8 +4753,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { - enum XML_Error result = storeEntityValue( - parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + enum XML_Error result + = storeEntityValue(parser, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); if (parser->m_declEntity) { parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen @@ -4931,12 +5145,15 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; handleDefault = XML_FALSE; if (! 
dtd->paramEntityRead) { @@ -5134,6 +5351,13 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, for (;;) { const char *next = NULL; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif parser->m_eventEndPtr = next; switch (tok) { /* report partial linebreak - it might be the last token */ @@ -5207,6 +5431,9 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif entity->processed = 0; openEntity->next = parser->m_openInternalEntities; parser->m_openInternalEntities = openEntity; @@ -5225,17 +5452,22 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_FALSE); + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, - textStart, textEnd, &next, XML_FALSE); + textStart, textEnd, &next, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); parser->m_processor = internalEntityProcessor; } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif /* XML_DTD */ entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ @@ -5268,12 +5500,13 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, int tok = 
XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_TRUE); + tok, next, &next, XML_FALSE, XML_TRUE, + XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding, textStart, textEnd, &next, - XML_FALSE); + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result != XML_ERROR_NONE) return result; @@ -5282,6 +5515,9 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, entity->processed = (int)(next - (const char *)entity->textPtr); return result; } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ @@ -5295,7 +5531,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, parser->m_processor = prologProcessor; tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } else #endif /* XML_DTD */ { @@ -5303,7 +5540,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, /* see externalEntityContentProcessor vs contentProcessor */ return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); } } @@ -5318,9 +5556,10 @@ errorProcessor(XML_Parser parser, const char *s, const char *end, static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, STRING_POOL *pool) { + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { enum XML_Error result - = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); + = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) @@ -5332,11 +5571,23 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, STRING_POOL *pool) { + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +#ifndef XML_DTD + UNUSED_P(account); +#endif + for (;;) { - const char *next; + const char *next + = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; @@ -5396,6 +5647,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. 
*/ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; @@ -5473,9 +5732,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, (const char *)entity->textPtr, - (const char *)textEnd, pool); + (const char *)textEnd, pool, + XML_ACCOUNT_ENTITY_EXPANSION); +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif entity->open = XML_FALSE; if (result) return result; @@ -5505,13 +5771,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, - const char *entityTextPtr, const char *entityTextEnd) { + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; #ifdef XML_DTD int oldInEntityValue = parser->m_prologState.inEntityValue; parser->m_prologState.inEntityValue = 1; +#else + UNUSED_P(account); #endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we @@ -5522,8 +5791,19 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, } for (;;) { - const char *next; + const char *next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +#ifdef XML_DTD + if (! 
accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, + account)) { + accountingOnAbort(parser); + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + goto endEntityValue; + } +#endif + switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD @@ -5559,13 +5839,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; goto endEntityValue; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (! dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; @@ -5573,9 +5856,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, dtd->keepProcessing = dtd->standalone; } else { entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); result = storeEntityValue( parser, parser->m_internalEncoding, (const char *)entity->textPtr, - (const char *)(entity->textPtr + entity->textLen)); + (const char *)(entity->textPtr + entity->textLen), + XML_ACCOUNT_ENTITY_EXPANSION); + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (result) goto endEntityValue; @@ -6936,3 +7222,755 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { memcpy(result, s, charsRequired * sizeof(XML_Char)); return result; } + +#ifdef XML_DTD + +static float +accountingGetCurrentAmplification(XML_Parser rootParser) { + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = rootParser->m_accounting.countBytesDirect + ? 
(countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) + : 1.0f; + assert(! rootParser->m_parentParser); + return amplificationFactor; +} + +static void +accountingReportStats(XML_Parser originParser, const char *epilog) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + if (rootParser->m_accounting.debugLevel < 1) { + return; + } + + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + fprintf(stderr, + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( + "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, + rootParser->m_accounting.countBytesIndirect, + (double)amplificationFactor, epilog); +} + +static void +accountingOnAbort(XML_Parser originParser) { + accountingReportStats(originParser, " ABORTING\n"); +} + +static void +accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, const char *before, + const char *after, ptrdiff_t bytesMore, int source_line, + enum XML_Account account) { + assert(! rootParser->m_parentParser); + + fprintf(stderr, + " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", + bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", + levelsAwayFromRootParser, source_line, 10, ""); + + const char ellipis[] = "[..]"; + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; + const unsigned int contextLength = 10; + + /* Note: Performance is of no concern here */ + const char *walker = before; + if ((rootParser->m_accounting.debugLevel >= 3) + || (after - before) + <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } else { + for (; walker < before + contextLength; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + fprintf(stderr, ellipis); + walker = after - contextLength; + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } + fprintf(stderr, "\"\n"); +} + +static XML_Bool +accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, + const char *after, int source_line, + enum XML_Account account) { + /* Note: We need to check the token type *first* to be sure that + * we can even access variable <after>, safely. + * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ + switch (tok) { + case XML_TOK_INVALID: + case XML_TOK_PARTIAL: + case XML_TOK_PARTIAL_CHAR: + case XML_TOK_NONE: + return XML_TRUE; + } + + if (account == XML_ACCOUNT_NONE) + return XML_TRUE; /* because these bytes have been accounted for, already */ + + unsigned int levelsAwayFromRootParser; + const XML_Parser rootParser + = getRootParserOf(originParser, &levelsAwayFromRootParser); + assert(! rootParser->m_parentParser); + + const int isDirect + = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); + const ptrdiff_t bytesMore = after - before; + + XmlBigCount *const additionTarget + = isDirect ? 
&rootParser->m_accounting.countBytesDirect + : &rootParser->m_accounting.countBytesIndirect; + + /* Detect and avoid integer overflow */ + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) + return XML_FALSE; + *additionTarget += bytesMore; + + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + const XML_Bool tolerated + = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) + || (amplificationFactor + <= rootParser->m_accounting.maximumAmplificationFactor); + + if (rootParser->m_accounting.debugLevel >= 2) { + accountingReportStats(rootParser, ""); + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, + bytesMore, source_line, account); + } + + return tolerated; +} + +unsigned long long +testingAccountingGetCountBytesDirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesDirect; +} + +unsigned long long +testingAccountingGetCountBytesIndirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesIndirect; +} + +static void +entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, + const char *action, int sourceLine) { + assert(! rootParser->m_parentParser); + if (rootParser->m_entity_stats.debugLevel < 1) + return; + +# if defined(XML_UNICODE) + const char *const entityName = "[..]"; +# else + const char *const entityName = entity->name; +# endif + + fprintf( + stderr, + "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_entity_stats.countEverOpened, + rootParser->m_entity_stats.currentDepth, + rootParser->m_entity_stats.maximumDepthSeen, + (rootParser->m_entity_stats.currentDepth - 1) * 2, "", + entity->is_param ? 
"%" : "&", entityName, action, entity->textLen, + sourceLine); +} + +static void +entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + rootParser->m_entity_stats.countEverOpened++; + rootParser->m_entity_stats.currentDepth++; + if (rootParser->m_entity_stats.currentDepth + > rootParser->m_entity_stats.maximumDepthSeen) { + rootParser->m_entity_stats.maximumDepthSeen++; + } + + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); +} + +static void +entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); + rootParser->m_entity_stats.currentDepth--; +} + +static XML_Parser +getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { + XML_Parser rootParser = parser; + unsigned int stepsTakenUpwards = 0; + while (rootParser->m_parentParser) { + rootParser = rootParser->m_parentParser; + stepsTakenUpwards++; + } + assert(! 
rootParser->m_parentParser); + if (outLevelDiff != NULL) { + *outLevelDiff = stepsTakenUpwards; + } + return rootParser; +} + +const char * +unsignedCharToPrintable(unsigned char c) { + switch (c) { + case 0: + return "\\0"; + case 1: + return "\\x1"; + case 2: + return "\\x2"; + case 3: + return "\\x3"; + case 4: + return "\\x4"; + case 5: + return "\\x5"; + case 6: + return "\\x6"; + case 7: + return "\\x7"; + case 8: + return "\\x8"; + case 9: + return "\\t"; + case 10: + return "\\n"; + case 11: + return "\\xB"; + case 12: + return "\\xC"; + case 13: + return "\\r"; + case 14: + return "\\xE"; + case 15: + return "\\xF"; + case 16: + return "\\x10"; + case 17: + return "\\x11"; + case 18: + return "\\x12"; + case 19: + return "\\x13"; + case 20: + return "\\x14"; + case 21: + return "\\x15"; + case 22: + return "\\x16"; + case 23: + return "\\x17"; + case 24: + return "\\x18"; + case 25: + return "\\x19"; + case 26: + return "\\x1A"; + case 27: + return "\\x1B"; + case 28: + return "\\x1C"; + case 29: + return "\\x1D"; + case 30: + return "\\x1E"; + case 31: + return "\\x1F"; + case 32: + return " "; + case 33: + return "!"; + case 34: + return "\\\""; + case 35: + return "#"; + case 36: + return "$"; + case 37: + return "%"; + case 38: + return "&"; + case 39: + return "'"; + case 40: + return "("; + case 41: + return ")"; + case 42: + return "*"; + case 43: + return "+"; + case 44: + return ","; + case 45: + return "-"; + case 46: + return "."; + case 47: + return "/"; + case 48: + return "0"; + case 49: + return "1"; + case 50: + return "2"; + case 51: + return "3"; + case 52: + return "4"; + case 53: + return "5"; + case 54: + return "6"; + case 55: + return "7"; + case 56: + return "8"; + case 57: + return "9"; + case 58: + return ":"; + case 59: + return ";"; + case 60: + return "<"; + case 61: + return "="; + case 62: + return ">"; + case 63: + return "?"; + case 64: + return "@"; + case 65: + return "A"; + case 66: + return "B"; + case 67: + return "C"; 
+ case 68: + return "D"; + case 69: + return "E"; + case 70: + return "F"; + case 71: + return "G"; + case 72: + return "H"; + case 73: + return "I"; + case 74: + return "J"; + case 75: + return "K"; + case 76: + return "L"; + case 77: + return "M"; + case 78: + return "N"; + case 79: + return "O"; + case 80: + return "P"; + case 81: + return "Q"; + case 82: + return "R"; + case 83: + return "S"; + case 84: + return "T"; + case 85: + return "U"; + case 86: + return "V"; + case 87: + return "W"; + case 88: + return "X"; + case 89: + return "Y"; + case 90: + return "Z"; + case 91: + return "["; + case 92: + return "\\\\"; + case 93: + return "]"; + case 94: + return "^"; + case 95: + return "_"; + case 96: + return "`"; + case 97: + return "a"; + case 98: + return "b"; + case 99: + return "c"; + case 100: + return "d"; + case 101: + return "e"; + case 102: + return "f"; + case 103: + return "g"; + case 104: + return "h"; + case 105: + return "i"; + case 106: + return "j"; + case 107: + return "k"; + case 108: + return "l"; + case 109: + return "m"; + case 110: + return "n"; + case 111: + return "o"; + case 112: + return "p"; + case 113: + return "q"; + case 114: + return "r"; + case 115: + return "s"; + case 116: + return "t"; + case 117: + return "u"; + case 118: + return "v"; + case 119: + return "w"; + case 120: + return "x"; + case 121: + return "y"; + case 122: + return "z"; + case 123: + return "{"; + case 124: + return "|"; + case 125: + return "}"; + case 126: + return "~"; + case 127: + return "\\x7F"; + case 128: + return "\\x80"; + case 129: + return "\\x81"; + case 130: + return "\\x82"; + case 131: + return "\\x83"; + case 132: + return "\\x84"; + case 133: + return "\\x85"; + case 134: + return "\\x86"; + case 135: + return "\\x87"; + case 136: + return "\\x88"; + case 137: + return "\\x89"; + case 138: + return "\\x8A"; + case 139: + return "\\x8B"; + case 140: + return "\\x8C"; + case 141: + return "\\x8D"; + case 142: + return "\\x8E"; + case 143: + 
return "\\x8F"; + case 144: + return "\\x90"; + case 145: + return "\\x91"; + case 146: + return "\\x92"; + case 147: + return "\\x93"; + case 148: + return "\\x94"; + case 149: + return "\\x95"; + case 150: + return "\\x96"; + case 151: + return "\\x97"; + case 152: + return "\\x98"; + case 153: + return "\\x99"; + case 154: + return "\\x9A"; + case 155: + return "\\x9B"; + case 156: + return "\\x9C"; + case 157: + return "\\x9D"; + case 158: + return "\\x9E"; + case 159: + return "\\x9F"; + case 160: + return "\\xA0"; + case 161: + return "\\xA1"; + case 162: + return "\\xA2"; + case 163: + return "\\xA3"; + case 164: + return "\\xA4"; + case 165: + return "\\xA5"; + case 166: + return "\\xA6"; + case 167: + return "\\xA7"; + case 168: + return "\\xA8"; + case 169: + return "\\xA9"; + case 170: + return "\\xAA"; + case 171: + return "\\xAB"; + case 172: + return "\\xAC"; + case 173: + return "\\xAD"; + case 174: + return "\\xAE"; + case 175: + return "\\xAF"; + case 176: + return "\\xB0"; + case 177: + return "\\xB1"; + case 178: + return "\\xB2"; + case 179: + return "\\xB3"; + case 180: + return "\\xB4"; + case 181: + return "\\xB5"; + case 182: + return "\\xB6"; + case 183: + return "\\xB7"; + case 184: + return "\\xB8"; + case 185: + return "\\xB9"; + case 186: + return "\\xBA"; + case 187: + return "\\xBB"; + case 188: + return "\\xBC"; + case 189: + return "\\xBD"; + case 190: + return "\\xBE"; + case 191: + return "\\xBF"; + case 192: + return "\\xC0"; + case 193: + return "\\xC1"; + case 194: + return "\\xC2"; + case 195: + return "\\xC3"; + case 196: + return "\\xC4"; + case 197: + return "\\xC5"; + case 198: + return "\\xC6"; + case 199: + return "\\xC7"; + case 200: + return "\\xC8"; + case 201: + return "\\xC9"; + case 202: + return "\\xCA"; + case 203: + return "\\xCB"; + case 204: + return "\\xCC"; + case 205: + return "\\xCD"; + case 206: + return "\\xCE"; + case 207: + return "\\xCF"; + case 208: + return "\\xD0"; + case 209: + return "\\xD1"; + 
case 210: + return "\\xD2"; + case 211: + return "\\xD3"; + case 212: + return "\\xD4"; + case 213: + return "\\xD5"; + case 214: + return "\\xD6"; + case 215: + return "\\xD7"; + case 216: + return "\\xD8"; + case 217: + return "\\xD9"; + case 218: + return "\\xDA"; + case 219: + return "\\xDB"; + case 220: + return "\\xDC"; + case 221: + return "\\xDD"; + case 222: + return "\\xDE"; + case 223: + return "\\xDF"; + case 224: + return "\\xE0"; + case 225: + return "\\xE1"; + case 226: + return "\\xE2"; + case 227: + return "\\xE3"; + case 228: + return "\\xE4"; + case 229: + return "\\xE5"; + case 230: + return "\\xE6"; + case 231: + return "\\xE7"; + case 232: + return "\\xE8"; + case 233: + return "\\xE9"; + case 234: + return "\\xEA"; + case 235: + return "\\xEB"; + case 236: + return "\\xEC"; + case 237: + return "\\xED"; + case 238: + return "\\xEE"; + case 239: + return "\\xEF"; + case 240: + return "\\xF0"; + case 241: + return "\\xF1"; + case 242: + return "\\xF2"; + case 243: + return "\\xF3"; + case 244: + return "\\xF4"; + case 245: + return "\\xF5"; + case 246: + return "\\xF6"; + case 247: + return "\\xF7"; + case 248: + return "\\xF8"; + case 249: + return "\\xF9"; + case 250: + return "\\xFA"; + case 251: + return "\\xFB"; + case 252: + return "\\xFC"; + case 253: + return "\\xFD"; + case 254: + return "\\xFE"; + case 255: + return "\\xFF"; + default: + assert(0); /* never gets here */ + return "dead code"; + } + assert(0); /* never gets here */ +} + +#endif /* XML_DTD */ + +static unsigned long +getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { + const char *const valueOrNull = getenv(variableName); + if (valueOrNull == NULL) { + return defaultDebugLevel; + } + const char *const value = valueOrNull; + + errno = 0; + char *afterValue = (char *)value; + unsigned long debugLevel = strtoul(value, &afterValue, 10); + if ((errno != 0) || (afterValue[0] != '\0')) { + errno = 0; + return defaultDebugLevel; + } + + return debugLevel; 
+} diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index 40fdfb43..0e2b49fa 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -53,6 +53,7 @@ #include <ctype.h> #include <limits.h> #include <stdint.h> /* intptr_t uint64_t */ +#include <math.h> /* NAN, INFINITY, isnan */ #if ! defined(__cplusplus) # include <stdbool.h> @@ -61,7 +62,7 @@ #include "expat.h" #include "chardata.h" #include "structdata.h" -#include "internal.h" /* for UNUSED_P only */ +#include "internal.h" #include "minicheck.h" #include "memcheck.h" #include "siphash.h" @@ -11225,6 +11226,379 @@ START_TEST(test_nsalloc_prefixed_element) { } END_TEST +#if defined(XML_DTD) +typedef enum XML_Status (*XmlParseFunction)(XML_Parser, const char *, int, int); + +struct AccountingTestCase { + const char *primaryText; + const char *firstExternalText; /* often NULL */ + const char *secondExternalText; /* often NULL */ + const unsigned long long expectedCountBytesIndirectExtra; + XML_Bool singleBytesWanted; +}; + +static int +accounting_external_entity_ref_handler(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) { + UNUSED_P(context); + UNUSED_P(base); + UNUSED_P(publicId); + + const struct AccountingTestCase *const testCase + = (const struct AccountingTestCase *)XML_GetUserData(parser); + + const char *externalText = NULL; + if (xcstrcmp(systemId, XCS("first.ent")) == 0) { + externalText = testCase->firstExternalText; + } else if (xcstrcmp(systemId, XCS("second.ent")) == 0) { + externalText = testCase->secondExternalText; + } else { + assert(! "systemId is neither \"first.ent\" nor \"second.ent\""); + } + assert(externalText); + + XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); + assert(entParser); + + const XmlParseFunction xmlParseFunction + = testCase->singleBytesWanted ? 
_XML_Parse_SINGLE_BYTES : XML_Parse; + + const enum XML_Status status = xmlParseFunction( + entParser, externalText, (int)strlen(externalText), XML_TRUE); + + XML_ParserFree(entParser); + return status; +} + +START_TEST(test_accounting_precision) { + const XML_Bool filled_later = XML_TRUE; /* value is arbitrary */ + struct AccountingTestCase cases[] = { + {"<e/>", NULL, NULL, 0, 0}, + {"<e></e>", NULL, NULL, 0, 0}, + + /* Attributes */ + {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0, filled_later}, + {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0, 0}, + {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0, + filled_later}, + {"<e k=\"&'><"\" />", NULL, NULL, + sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, + {"<e1 xmlns='https://example.org/'>\n" + " <e2 xmlns=''/>\n" + "</e1>", + NULL, NULL, 0, filled_later}, + + /* Text */ + {"<e>text</e>", NULL, NULL, 0, filled_later}, + {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0, filled_later}, + {"<e>&'><"</e>", NULL, NULL, + sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, + {"<e>A)</e>", NULL, NULL, 0, filled_later}, + + /* Prolog */ + {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0, filled_later}, + + /* Whitespace */ + {" <e1> <e2> </e2> </e1> ", NULL, NULL, 0, filled_later}, + {"<e1 ><e2 /></e1 >", NULL, NULL, 0, filled_later}, + {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0, filled_later}, + + /* Comments */ + {"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0, filled_later}, + + /* Processing instructions */ + {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>", + NULL, NULL, 0, filled_later}, + {"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>", + "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>", + 0, filled_later}, + + /* CDATA */ + {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later}, + + /* Conditional sections */ + {"<!DOCTYPE r [\n" + "<!ENTITY % 
draft 'INCLUDE'>\n" + "<!ENTITY % final 'IGNORE'>\n" + "<!ENTITY % import SYSTEM \"first.ent\">\n" + "%import;\n" + "]>\n" + "<r/>\n", + "<![%draft;[<!--1-->]]>\n" + "<![%final;[<!--22-->]]>", + NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")), + filled_later}, + + /* General entities */ + {"<!DOCTYPE root [\n" + "<!ENTITY nine \"123456789\">\n" + "]>\n" + "<root>&nine;</root>", + NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, + {"<!DOCTYPE root [\n" + "<!ENTITY nine \"123456789\">\n" + "]>\n" + "<root k1=\"&nine;\"/>", + NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, + {"<!DOCTYPE root [\n" + "<!ENTITY nine \"123456789\">\n" + "<!ENTITY nine2 \"&nine;&nine;\">\n" + "]>\n" + "<root>&nine2;&nine2;&nine2;</root>", + NULL, NULL, + sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */ + * (strlen("&nine;") + strlen("123456789")), + filled_later}, + {"<!DOCTYPE r [\n" + " <!ENTITY five SYSTEM 'first.ent'>\n" + "]>\n" + "<r>&five;</r>", + "12345", NULL, 0, filled_later}, + + /* Parameter entities */ + {"<!DOCTYPE r [\n" + "<!ENTITY % comment \"<!---->\">\n" + "%comment;\n" + "]>\n" + "<r/>", + NULL, NULL, sizeof(XML_Char) * strlen("<!---->"), filled_later}, + {"<!DOCTYPE r [\n" + "<!ENTITY % ninedef \"<!ENTITY nine "123456789">\">\n" + "%ninedef;\n" + "]>\n" + "<r>&nine;</r>", + NULL, NULL, + sizeof(XML_Char) + * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789")), + filled_later}, + {"<!DOCTYPE r [\n" + "<!ENTITY % comment \"<!--1-->\">\n" + "<!ENTITY % comment2 \"%comment;<!--22-->%comment;\">\n" + "%comment2;\n" + "]>\n" + "<r/>\n", + NULL, NULL, + sizeof(XML_Char) + * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->")), + filled_later}, + {"<!DOCTYPE r [\n" + " <!ENTITY % five \"12345\">\n" + " <!ENTITY % five2def \"<!ENTITY five2 "[%five;][%five;]]]]">\">\n" + " %five2def;\n" + "]>\n" + "<r>&five2;</r>", + NULL, NULL, /* from "%five2def;": */ + sizeof(XML_Char) + * 
(strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">") + + 2 /* calls to "%five;" */ * strlen("12345") + + /* from "&five2;": */ strlen("[12345][12345]]]]")), + filled_later}, + {"<!DOCTYPE r SYSTEM \"first.ent\">\n" + "<r/>", + "<!ENTITY % comment '<!--1-->'>\n" + "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n" + "%comment2;", + NULL, + sizeof(XML_Char) + * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->") + + 2 /* calls to "%comment;" */ * strlen("<!---->")), + filled_later}, + {"<!DOCTYPE r SYSTEM 'first.ent'>\n" + "<r/>", + "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n" + "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n" + "%e2;\n", + "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->"), + filled_later}, + { + "<!DOCTYPE r SYSTEM 'first.ent'>\n" + "<r/>", + "<!ENTITY % e1 SYSTEM 'second.ent'>\n" + "<!ENTITY % e2 '%e1;'>", + "<?xml version='1.0' encoding='utf-8'?>\n" + "hello\n" + "xml" /* without trailing newline! */, + 0, + filled_later, + }, + { + "<!DOCTYPE r SYSTEM 'first.ent'>\n" + "<r/>", + "<!ENTITY % e1 SYSTEM 'second.ent'>\n" + "<!ENTITY % e2 '%e1;'>", + "<?xml version='1.0' encoding='utf-8'?>\n" + "hello\n" + "xml\n" /* with trailing newline! */, + 0, + filled_later, + }, + {"<!DOCTYPE doc SYSTEM 'first.ent'>\n" + "<doc></doc>\n", + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM 'second.ent'>\n" + "<!ENTITY % e2 '%e1;'>\n" + "%e1;\n", + "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */, + strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later}, + {"<!DOCTYPE r [\n" + " <!ENTITY five SYSTEM 'first.ent'>\n" + "]>\n" + "<r>&five;</r>", + "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later}, + }; + + const size_t countCases = sizeof(cases) / sizeof(cases[0]); + size_t u = 0; + for (; u < countCases; u++) { + size_t v = 0; + for (; v < 2; v++) { + const XML_Bool singleBytesWanted = (v == 0) ? 
XML_FALSE : XML_TRUE; + const unsigned long long expectedCountBytesDirect + = strlen(cases[u].primaryText); + const unsigned long long expectedCountBytesIndirect + = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) + : 0) + + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText) + : 0) + + cases[u].expectedCountBytesIndirectExtra; + + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + if (cases[u].firstExternalText) { + XML_SetExternalEntityRefHandler(parser, + accounting_external_entity_ref_handler); + XML_SetUserData(parser, (void *)&cases[u]); + cases[u].singleBytesWanted = singleBytesWanted; + } + + const XmlParseFunction xmlParseFunction + = singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse; + + enum XML_Status status + = xmlParseFunction(parser, cases[u].primaryText, + (int)strlen(cases[u].primaryText), XML_TRUE); + if (status != XML_STATUS_OK) { + _xml_failure(parser, __FILE__, __LINE__); + } + + const unsigned long long actualCountBytesDirect + = testingAccountingGetCountBytesDirect(parser); + const unsigned long long actualCountBytesIndirect + = testingAccountingGetCountBytesIndirect(parser); + + XML_ParserFree(parser); + + if (actualCountBytesDirect != expectedCountBytesDirect) { + fprintf( + stderr, + "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( + "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n", + u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", + expectedCountBytesDirect, actualCountBytesDirect); + fail("Count of direct bytes is off"); + } + + if (actualCountBytesIndirect != expectedCountBytesIndirect) { + fprintf( + stderr, + "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( + "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n", + u + 1, countCases, singleBytesWanted ? 
"single bytes" : "chunks", + expectedCountBytesIndirect, actualCountBytesIndirect); + fail("Count of indirect bytes is off"); + } + } + } +} +END_TEST + +START_TEST(test_billion_laughs_attack_protection_api) { + XML_Parser parserWithoutParent = XML_ParserCreate(NULL); + XML_Parser parserWithParent + = XML_ExternalEntityParserCreate(parserWithoutParent, NULL, NULL); + if (parserWithoutParent == NULL) + fail("parserWithoutParent is NULL"); + if (parserWithParent == NULL) + fail("parserWithParent is NULL"); + + // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f) + == XML_TRUE) + fail("Call with NULL parser is NOT supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent, + 123.0f) + == XML_TRUE) + fail("Call with non-root parser is NOT supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, NAN) + == XML_TRUE) + fail("Call with NaN limit is NOT supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, -1.0f) + == XML_TRUE) + fail("Call with negative limit is NOT supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, 0.9f) + == XML_TRUE) + fail("Call with positive limit <1.0 is NOT supposed to succeed"); + + // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, 1.0f) + == XML_FALSE) + fail("Call with positive limit >=1.0 is supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, 123456.789f) + == XML_FALSE) + fail("Call with positive limit >=1.0 is supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parserWithoutParent, INFINITY) + == XML_FALSE) + fail("Call with positive 
limit >=1.0 is supposed to succeed"); + + // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases + if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123) + == XML_TRUE) + fail("Call with NULL parser is NOT supposed to succeed"); + if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent, + 123) + == XML_TRUE) + fail("Call with non-root parser is NOT supposed to succeed"); + + // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases + if (XML_SetBillionLaughsAttackProtectionActivationThreshold( + parserWithoutParent, 123) + == XML_FALSE) + fail("Call with non-NULL parentless parser is supposed to succeed"); + + XML_ParserFree(parserWithParent); + XML_ParserFree(parserWithoutParent); +} +END_TEST + +START_TEST(test_helper_unsigned_char_to_printable) { + // Smoke test + unsigned char uc = 0; + for (; uc < (unsigned char)-1; uc++) { + const char *const printable = unsignedCharToPrintable(uc); + if (printable == NULL) + fail("unsignedCharToPrintable returned NULL"); + if (strlen(printable) < (size_t)1) + fail("unsignedCharToPrintable returned empty string"); + } + + // Two concrete samples + if (strcmp(unsignedCharToPrintable('A'), "A") != 0) + fail("unsignedCharToPrintable result mistaken"); + if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0) + fail("unsignedCharToPrintable result mistaken"); +} +END_TEST +#endif // defined(XML_DTD) + static Suite * make_suite(void) { Suite *s = suite_create("basic"); @@ -11233,6 +11607,9 @@ make_suite(void) { TCase *tc_misc = tcase_create("miscellaneous tests"); TCase *tc_alloc = tcase_create("allocation tests"); TCase *tc_nsalloc = tcase_create("namespace allocation tests"); +#if defined(XML_DTD) + TCase *tc_accounting = tcase_create("accounting tests"); +#endif suite_add_tcase(s, tc_basic); tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); @@ -11593,6 +11970,13 @@ make_suite(void) { tcase_add_test(tc_nsalloc, 
test_nsalloc_long_systemid_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); +#if defined(XML_DTD) + suite_add_tcase(s, tc_accounting); + tcase_add_test(tc_accounting, test_accounting_precision); + tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); + tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); +#endif + return s; } diff --git a/expat/xmlwf/xmltchar.h b/expat/xmlwf/xmltchar.h index 4843fbe7..30283d08 100644 --- a/expat/xmlwf/xmltchar.h +++ b/expat/xmlwf/xmltchar.h @@ -7,7 +7,7 @@ |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Licensed under the MIT license: @@ -55,6 +55,8 @@ # define tmain wmain # define tremove _wremove # define tchar wchar_t +# define tcstof wcstof +# define tcstoull wcstoull #else /* not XML_UNICODE */ # define T(x) x # define ftprintf fprintf @@ -72,4 +74,6 @@ # define tmain main # define tremove remove # define tchar char +# define tcstof strtof +# define tcstoull strtoull #endif /* not XML_UNICODE */ diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index 4242e1c7..342d6c59 100644 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -39,11 +39,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include <expat_config.h> + #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stddef.h> #include <string.h> +#include <math.h> /* for isnan */ +#include <errno.h> #include "expat.h" #include "codepage.h" @@ -903,6 +907,12 @@ usage(const XML_Char *prog, int rc) { T(" -t write no XML output for [t]iming of plain parsing\n") T(" -N enable adding doctype and [n]otation declarations\n") T("\n") + T("billion laughs attack protection:\n") + T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n") + T("\n") + T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") + T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") + T("\n") T("info arguments:\n") T(" -h show this [h]elp message and exit\n") T(" -v show program's [v]ersion number and exit\n") @@ -951,6 +961,11 @@ tmain(int argc, XML_Char **argv) { int requireStandalone = 0; int requiresNotations = 0; int continueOnError = 0; + + float attackMaximumAmplification = -1.0f; /* signaling "not set" */ + unsigned long long attackThresholdBytes; + XML_Bool attackThresholdGiven = XML_FALSE; + int exitCode = XMLWF_EXIT_SUCCESS; enum XML_ParamEntityParsing paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; @@ -1030,6 +1045,49 @@ tmain(int argc, XML_Char **argv) { continueOnError = 1; j++; break; + case T('a'): { + const XML_Char *valueText = NULL; + XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); + + errno = 0; + XML_Char *afterValueText = (XML_Char *)valueText; + attackMaximumAmplification = tcstof(valueText, &afterValueText); + if ((errno != 0) || (afterValueText[0] != T('\0')) + || isnan(attackMaximumAmplification) + || (attackMaximumAmplification < 1.0f)) { + // This prevents tperror(..) 
from reporting misleading "[..]: Success" + errno = ERANGE; + tperror(T("invalid amplification limit") T( + " (needs a floating point number greater or equal than 1.0)")); + exit(XMLWF_EXIT_USAGE_ERROR); + } +#ifndef XML_DTD + ftprintf(stderr, T("Warning: Given amplification limit ignored") T( + ", xmlwf has been compiled without DTD support.\n")); +#endif + break; + } + case T('b'): { + const XML_Char *valueText = NULL; + XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); + + errno = 0; + XML_Char *afterValueText = (XML_Char *)valueText; + attackThresholdBytes = tcstoull(valueText, &afterValueText, 10); + if ((errno != 0) || (afterValueText[0] != T('\0'))) { + // This prevents tperror(..) from reporting misleading "[..]: Success" + errno = ERANGE; + tperror(T("invalid ignore threshold") + T(" (needs an integer from 0 to 2^64-1)")); + exit(XMLWF_EXIT_USAGE_ERROR); + } + attackThresholdGiven = XML_TRUE; +#ifndef XML_DTD + ftprintf(stderr, T("Warning: Given attack threshold ignored") T( + ", xmlwf has been compiled without DTD support.\n")); +#endif + break; + } case T('\0'): if (j > 1) { i++; @@ -1060,6 +1118,19 @@ tmain(int argc, XML_Char **argv) { exit(XMLWF_EXIT_INTERNAL_ERROR); } + if (attackMaximumAmplification != -1.0f) { +#ifdef XML_DTD + XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parser, attackMaximumAmplification); +#endif + } + if (attackThresholdGiven) { +#ifdef XML_DTD + XML_SetBillionLaughsAttackProtectionActivationThreshold( + parser, attackThresholdBytes); +#endif + } + if (requireStandalone) XML_SetNotStandaloneHandler(parser, notStandalone); XML_SetParamEntityParsing(parser, paramEntityParsing); diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py index 8ec8d4ea..c2a527fd 100755 --- a/expat/xmlwf/xmlwf_helpgen.py +++ b/expat/xmlwf/xmlwf_helpgen.py @@ -73,6 +73,14 @@ output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not output_mode.add_argument('-t', action='store_true', help='write no 
XML output for [t]iming of plain parsing') output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations') +billion_laughs = parser.add_argument_group('billion laughs attack protection', + description='NOTE: ' + 'If you ever need to increase these values ' + 'for non-attack payload, please file a bug report.') +billion_laughs.add_argument('-a', metavar='FACTOR', + help='set maximum tolerated [a]mplification factor (default: 100.0)') +billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)') + parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)') info = parser.add_argument_group('info arguments') |