summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pipping <sebastian@pipping.org>2021-05-11 14:53:22 +0200
committerGitHub <noreply@github.com>2021-05-11 14:53:22 +0200
commit309cd4aa4b470a3e496a5d72014148dd8a583529 (patch)
treee6f79b145d532483d162ce9931fdfbf7e54be43f
parentfb78bfdae55408af50eb0da558f288775fd92baf (diff)
parent3f2f8786623cc3e89a1f4384715b3ad178c5ee2c (diff)
downloadlibexpat-git-309cd4aa4b470a3e496a5d72014148dd8a583529.tar.gz
Merge pull request #466 from libexpat/protect-against-billion-laughs-attacks
[CVE-2013-0340, CWE-776] Protect against billion laughs attacks (fixes #34)
-rw-r--r--.github/workflows/data/exported-symbols.txt2
-rwxr-xr-x.github/workflows/scripts/mass-cppcheck.sh1
-rw-r--r--expat/CMakeLists.txt4
-rw-r--r--expat/Changes34
-rw-r--r--expat/configure.ac2
-rw-r--r--expat/doc/reference.html99
-rw-r--r--expat/doc/xmlwf.xml47
-rw-r--r--expat/lib/expat.h21
-rw-r--r--expat/lib/internal.h42
-rw-r--r--expat/lib/libexpat.def3
-rw-r--r--expat/lib/libexpatw.def3
-rw-r--r--expat/lib/xmlparse.c1150
-rw-r--r--expat/tests/runtests.c386
-rw-r--r--expat/xmlwf/xmltchar.h6
-rw-r--r--expat/xmlwf/xmlwf.c71
-rwxr-xr-xexpat/xmlwf/xmlwf_helpgen.py8
16 files changed, 1817 insertions, 62 deletions
diff --git a/.github/workflows/data/exported-symbols.txt b/.github/workflows/data/exported-symbols.txt
index 7d5983a2..8fa1cef2 100644
--- a/.github/workflows/data/exported-symbols.txt
+++ b/.github/workflows/data/exported-symbols.txt
@@ -29,6 +29,8 @@ XML_ParserReset
XML_ResumeParser
XML_SetAttlistDeclHandler
XML_SetBase
+XML_SetBillionLaughsAttackProtectionActivationThreshold
+XML_SetBillionLaughsAttackProtectionMaximumAmplification
XML_SetCdataSectionHandler
XML_SetCharacterDataHandler
XML_SetCommentHandler
diff --git a/.github/workflows/scripts/mass-cppcheck.sh b/.github/workflows/scripts/mass-cppcheck.sh
index 84ba45f9..582f8b36 100755
--- a/.github/workflows/scripts/mass-cppcheck.sh
+++ b/.github/workflows/scripts/mass-cppcheck.sh
@@ -44,6 +44,7 @@ cppcheck_args=(
--error-exitcode=1
--force
--suppress=objectIndex
+ --suppress=unknownMacro
)
find_args=(
diff --git a/expat/CMakeLists.txt b/expat/CMakeLists.txt
index 951df0ed..96ac5daa 100644
--- a/expat/CMakeLists.txt
+++ b/expat/CMakeLists.txt
@@ -248,6 +248,10 @@ if(FLAG_VISIBILITY)
add_definitions(-DXML_ENABLE_VISIBILITY=1)
set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -fvisibility=hidden")
endif()
+if(MINGW)
+ # Without __USE_MINGW_ANSI_STDIO the compiler produces a false positive
+ set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -Wno-pedantic-ms-format")
+endif()
if (EXPAT_WARNINGS_AS_ERRORS)
if(MSVC)
add_definitions(/WX)
diff --git a/expat/Changes b/expat/Changes
index 69c39f37..770d3079 100644
--- a/expat/Changes
+++ b/expat/Changes
@@ -3,11 +3,40 @@ NOTE: We are looking for help with a few things:
If you can help, please get in touch. Thanks!
Release X.X.X XXX XXXXX XX XXXX
+ Security fixes:
+ #34 #466 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks
+ (denial-of-service; flavors targeting CPU time or RAM or both,
+ leveraging general entities or parameter entities or both)
+ by tracking and limiting the input amplification factor
+ (<amplification> := (<direct> + <indirect>) / <direct>).
+ By conservative default, amplification up to a factor of 100.0
+ is tolerated and rejection only starts after 8 MiB of output bytes
+ (=<direct> + <indirect>) have been processed.
+ A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH signals
+ this condition.
+
Bug fixes:
#332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake)
or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault
for UTF-16 payloads containing CDATA sections.
+ New features:
+ #34 #466 Add two new API functions to further tighten billion laughs
+ protection parameters when desired.
+ - XML_SetBillionLaughsAttackProtectionMaximumAmplification
+ - XML_SetBillionLaughsAttackProtectionActivationThreshold
+ Please see file "doc/reference.html" for more details.
+ If you ever need to increase the defaults for non-attack XML
+ payload, please file a bug report with libexpat.
+ #34 #466 Introduce environment switches EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)
+ and EXPAT_ENTITY_DEBUG=(0|1) for runtime debugging of accounting
+ and entity processing; specific behavior of these values may
+ change in the future.
+ #34 #466 xmlwf: Add arguments "-a FACTOR" and "-b BYTES" to further tighten
+ billion laughs protection parameters when desired.
+ If you ever need to increase the defaults for non-attack XML
+ payload, please file a bug report with libexpat.
+
Other changes:
#457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters
#458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR
@@ -16,6 +45,11 @@ Release X.X.X XXX XXXXX XX XXXX
Special thanks to:
Dimitry Andric
+ Nick Wellnhofer
+ Yury Gribov
+ and
+ Clang LeakSan
+ JetBrains
Release 2.3.0 Thu March 25 2021
Bug fixes:
diff --git a/expat/configure.ac b/expat/configure.ac
index f26ce6cc..7d60a2c2 100644
--- a/expat/configure.ac
+++ b/expat/configure.ac
@@ -111,7 +111,7 @@ AS_IF([test "$GCC" = yes],
AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS])
AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS])
AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS])
- AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wmisleading-indentation], [AM_CFLAGS])])
+ AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])])
AC_LANG_PUSH([C++])
AC_PROG_CXX
diff --git a/expat/doc/reference.html b/expat/doc/reference.html
index 82868d5d..8e77a10c 100644
--- a/expat/doc/reference.html
+++ b/expat/doc/reference.html
@@ -148,6 +148,13 @@ interface.</p>
<li><a href="#XML_GetInputContext">XML_GetInputContext</a></li>
</ul>
</li>
+ <li>
+ <a href="#billion-laughs">Billion Laughs Attack Protection</a>
+ <ul>
+ <li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
+ <li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
+ </ul>
+ </li>
<li><a href="#miscellaneous">Miscellaneous Functions</a>
<ul>
<li><a href="#XML_SetUserData">XML_SetUserData</a></li>
@@ -2073,6 +2080,98 @@ parse position may be before the beginning of the buffer.</p>
return NULL.</p>
</div>
+<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3>
+
+<p>The functions in this section configure the built-in
+ protection against various forms of
+ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p>
+
+<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4>
+<pre class="fcndec">
+/* Added in Expat 2.4.0. */
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p,
+ float maximumAmplificationFactor);
+</pre>
+<div class="fcndef">
+ <p>
+ Sets the maximum tolerated amplification factor
+ for protection against
+ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>
+ (default: <code>100.0</code>)
+ of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and
+ returns <code>XML_TRUE</code> upon success and <code>XML_TRUE</code> upon error.
+ </p>
+
+ The amplification factor is calculated as ..
+ <pre>
+ amplification := (direct + indirect) / direct
+ </pre>
+ .. while parsing, whereas
+ <code>direct</code> is the number of bytes read from the primary document in parsing and
+ <code>indirect</code> is the number of bytes added by expanding entities and reading of external DTD files, combined.
+
+ <p>For a call to <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to succeed:</p>
+ <ul>
+ <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li>
+ <li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li>
+ </ul>
+
+ <p>
+ <strong>Note:</strong>
+ If you ever need to increase this value for non-attack payload,
+ please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>.
+ </p>
+
+ <p>
+ <strong>Note:</strong>
+ Peak amplifications
+ of factor 15,000 for the entire payload and
+ of factor 30,000 in the middle of parsing
+ have been observed with small benign files in practice.
+
+ So if you do reduce the maximum allowed amplification,
+ please make sure that the activation threshold is still big enough
+ to not end up with undesired false positives (i.e. benign files being rejected).
+ </p>
+</div>
+
+<h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</h4>
+<pre class="fcndec">
+/* Added in Expat 2.4.0. */
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
+ unsigned long long activationThresholdBytes);
+</pre>
+<div class="fcndef">
+ <p>
+ Sets number of output bytes (including amplification from entity expansion and reading DTD files)
+ needed to activate protection against
+ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>
+ (default: <code>8 MiB</code>)
+ of parser <code>p</code> to <code>activationThresholdBytes</code>, and
+ returns <code>XML_TRUE</code> upon success and <code>XML_TRUE</code> upon error.
+ </p>
+
+ <p>For a call to <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to succeed:</p>
+ <ul>
+ <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li>
+ </ul>
+
+ <p>
+ <strong>Note:</strong>
+ If you ever need to increase this value for non-attack payload,
+ please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>.
+ </p>
+
+ <p>
+ <strong>Note:</strong>
+ Activation thresholds below 4 MiB are known to break support for
+ <a href="https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> 1.3 payload
+ and are hence not recommended.
+ </p>
+</div>
+
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
<p>The functions in this section either obtain state information from
diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml
index 1105bff4..c79041bc 100644
--- a/expat/doc/xmlwf.xml
+++ b/expat/doc/xmlwf.xml
@@ -4,7 +4,7 @@
<!ENTITY dhfirstname "<firstname>Scott</firstname>">
<!ENTITY dhsurname "<surname>Bronson</surname>">
<!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>April 25, 2021</date>">
+ <!ENTITY dhdate "<date>May 4, 2021</date>">
<!-- SECTION should be 1-8, maybe w/ subsection other parameters are
allowed: see man(7), man(1). -->
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
@@ -132,6 +132,50 @@ supports both.
<variablelist>
<varlistentry>
+ <term><option>-a</option> <replaceable>factor</replaceable></term>
+ <listitem>
+ <para>
+ Sets the maximum tolerated amplification factor
+ for protection against billion laughs attacks (default: 100.0).
+ The amplification factor is calculated as ..
+ </para>
+ <literallayout>
+ amplification := (direct + indirect) / direct
+ </literallayout>
+ <para>
+ .. while parsing, whereas
+ &lt;direct&gt; is the number of bytes read
+ from the primary document in parsing and
+ &lt;indirect&gt; is the number of bytes
+ added by expanding entities and reading of external DTD files,
+ combined.
+ </para>
+ <para>
+ <emphasis>NOTE</emphasis>:
+ If you ever need to increase this value for non-attack payload,
+ please file a bug report.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-b</option> <replaceable>bytes</replaceable></term>
+ <listitem>
+ <para>
+ Sets the number of output bytes (including amplification)
+ needed to activate protection against billion laughs attacks
+ (default: 8 MiB).
+ This can be thought of as an &quot;activation threshold&quot;.
+ </para>
+ <para>
+ <emphasis>NOTE</emphasis>:
+ If you ever need to increase this value for non-attack payload,
+ please file a bug report.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-c</option></term>
<listitem>
<para>
@@ -458,6 +502,7 @@ supports both.
<literallayout>
The Expat home page: https://libexpat.github.io/
The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/
+Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack
</literallayout>
</para>
diff --git a/expat/lib/expat.h b/expat/lib/expat.h
index 13f540a1..30a064ad 100644
--- a/expat/lib/expat.h
+++ b/expat/lib/expat.h
@@ -124,7 +124,9 @@ enum XML_Error {
/* Added in 2.2.1. */
XML_ERROR_INVALID_ARGUMENT,
/* Added in 2.3.0. */
- XML_ERROR_NO_BUFFER
+ XML_ERROR_NO_BUFFER,
+ /* Added in 2.4.0. */
+ XML_ERROR_AMPLIFICATION_LIMIT_BREACH
};
enum XML_Content_Type {
@@ -1006,7 +1008,10 @@ enum XML_FeatureEnum {
XML_FEATURE_SIZEOF_XML_LCHAR,
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
- XML_FEATURE_ATTR_INFO
+ XML_FEATURE_ATTR_INFO,
+ /* Added in Expat 2.4.0. */
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
/* Additional features must be added to the end of this enum. */
};
@@ -1019,6 +1024,18 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+#ifdef XML_DTD
+/* Added in Expat 2.4.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ XML_Parser parser, float maximumAmplificationFactor);
+
+/* Added in Expat 2.4.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ XML_Parser parser, unsigned long long activationThresholdBytes);
+#endif
+
/* Expat follows the semantic versioning convention.
See http://semver.org.
*/
diff --git a/expat/lib/internal.h b/expat/lib/internal.h
index 66417ccf..444eba0f 100644
--- a/expat/lib/internal.h
+++ b/expat/lib/internal.h
@@ -105,10 +105,46 @@
# endif
#endif
+#include <limits.h> // ULONG_MAX
+
+#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO)
+# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u"
+# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#else
+# define EXPAT_FMT_ULL(midpart) "%" midpart "llu"
+# if ! defined(ULONG_MAX)
+# error Compiler did not define ULONG_MAX for us
+# elif ULONG_MAX == 18446744073709551615u // 2^64-1
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#endif
+
#ifndef UNUSED_P
# define UNUSED_P(p) (void)p
#endif
+/* NOTE BEGIN If you ever patch these defaults to greater values
+ for non-attack XML payload in your environment,
+ please file a bug report with libexpat. Thank you!
+*/
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \
+ 100.0f
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \
+ 8388608 // 8 MiB, 2^23
+/* NOTE END */
+
+#include "expat.h" // so we can use type XML_Parser below
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -116,6 +152,12 @@ extern "C" {
void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
+#if defined(XML_DTD)
+unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
+unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
+const char *unsignedCharToPrintable(unsigned char c);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/expat/lib/libexpat.def b/expat/lib/libexpat.def
index 5360c83d..5aebbd07 100644
--- a/expat/lib/libexpat.def
+++ b/expat/lib/libexpat.def
@@ -75,3 +75,6 @@ EXPORTS
; XML_GetAttributeInfo @66
XML_SetHashSalt @67
; internal @68 removed with version 2.3.1
+; added with version 2.4.0
+ XML_SetBillionLaughsAttackProtectionActivationThreshold @69
+ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
diff --git a/expat/lib/libexpatw.def b/expat/lib/libexpatw.def
index 5360c83d..5aebbd07 100644
--- a/expat/lib/libexpatw.def
+++ b/expat/lib/libexpatw.def
@@ -75,3 +75,6 @@ EXPORTS
; XML_GetAttributeInfo @66
XML_SetHashSalt @67
; internal @68 removed with version 2.3.1
+; added with version 2.4.0
+ XML_SetBillionLaughsAttackProtectionActivationThreshold @69
+ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
index 97a4a4c1..a1aadd86 100644
--- a/expat/lib/xmlparse.c
+++ b/expat/lib/xmlparse.c
@@ -72,6 +72,7 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* getenv, rand_s */
#include <stdint.h> /* uintptr_t */
+#include <math.h> /* isnan */
#ifdef _WIN32
# define getpid GetCurrentProcessId
@@ -396,6 +397,31 @@ typedef struct open_internal_entity {
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
+enum XML_Account {
+ XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
+ XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
+ expansion */
+ XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
+};
+
+#ifdef XML_DTD
+typedef unsigned long long XmlBigCount;
+typedef struct accounting {
+ XmlBigCount countBytesDirect;
+ XmlBigCount countBytesIndirect;
+ int debugLevel;
+ float maximumAmplificationFactor; // >=1.0
+ unsigned long long activationThresholdBytes;
+} ACCOUNTING;
+
+typedef struct entity_stats {
+ unsigned int countEverOpened;
+ unsigned int currentDepth;
+ unsigned int maximumDepthSeen;
+ int debugLevel;
+} ENTITY_STATS;
+#endif /* XML_DTD */
+
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
const char *end, const char **endPtr);
@@ -426,13 +452,14 @@ static enum XML_Error initializeEncoding(XML_Parser parser);
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end, int tok,
const char *next, const char **nextPtr,
- XML_Bool haveMore, XML_Bool allowClosingDoctype);
+ XML_Bool haveMore, XML_Bool allowClosingDoctype,
+ enum XML_Account account);
static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
XML_Bool betweenDecl);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
- XML_Bool haveMore);
+ XML_Bool haveMore, enum XML_Account account);
static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
const char **startPtr, const char *end,
const char **nextPtr, XML_Bool haveMore);
@@ -445,7 +472,8 @@ static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
static void freeBindings(XML_Parser parser, BINDING *bindings);
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
const char *s, TAG_NAME *tagNamePtr,
- BINDING **bindingsPtr);
+ BINDING **bindingsPtr,
+ enum XML_Account account);
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
const ATTRIBUTE_ID *attId, const XML_Char *uri,
BINDING **bindingsPtr);
@@ -454,15 +482,18 @@ static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
XML_Parser parser);
static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
+ const char *, STRING_POOL *,
+ enum XML_Account account);
static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
+ const char *, STRING_POOL *,
+ enum XML_Account account);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *start, const char *end);
+ const char *start, const char *end,
+ enum XML_Account account);
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
@@ -526,6 +557,34 @@ static XML_Parser parserCreate(const XML_Char *encodingName,
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
+#ifdef XML_DTD
+static float accountingGetCurrentAmplification(XML_Parser rootParser);
+static void accountingReportStats(XML_Parser originParser, const char *epilog);
+static void accountingOnAbort(XML_Parser originParser);
+static void accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser,
+ const char *before, const char *after,
+ ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account);
+static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
+ const char *before, const char *after,
+ int source_line,
+ enum XML_Account account);
+
+static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
+ const char *action, int sourceLine);
+static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+
+static XML_Parser getRootParserOf(XML_Parser parser,
+ unsigned int *outLevelDiff);
+#endif /* XML_DTD */
+
+static unsigned long getDebugLevel(const char *variableName,
+ unsigned long defaultDebugLevel);
+
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
@@ -639,6 +698,10 @@ struct XML_ParserStruct {
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+#ifdef XML_DTD
+ ACCOUNTING m_accounting;
+ ENTITY_STATS m_entity_stats;
+#endif
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -823,9 +886,8 @@ gather_time_entropy(void) {
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
- const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG");
- if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) {
- fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
+ if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
+ fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
(int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy));
}
return entropy;
@@ -1087,6 +1149,18 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
parser->m_hash_secret_salt = 0;
+
+#ifdef XML_DTD
+ memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
+ parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
+ parser->m_accounting.maximumAmplificationFactor
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
+ parser->m_accounting.activationThresholdBytes
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
+
+ memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
+ parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
+#endif
}
/* moves list of bindings to m_freeBindingList */
@@ -2361,6 +2435,10 @@ XML_ErrorString(enum XML_Error code) {
case XML_ERROR_NO_BUFFER:
return XML_L(
"a successful prior call to function XML_GetBuffer is required");
+ /* Added in 2.4.0. */
+ case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
+ return XML_L(
+ "limit on input amplification factor (from DTD and entities) breached");
}
return NULL;
}
@@ -2397,41 +2475,75 @@ XML_ExpatVersionInfo(void) {
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
- static const XML_Feature features[]
- = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
- sizeof(XML_Char)},
- {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
- sizeof(XML_LChar)},
+ static const XML_Feature features[] = {
+ {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
+ sizeof(XML_Char)},
+ {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
+ sizeof(XML_LChar)},
#ifdef XML_UNICODE
- {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
+ {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
- {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
+ {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
- {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
+ {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
#ifdef XML_CONTEXT_BYTES
- {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
- XML_CONTEXT_BYTES},
+ {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
+ XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
- {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
+ {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
- {XML_FEATURE_NS, XML_L("XML_NS"), 0},
+ {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
- {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
+ {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
- {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
+ {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
+#endif
+#ifdef XML_DTD
+ /* Added in Expat 2.4.0. */
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_L("XML_BLAP_MAX_AMP"),
+ (long int)
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
+ XML_L("XML_BLAP_ACT_THRES"),
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
#endif
- {XML_FEATURE_END, NULL, 0}};
+ {XML_FEATURE_END, NULL, 0}};
return features;
}
+#ifdef XML_DTD
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ XML_Parser parser, float maximumAmplificationFactor) {
+ if ((parser == NULL) || (parser->m_parentParser != NULL)
+ || isnan(maximumAmplificationFactor)
+ || (maximumAmplificationFactor < 1.0f)) {
+ return XML_FALSE;
+ }
+ parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
+ return XML_TRUE;
+}
+
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ XML_Parser parser, unsigned long long activationThresholdBytes) {
+ if ((parser == NULL) || (parser->m_parentParser != NULL)) {
+ return XML_FALSE;
+ }
+ parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
+ return XML_TRUE;
+}
+#endif /* XML_DTD */
+
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
processed, and not yet closed, we need to store tag->rawName in a more
@@ -2484,9 +2596,9 @@ storeRawNames(XML_Parser parser) {
static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
- enum XML_Error result
- = doContent(parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ enum XML_Error result = doContent(
+ parser, 0, parser->m_encoding, start, end, endPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2511,6 +2623,14 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start,
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
switch (tok) {
case XML_TOK_BOM:
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif /* XML_DTD */
+
/* If we are at the end of the buffer, this would cause the next stage,
i.e. externalEntityInitProcessor3, to pass control directly to
doContent (by detecting XML_TOK_NONE) without processing any xml text
@@ -2548,6 +2668,10 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start,
const char *next = start; /* XmlContentTok doesn't always set the last arg */
parser->m_eventPtr = start;
tok = XmlContentTok(parser->m_encoding, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - processXmlDecl
+ - externalEntityContentProcessor
+ */
parser->m_eventEndPtr = next;
switch (tok) {
@@ -2589,7 +2713,8 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
const char *end, const char **endPtr) {
enum XML_Error result
= doContent(parser, 1, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_ENTITY_EXPANSION);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2600,7 +2725,7 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
const char *s, const char *end, const char **nextPtr,
- XML_Bool haveMore) {
+ XML_Bool haveMore, enum XML_Account account) {
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -2618,6 +2743,17 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
for (;;) {
const char *next = s; /* XmlContentTok doesn't always set the last arg */
int tok = XmlContentTok(enc, s, end, &next);
+#ifdef XML_DTD
+ const char *accountAfter
+ = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
+ ? (haveMore ? s /* i.e. 0 bytes */ : end)
+ : next;
+ if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_TRAILING_CR:
@@ -2673,6 +2809,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#ifdef XML_DTD
+ /* NOTE: We are replacing 4-6 characters original input for 1 character
+ * so there is no amplification and hence recording without
+ * protection. */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_DTD */
if (parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
else if (parser->m_defaultHandler)
@@ -2791,7 +2935,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
}
tag->name.str = (XML_Char *)tag->buf;
*toPtr = XML_T('\0');
- result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
+ result
+ = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
if (result)
return result;
if (parser->m_startElementHandler)
@@ -2815,7 +2960,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (! name.str)
return XML_ERROR_NO_MEMORY;
poolFinish(&parser->m_tempPool);
- result = storeAtts(parser, enc, s, &name, &bindings);
+ result = storeAtts(parser, enc, s, &name, &bindings,
+ XML_ACCOUNT_NONE /* token spans whole start tag */);
if (result != XML_ERROR_NONE) {
freeBindings(parser, bindings);
return result;
@@ -3079,7 +3225,8 @@ freeBindings(XML_Parser parser, BINDING *bindings) {
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
- TAG_NAME *tagNamePtr, BINDING **bindingsPtr) {
+ TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
ELEMENT_TYPE *elementType;
int nDefaultAtts;
@@ -3189,7 +3336,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
/* normalize the attribute value */
result = storeAttributeValue(
parser, enc, isCdata, parser->m_atts[i].valuePtr,
- parser->m_atts[i].valueEnd, &parser->m_tempPool);
+ parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
if (result)
return result;
appAtts[attIndex] = poolStart(&parser->m_tempPool);
@@ -3618,6 +3765,13 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
for (;;) {
const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok = XmlCdataSectionTok(enc, s, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_CDATA_SECT_CLOSE:
@@ -3762,6 +3916,13 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
*startPtr = NULL;
tok = XmlIgnoreSectionTok(enc, s, end, &next);
+# ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_IGNORE_SECT:
@@ -3846,6 +4007,15 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
const char *versionend;
const XML_Char *storedversion = NULL;
int standalone = -1;
+
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
+
if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
&version, &versionend, &encodingName, &newEncoding, &standalone)) {
@@ -3995,6 +4165,10 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(parser->m_encoding, start, end, &next);
+ /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
+ - storeEntityValue
+ - processXmlDecl
+ */
parser->m_eventEndPtr = next;
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
@@ -4013,7 +4187,8 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, parser->m_encoding, s, end);
+ return storeEntityValue(parser, parser->m_encoding, s, end,
+ XML_ACCOUNT_DIRECT);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -4040,6 +4215,14 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
*/
else if (tok == XML_TOK_BOM && next == end
&& ! parser->m_parsingStatus.finalBuffer) {
+# ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
+
*nextPtr = next;
return XML_ERROR_NONE;
}
@@ -4082,16 +4265,24 @@ externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
}
/* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
However, when parsing an external subset, doProlog will not accept a BOM
- as valid, and report a syntax error, so we have to skip the BOM
+ as valid, and report a syntax error, so we have to skip the BOM, and
+ account for the BOM bytes.
*/
else if (tok == XML_TOK_BOM) {
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+
s = next;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
}
parser->m_processor = prologProcessor;
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error PTRCALL
@@ -4104,6 +4295,9 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(enc, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - storeEntityValue
+ */
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
*nextPtr = s;
@@ -4121,7 +4315,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
}
start = next;
}
@@ -4135,13 +4329,14 @@ prologProcessor(XML_Parser parser, const char *s, const char *end,
const char *next = s;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
- XML_Bool allowClosingDoctype) {
+ XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
@@ -4168,6 +4363,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -4232,6 +4431,19 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
}
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
+#ifdef XML_DTD
+ switch (role) {
+ case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
+ case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
+ case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
+ break;
+ default:
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+ }
+#endif
switch (role) {
case XML_ROLE_XML_DECL: {
enum XML_Error result = processXmlDecl(parser, 0, s, next);
@@ -4507,7 +4719,8 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
const XML_Char *attVal;
enum XML_Error result = storeAttributeValue(
parser, enc, parser->m_declAttributeIsCdata,
- s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool);
+ s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
+ XML_ACCOUNT_NONE);
if (result)
return result;
attVal = poolStart(&dtd->pool);
@@ -4540,8 +4753,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
break;
case XML_ROLE_ENTITY_VALUE:
if (dtd->keepProcessing) {
- enum XML_Error result = storeEntityValue(
- parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
+ enum XML_Error result
+ = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
+ next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -4931,12 +5145,15 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
handleDefault = XML_FALSE;
if (! dtd->paramEntityRead) {
@@ -5134,6 +5351,13 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
const char *next = NULL;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
parser->m_eventEndPtr = next;
switch (tok) {
/* report partial linebreak - it might be the last token */
@@ -5207,6 +5431,9 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+#ifdef XML_DTD
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
entity->processed = 0;
openEntity->next = parser->m_openInternalEntities;
parser->m_openInternalEntities = openEntity;
@@ -5225,17 +5452,22 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE);
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
} else
#endif /* XML_DTD */
result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE);
+ textStart, textEnd, &next, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
if (result == XML_ERROR_NONE) {
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
entity->processed = (int)(next - textStart);
parser->m_processor = internalEntityProcessor;
} else {
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif /* XML_DTD */
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5268,12 +5500,13 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE);
+ tok, next, &next, XML_FALSE, XML_TRUE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
} else
#endif /* XML_DTD */
result = doContent(parser, openEntity->startTagLevel,
parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE);
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
if (result != XML_ERROR_NONE)
return result;
@@ -5282,6 +5515,9 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
entity->processed = (int)(next - (const char *)entity->textPtr);
return result;
} else {
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5295,7 +5531,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
parser->m_processor = prologProcessor;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
} else
#endif /* XML_DTD */
{
@@ -5303,7 +5540,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
/* see externalEntityContentProcessor vs contentProcessor */
return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
}
}
@@ -5318,9 +5556,10 @@ errorProcessor(XML_Parser parser, const char *s, const char *end,
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
+ = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5332,11 +5571,23 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
for (;;) {
- const char *next;
+ const char *next
+ = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
switch (tok) {
case XML_TOK_NONE:
return XML_ERROR_NONE;
@@ -5396,6 +5647,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#ifdef XML_DTD
+ /* NOTE: We are replacing 4-6 characters original input for 1 character
+ * so there is no amplification and hence recording without
+ * protection. */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_DTD */
if (! poolAppendChar(pool, ch))
return XML_ERROR_NO_MEMORY;
break;
@@ -5473,9 +5732,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
enum XML_Error result;
const XML_Char *textEnd = entity->textPtr + entity->textLen;
entity->open = XML_TRUE;
+#ifdef XML_DTD
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
result = appendAttributeValue(parser, parser->m_internalEncoding,
isCdata, (const char *)entity->textPtr,
- (const char *)textEnd, pool);
+ (const char *)textEnd, pool,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
entity->open = XML_FALSE;
if (result)
return result;
@@ -5505,13 +5771,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *entityTextPtr, const char *entityTextEnd) {
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
#ifdef XML_DTD
int oldInEntityValue = parser->m_prologState.inEntityValue;
parser->m_prologState.inEntityValue = 1;
+#else
+ UNUSED_P(account);
#endif /* XML_DTD */
/* never return Null for the value argument in EntityDeclHandler,
since this would indicate an external entity; therefore we
@@ -5522,8 +5791,19 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
}
for (;;) {
- const char *next;
+ const char *next
+ = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
+
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ goto endEntityValue;
+ }
+#endif
+
switch (tok) {
case XML_TOK_PARAM_ENTITY_REF:
#ifdef XML_DTD
@@ -5559,13 +5839,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
goto endEntityValue;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (! dtd->paramEntityRead)
dtd->keepProcessing = dtd->standalone;
@@ -5573,9 +5856,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
dtd->keepProcessing = dtd->standalone;
} else {
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
result = storeEntityValue(
parser, parser->m_internalEncoding, (const char *)entity->textPtr,
- (const char *)(entity->textPtr + entity->textLen));
+ (const char *)(entity->textPtr + entity->textLen),
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (result)
goto endEntityValue;
@@ -6936,3 +7222,755 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
memcpy(result, s, charsRequired * sizeof(XML_Char));
return result;
}
+
+#ifdef XML_DTD
+
+static float
+accountingGetCurrentAmplification(XML_Parser rootParser) {
+ const XmlBigCount countBytesOutput
+ = rootParser->m_accounting.countBytesDirect
+ + rootParser->m_accounting.countBytesIndirect;
+ const float amplificationFactor
+ = rootParser->m_accounting.countBytesDirect
+ ? (countBytesOutput
+ / (float)(rootParser->m_accounting.countBytesDirect))
+ : 1.0f;
+ assert(! rootParser->m_parentParser);
+ return amplificationFactor;
+}
+
+static void
+accountingReportStats(XML_Parser originParser, const char *epilog) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ if (rootParser->m_accounting.debugLevel < 1) {
+ return;
+ }
+
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ fprintf(stderr,
+ "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
+ "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
+ (void *)rootParser, rootParser->m_accounting.countBytesDirect,
+ rootParser->m_accounting.countBytesIndirect,
+ (double)amplificationFactor, epilog);
+}
+
+static void
+accountingOnAbort(XML_Parser originParser) {
+ accountingReportStats(originParser, " ABORTING\n");
+}
+
+static void
+accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser, const char *before,
+ const char *after, ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account) {
+ assert(! rootParser->m_parentParser);
+
+ fprintf(stderr,
+ " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
+ bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
+ levelsAwayFromRootParser, source_line, 10, "");
+
+ const char ellipis[] = "[..]";
+ const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
+ const unsigned int contextLength = 10;
+
+ /* Note: Performance is of no concern here */
+ const char *walker = before;
+ if ((rootParser->m_accounting.debugLevel >= 3)
+ || (after - before)
+ <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ } else {
+ for (; walker < before + contextLength; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ fprintf(stderr, ellipis);
+ walker = after - contextLength;
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ }
+ fprintf(stderr, "\"\n");
+}
+
+static XML_Bool
+accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
+ const char *after, int source_line,
+ enum XML_Account account) {
+ /* Note: We need to check the token type *first* to be sure that
+ * we can even access variable <after>, safely.
+ * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
+ switch (tok) {
+ case XML_TOK_INVALID:
+ case XML_TOK_PARTIAL:
+ case XML_TOK_PARTIAL_CHAR:
+ case XML_TOK_NONE:
+ return XML_TRUE;
+ }
+
+ if (account == XML_ACCOUNT_NONE)
+ return XML_TRUE; /* because these bytes have been accounted for, already */
+
+ unsigned int levelsAwayFromRootParser;
+ const XML_Parser rootParser
+ = getRootParserOf(originParser, &levelsAwayFromRootParser);
+ assert(! rootParser->m_parentParser);
+
+ const int isDirect
+ = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
+ const ptrdiff_t bytesMore = after - before;
+
+ XmlBigCount *const additionTarget
+ = isDirect ? &rootParser->m_accounting.countBytesDirect
+ : &rootParser->m_accounting.countBytesIndirect;
+
+ /* Detect and avoid integer overflow */
+ if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
+ return XML_FALSE;
+ *additionTarget += bytesMore;
+
+ const XmlBigCount countBytesOutput
+ = rootParser->m_accounting.countBytesDirect
+ + rootParser->m_accounting.countBytesIndirect;
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ const XML_Bool tolerated
+ = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
+ || (amplificationFactor
+ <= rootParser->m_accounting.maximumAmplificationFactor);
+
+ if (rootParser->m_accounting.debugLevel >= 2) {
+ accountingReportStats(rootParser, "");
+ accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
+ bytesMore, source_line, account);
+ }
+
+ return tolerated;
+}
+
+unsigned long long
+testingAccountingGetCountBytesDirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesDirect;
+}
+
+unsigned long long
+testingAccountingGetCountBytesIndirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesIndirect;
+}
+
+static void
+entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
+ const char *action, int sourceLine) {
+ assert(! rootParser->m_parentParser);
+ if (rootParser->m_entity_stats.debugLevel < 1)
+ return;
+
+# if defined(XML_UNICODE)
+ const char *const entityName = "[..]";
+# else
+ const char *const entityName = entity->name;
+# endif
+
+ fprintf(
+ stderr,
+ "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
+ (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
+ rootParser->m_entity_stats.currentDepth,
+ rootParser->m_entity_stats.maximumDepthSeen,
+ (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
+ entity->is_param ? "%" : "&", entityName, action, entity->textLen,
+ sourceLine);
+}
+
+static void
+entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ rootParser->m_entity_stats.countEverOpened++;
+ rootParser->m_entity_stats.currentDepth++;
+ if (rootParser->m_entity_stats.currentDepth
+ > rootParser->m_entity_stats.maximumDepthSeen) {
+ rootParser->m_entity_stats.maximumDepthSeen++;
+ }
+
+ entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
+}
+
+static void
+entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
+ rootParser->m_entity_stats.currentDepth--;
+}
+
+static XML_Parser
+getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
+ XML_Parser rootParser = parser;
+ unsigned int stepsTakenUpwards = 0;
+ while (rootParser->m_parentParser) {
+ rootParser = rootParser->m_parentParser;
+ stepsTakenUpwards++;
+ }
+ assert(! rootParser->m_parentParser);
+ if (outLevelDiff != NULL) {
+ *outLevelDiff = stepsTakenUpwards;
+ }
+ return rootParser;
+}
+
+const char *
+unsignedCharToPrintable(unsigned char c) {
+ switch (c) {
+ case 0:
+ return "\\0";
+ case 1:
+ return "\\x1";
+ case 2:
+ return "\\x2";
+ case 3:
+ return "\\x3";
+ case 4:
+ return "\\x4";
+ case 5:
+ return "\\x5";
+ case 6:
+ return "\\x6";
+ case 7:
+ return "\\x7";
+ case 8:
+ return "\\x8";
+ case 9:
+ return "\\t";
+ case 10:
+ return "\\n";
+ case 11:
+ return "\\xB";
+ case 12:
+ return "\\xC";
+ case 13:
+ return "\\r";
+ case 14:
+ return "\\xE";
+ case 15:
+ return "\\xF";
+ case 16:
+ return "\\x10";
+ case 17:
+ return "\\x11";
+ case 18:
+ return "\\x12";
+ case 19:
+ return "\\x13";
+ case 20:
+ return "\\x14";
+ case 21:
+ return "\\x15";
+ case 22:
+ return "\\x16";
+ case 23:
+ return "\\x17";
+ case 24:
+ return "\\x18";
+ case 25:
+ return "\\x19";
+ case 26:
+ return "\\x1A";
+ case 27:
+ return "\\x1B";
+ case 28:
+ return "\\x1C";
+ case 29:
+ return "\\x1D";
+ case 30:
+ return "\\x1E";
+ case 31:
+ return "\\x1F";
+ case 32:
+ return " ";
+ case 33:
+ return "!";
+ case 34:
+ return "\\\"";
+ case 35:
+ return "#";
+ case 36:
+ return "$";
+ case 37:
+ return "%";
+ case 38:
+ return "&";
+ case 39:
+ return "'";
+ case 40:
+ return "(";
+ case 41:
+ return ")";
+ case 42:
+ return "*";
+ case 43:
+ return "+";
+ case 44:
+ return ",";
+ case 45:
+ return "-";
+ case 46:
+ return ".";
+ case 47:
+ return "/";
+ case 48:
+ return "0";
+ case 49:
+ return "1";
+ case 50:
+ return "2";
+ case 51:
+ return "3";
+ case 52:
+ return "4";
+ case 53:
+ return "5";
+ case 54:
+ return "6";
+ case 55:
+ return "7";
+ case 56:
+ return "8";
+ case 57:
+ return "9";
+ case 58:
+ return ":";
+ case 59:
+ return ";";
+ case 60:
+ return "<";
+ case 61:
+ return "=";
+ case 62:
+ return ">";
+ case 63:
+ return "?";
+ case 64:
+ return "@";
+ case 65:
+ return "A";
+ case 66:
+ return "B";
+ case 67:
+ return "C";
+ case 68:
+ return "D";
+ case 69:
+ return "E";
+ case 70:
+ return "F";
+ case 71:
+ return "G";
+ case 72:
+ return "H";
+ case 73:
+ return "I";
+ case 74:
+ return "J";
+ case 75:
+ return "K";
+ case 76:
+ return "L";
+ case 77:
+ return "M";
+ case 78:
+ return "N";
+ case 79:
+ return "O";
+ case 80:
+ return "P";
+ case 81:
+ return "Q";
+ case 82:
+ return "R";
+ case 83:
+ return "S";
+ case 84:
+ return "T";
+ case 85:
+ return "U";
+ case 86:
+ return "V";
+ case 87:
+ return "W";
+ case 88:
+ return "X";
+ case 89:
+ return "Y";
+ case 90:
+ return "Z";
+ case 91:
+ return "[";
+ case 92:
+ return "\\\\";
+ case 93:
+ return "]";
+ case 94:
+ return "^";
+ case 95:
+ return "_";
+ case 96:
+ return "`";
+ case 97:
+ return "a";
+ case 98:
+ return "b";
+ case 99:
+ return "c";
+ case 100:
+ return "d";
+ case 101:
+ return "e";
+ case 102:
+ return "f";
+ case 103:
+ return "g";
+ case 104:
+ return "h";
+ case 105:
+ return "i";
+ case 106:
+ return "j";
+ case 107:
+ return "k";
+ case 108:
+ return "l";
+ case 109:
+ return "m";
+ case 110:
+ return "n";
+ case 111:
+ return "o";
+ case 112:
+ return "p";
+ case 113:
+ return "q";
+ case 114:
+ return "r";
+ case 115:
+ return "s";
+ case 116:
+ return "t";
+ case 117:
+ return "u";
+ case 118:
+ return "v";
+ case 119:
+ return "w";
+ case 120:
+ return "x";
+ case 121:
+ return "y";
+ case 122:
+ return "z";
+ case 123:
+ return "{";
+ case 124:
+ return "|";
+ case 125:
+ return "}";
+ case 126:
+ return "~";
+ case 127:
+ return "\\x7F";
+ case 128:
+ return "\\x80";
+ case 129:
+ return "\\x81";
+ case 130:
+ return "\\x82";
+ case 131:
+ return "\\x83";
+ case 132:
+ return "\\x84";
+ case 133:
+ return "\\x85";
+ case 134:
+ return "\\x86";
+ case 135:
+ return "\\x87";
+ case 136:
+ return "\\x88";
+ case 137:
+ return "\\x89";
+ case 138:
+ return "\\x8A";
+ case 139:
+ return "\\x8B";
+ case 140:
+ return "\\x8C";
+ case 141:
+ return "\\x8D";
+ case 142:
+ return "\\x8E";
+ case 143:
+ return "\\x8F";
+ case 144:
+ return "\\x90";
+ case 145:
+ return "\\x91";
+ case 146:
+ return "\\x92";
+ case 147:
+ return "\\x93";
+ case 148:
+ return "\\x94";
+ case 149:
+ return "\\x95";
+ case 150:
+ return "\\x96";
+ case 151:
+ return "\\x97";
+ case 152:
+ return "\\x98";
+ case 153:
+ return "\\x99";
+ case 154:
+ return "\\x9A";
+ case 155:
+ return "\\x9B";
+ case 156:
+ return "\\x9C";
+ case 157:
+ return "\\x9D";
+ case 158:
+ return "\\x9E";
+ case 159:
+ return "\\x9F";
+ case 160:
+ return "\\xA0";
+ case 161:
+ return "\\xA1";
+ case 162:
+ return "\\xA2";
+ case 163:
+ return "\\xA3";
+ case 164:
+ return "\\xA4";
+ case 165:
+ return "\\xA5";
+ case 166:
+ return "\\xA6";
+ case 167:
+ return "\\xA7";
+ case 168:
+ return "\\xA8";
+ case 169:
+ return "\\xA9";
+ case 170:
+ return "\\xAA";
+ case 171:
+ return "\\xAB";
+ case 172:
+ return "\\xAC";
+ case 173:
+ return "\\xAD";
+ case 174:
+ return "\\xAE";
+ case 175:
+ return "\\xAF";
+ case 176:
+ return "\\xB0";
+ case 177:
+ return "\\xB1";
+ case 178:
+ return "\\xB2";
+ case 179:
+ return "\\xB3";
+ case 180:
+ return "\\xB4";
+ case 181:
+ return "\\xB5";
+ case 182:
+ return "\\xB6";
+ case 183:
+ return "\\xB7";
+ case 184:
+ return "\\xB8";
+ case 185:
+ return "\\xB9";
+ case 186:
+ return "\\xBA";
+ case 187:
+ return "\\xBB";
+ case 188:
+ return "\\xBC";
+ case 189:
+ return "\\xBD";
+ case 190:
+ return "\\xBE";
+ case 191:
+ return "\\xBF";
+ case 192:
+ return "\\xC0";
+ case 193:
+ return "\\xC1";
+ case 194:
+ return "\\xC2";
+ case 195:
+ return "\\xC3";
+ case 196:
+ return "\\xC4";
+ case 197:
+ return "\\xC5";
+ case 198:
+ return "\\xC6";
+ case 199:
+ return "\\xC7";
+ case 200:
+ return "\\xC8";
+ case 201:
+ return "\\xC9";
+ case 202:
+ return "\\xCA";
+ case 203:
+ return "\\xCB";
+ case 204:
+ return "\\xCC";
+ case 205:
+ return "\\xCD";
+ case 206:
+ return "\\xCE";
+ case 207:
+ return "\\xCF";
+ case 208:
+ return "\\xD0";
+ case 209:
+ return "\\xD1";
+ case 210:
+ return "\\xD2";
+ case 211:
+ return "\\xD3";
+ case 212:
+ return "\\xD4";
+ case 213:
+ return "\\xD5";
+ case 214:
+ return "\\xD6";
+ case 215:
+ return "\\xD7";
+ case 216:
+ return "\\xD8";
+ case 217:
+ return "\\xD9";
+ case 218:
+ return "\\xDA";
+ case 219:
+ return "\\xDB";
+ case 220:
+ return "\\xDC";
+ case 221:
+ return "\\xDD";
+ case 222:
+ return "\\xDE";
+ case 223:
+ return "\\xDF";
+ case 224:
+ return "\\xE0";
+ case 225:
+ return "\\xE1";
+ case 226:
+ return "\\xE2";
+ case 227:
+ return "\\xE3";
+ case 228:
+ return "\\xE4";
+ case 229:
+ return "\\xE5";
+ case 230:
+ return "\\xE6";
+ case 231:
+ return "\\xE7";
+ case 232:
+ return "\\xE8";
+ case 233:
+ return "\\xE9";
+ case 234:
+ return "\\xEA";
+ case 235:
+ return "\\xEB";
+ case 236:
+ return "\\xEC";
+ case 237:
+ return "\\xED";
+ case 238:
+ return "\\xEE";
+ case 239:
+ return "\\xEF";
+ case 240:
+ return "\\xF0";
+ case 241:
+ return "\\xF1";
+ case 242:
+ return "\\xF2";
+ case 243:
+ return "\\xF3";
+ case 244:
+ return "\\xF4";
+ case 245:
+ return "\\xF5";
+ case 246:
+ return "\\xF6";
+ case 247:
+ return "\\xF7";
+ case 248:
+ return "\\xF8";
+ case 249:
+ return "\\xF9";
+ case 250:
+ return "\\xFA";
+ case 251:
+ return "\\xFB";
+ case 252:
+ return "\\xFC";
+ case 253:
+ return "\\xFD";
+ case 254:
+ return "\\xFE";
+ case 255:
+ return "\\xFF";
+ default:
+ assert(0); /* never gets here */
+ return "dead code";
+ }
+ assert(0); /* never gets here */
+}
+
+#endif /* XML_DTD */
+
+static unsigned long
+getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
+ const char *const valueOrNull = getenv(variableName);
+ if (valueOrNull == NULL) {
+ return defaultDebugLevel;
+ }
+ const char *const value = valueOrNull;
+
+ errno = 0;
+ char *afterValue = (char *)value;
+ unsigned long debugLevel = strtoul(value, &afterValue, 10);
+ if ((errno != 0) || (afterValue[0] != '\0')) {
+ errno = 0;
+ return defaultDebugLevel;
+ }
+
+ return debugLevel;
+}
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c
index 40fdfb43..0e2b49fa 100644
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -53,6 +53,7 @@
#include <ctype.h>
#include <limits.h>
#include <stdint.h> /* intptr_t uint64_t */
+#include <math.h> /* NAN, INFINITY, isnan */
#if ! defined(__cplusplus)
# include <stdbool.h>
@@ -61,7 +62,7 @@
#include "expat.h"
#include "chardata.h"
#include "structdata.h"
-#include "internal.h" /* for UNUSED_P only */
+#include "internal.h"
#include "minicheck.h"
#include "memcheck.h"
#include "siphash.h"
@@ -11225,6 +11226,379 @@ START_TEST(test_nsalloc_prefixed_element) {
}
END_TEST
+#if defined(XML_DTD)
+typedef enum XML_Status (*XmlParseFunction)(XML_Parser, const char *, int, int);
+
+struct AccountingTestCase {
+ const char *primaryText;
+ const char *firstExternalText; /* often NULL */
+ const char *secondExternalText; /* often NULL */
+ const unsigned long long expectedCountBytesIndirectExtra;
+ XML_Bool singleBytesWanted;
+};
+
+static int
+accounting_external_entity_ref_handler(XML_Parser parser,
+ const XML_Char *context,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId) {
+ UNUSED_P(context);
+ UNUSED_P(base);
+ UNUSED_P(publicId);
+
+ const struct AccountingTestCase *const testCase
+ = (const struct AccountingTestCase *)XML_GetUserData(parser);
+
+ const char *externalText = NULL;
+ if (xcstrcmp(systemId, XCS("first.ent")) == 0) {
+ externalText = testCase->firstExternalText;
+ } else if (xcstrcmp(systemId, XCS("second.ent")) == 0) {
+ externalText = testCase->secondExternalText;
+ } else {
+ assert(! "systemId is neither \"first.ent\" nor \"second.ent\"");
+ }
+ assert(externalText);
+
+ XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
+ assert(entParser);
+
+ const XmlParseFunction xmlParseFunction
+ = testCase->singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse;
+
+ const enum XML_Status status = xmlParseFunction(
+ entParser, externalText, (int)strlen(externalText), XML_TRUE);
+
+ XML_ParserFree(entParser);
+ return status;
+}
+
+START_TEST(test_accounting_precision) {
+ const XML_Bool filled_later = XML_TRUE; /* value is arbitrary */
+ struct AccountingTestCase cases[] = {
+ {"<e/>", NULL, NULL, 0, 0},
+ {"<e></e>", NULL, NULL, 0, 0},
+
+ /* Attributes */
+ {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0, filled_later},
+ {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0, 0},
+ {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0,
+ filled_later},
+ {"<e k=\"&amp;&apos;&gt;&lt;&quot;\" />", NULL, NULL,
+ sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later},
+ {"<e1 xmlns='https://example.org/'>\n"
+ " <e2 xmlns=''/>\n"
+ "</e1>",
+ NULL, NULL, 0, filled_later},
+
+ /* Text */
+ {"<e>text</e>", NULL, NULL, 0, filled_later},
+ {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0, filled_later},
+ {"<e>&amp;&apos;&gt;&lt;&quot;</e>", NULL, NULL,
+ sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later},
+ {"<e>&#65;&#41;</e>", NULL, NULL, 0, filled_later},
+
+ /* Prolog */
+ {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0, filled_later},
+
+ /* Whitespace */
+ {" <e1> <e2> </e2> </e1> ", NULL, NULL, 0, filled_later},
+ {"<e1 ><e2 /></e1 >", NULL, NULL, 0, filled_later},
+ {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0, filled_later},
+
+ /* Comments */
+ {"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0, filled_later},
+
+ /* Processing instructions */
+ {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>",
+ NULL, NULL, 0, filled_later},
+ {"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>",
+ "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>",
+ 0, filled_later},
+
+ /* CDATA */
+ {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later},
+
+ /* Conditional sections */
+ {"<!DOCTYPE r [\n"
+ "<!ENTITY % draft 'INCLUDE'>\n"
+ "<!ENTITY % final 'IGNORE'>\n"
+ "<!ENTITY % import SYSTEM \"first.ent\">\n"
+ "%import;\n"
+ "]>\n"
+ "<r/>\n",
+ "<![%draft;[<!--1-->]]>\n"
+ "<![%final;[<!--22-->]]>",
+ NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")),
+ filled_later},
+
+ /* General entities */
+ {"<!DOCTYPE root [\n"
+ "<!ENTITY nine \"123456789\">\n"
+ "]>\n"
+ "<root>&nine;</root>",
+ NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later},
+ {"<!DOCTYPE root [\n"
+ "<!ENTITY nine \"123456789\">\n"
+ "]>\n"
+ "<root k1=\"&nine;\"/>",
+ NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later},
+ {"<!DOCTYPE root [\n"
+ "<!ENTITY nine \"123456789\">\n"
+ "<!ENTITY nine2 \"&nine;&nine;\">\n"
+ "]>\n"
+ "<root>&nine2;&nine2;&nine2;</root>",
+ NULL, NULL,
+ sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */
+ * (strlen("&nine;") + strlen("123456789")),
+ filled_later},
+ {"<!DOCTYPE r [\n"
+ " <!ENTITY five SYSTEM 'first.ent'>\n"
+ "]>\n"
+ "<r>&five;</r>",
+ "12345", NULL, 0, filled_later},
+
+ /* Parameter entities */
+ {"<!DOCTYPE r [\n"
+ "<!ENTITY % comment \"<!---->\">\n"
+ "%comment;\n"
+ "]>\n"
+ "<r/>",
+ NULL, NULL, sizeof(XML_Char) * strlen("<!---->"), filled_later},
+ {"<!DOCTYPE r [\n"
+ "<!ENTITY % ninedef \"&#60;!ENTITY nine &#34;123456789&#34;&#62;\">\n"
+ "%ninedef;\n"
+ "]>\n"
+ "<r>&nine;</r>",
+ NULL, NULL,
+ sizeof(XML_Char)
+ * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789")),
+ filled_later},
+ {"<!DOCTYPE r [\n"
+ "<!ENTITY % comment \"<!--1-->\">\n"
+ "<!ENTITY % comment2 \"&#37;comment;<!--22-->&#37;comment;\">\n"
+ "%comment2;\n"
+ "]>\n"
+ "<r/>\n",
+ NULL, NULL,
+ sizeof(XML_Char)
+ * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->")),
+ filled_later},
+ {"<!DOCTYPE r [\n"
+ " <!ENTITY % five \"12345\">\n"
+ " <!ENTITY % five2def \"&#60;!ENTITY five2 &#34;[&#37;five;][&#37;five;]]]]&#34;&#62;\">\n"
+ " %five2def;\n"
+ "]>\n"
+ "<r>&five2;</r>",
+ NULL, NULL, /* from "%five2def;": */
+ sizeof(XML_Char)
+ * (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">")
+ + 2 /* calls to "%five;" */ * strlen("12345")
+ + /* from "&five2;": */ strlen("[12345][12345]]]]")),
+ filled_later},
+ {"<!DOCTYPE r SYSTEM \"first.ent\">\n"
+ "<r/>",
+ "<!ENTITY % comment '<!--1-->'>\n"
+ "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n"
+ "%comment2;",
+ NULL,
+ sizeof(XML_Char)
+ * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->")
+ + 2 /* calls to "%comment;" */ * strlen("<!---->")),
+ filled_later},
+ {"<!DOCTYPE r SYSTEM 'first.ent'>\n"
+ "<r/>",
+ "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n"
+ "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n"
+ "%e2;\n",
+ "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->"),
+ filled_later},
+ {
+ "<!DOCTYPE r SYSTEM 'first.ent'>\n"
+ "<r/>",
+ "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
+ "<!ENTITY % e2 '%e1;'>",
+ "<?xml version='1.0' encoding='utf-8'?>\n"
+ "hello\n"
+ "xml" /* without trailing newline! */,
+ 0,
+ filled_later,
+ },
+ {
+ "<!DOCTYPE r SYSTEM 'first.ent'>\n"
+ "<r/>",
+ "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
+ "<!ENTITY % e2 '%e1;'>",
+ "<?xml version='1.0' encoding='utf-8'?>\n"
+ "hello\n"
+ "xml\n" /* with trailing newline! */,
+ 0,
+ filled_later,
+ },
+ {"<!DOCTYPE doc SYSTEM 'first.ent'>\n"
+ "<doc></doc>\n",
+ "<!ELEMENT doc EMPTY>\n"
+ "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
+ "<!ENTITY % e2 '%e1;'>\n"
+ "%e1;\n",
+ "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */,
+ strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later},
+ {"<!DOCTYPE r [\n"
+ " <!ENTITY five SYSTEM 'first.ent'>\n"
+ "]>\n"
+ "<r>&five;</r>",
+ "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later},
+ };
+
+ const size_t countCases = sizeof(cases) / sizeof(cases[0]);
+ size_t u = 0;
+ for (; u < countCases; u++) {
+ size_t v = 0;
+ for (; v < 2; v++) {
+ const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE;
+ const unsigned long long expectedCountBytesDirect
+ = strlen(cases[u].primaryText);
+ const unsigned long long expectedCountBytesIndirect
+ = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText)
+ : 0)
+ + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText)
+ : 0)
+ + cases[u].expectedCountBytesIndirectExtra;
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (cases[u].firstExternalText) {
+ XML_SetExternalEntityRefHandler(parser,
+ accounting_external_entity_ref_handler);
+ XML_SetUserData(parser, (void *)&cases[u]);
+ cases[u].singleBytesWanted = singleBytesWanted;
+ }
+
+ const XmlParseFunction xmlParseFunction
+ = singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse;
+
+ enum XML_Status status
+ = xmlParseFunction(parser, cases[u].primaryText,
+ (int)strlen(cases[u].primaryText), XML_TRUE);
+ if (status != XML_STATUS_OK) {
+ _xml_failure(parser, __FILE__, __LINE__);
+ }
+
+ const unsigned long long actualCountBytesDirect
+ = testingAccountingGetCountBytesDirect(parser);
+ const unsigned long long actualCountBytesIndirect
+ = testingAccountingGetCountBytesIndirect(parser);
+
+ XML_ParserFree(parser);
+
+ if (actualCountBytesDirect != expectedCountBytesDirect) {
+ fprintf(
+ stderr,
+ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
+ "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n",
+ u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
+ expectedCountBytesDirect, actualCountBytesDirect);
+ fail("Count of direct bytes is off");
+ }
+
+ if (actualCountBytesIndirect != expectedCountBytesIndirect) {
+ fprintf(
+ stderr,
+ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
+ "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n",
+ u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
+ expectedCountBytesIndirect, actualCountBytesIndirect);
+ fail("Count of indirect bytes is off");
+ }
+ }
+ }
+}
+END_TEST
+
+START_TEST(test_billion_laughs_attack_protection_api) {
+ XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
+ XML_Parser parserWithParent
+ = XML_ExternalEntityParserCreate(parserWithoutParent, NULL, NULL);
+ if (parserWithoutParent == NULL)
+ fail("parserWithoutParent is NULL");
+ if (parserWithParent == NULL)
+ fail("parserWithParent is NULL");
+
+ // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f)
+ == XML_TRUE)
+ fail("Call with NULL parser is NOT supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent,
+ 123.0f)
+ == XML_TRUE)
+ fail("Call with non-root parser is NOT supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, NAN)
+ == XML_TRUE)
+ fail("Call with NaN limit is NOT supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, -1.0f)
+ == XML_TRUE)
+ fail("Call with negative limit is NOT supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, 0.9f)
+ == XML_TRUE)
+ fail("Call with positive limit <1.0 is NOT supposed to succeed");
+
+ // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, 1.0f)
+ == XML_FALSE)
+ fail("Call with positive limit >=1.0 is supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, 123456.789f)
+ == XML_FALSE)
+ fail("Call with positive limit >=1.0 is supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parserWithoutParent, INFINITY)
+ == XML_FALSE)
+ fail("Call with positive limit >=1.0 is supposed to succeed");
+
+ // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases
+ if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123)
+ == XML_TRUE)
+ fail("Call with NULL parser is NOT supposed to succeed");
+ if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent,
+ 123)
+ == XML_TRUE)
+ fail("Call with non-root parser is NOT supposed to succeed");
+
+ // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases
+ if (XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ parserWithoutParent, 123)
+ == XML_FALSE)
+ fail("Call with non-NULL parentless parser is supposed to succeed");
+
+ XML_ParserFree(parserWithParent);
+ XML_ParserFree(parserWithoutParent);
+}
+END_TEST
+
+START_TEST(test_helper_unsigned_char_to_printable) {
+ // Smoke test
+ unsigned char uc = 0;
+ for (; uc < (unsigned char)-1; uc++) {
+ const char *const printable = unsignedCharToPrintable(uc);
+ if (printable == NULL)
+ fail("unsignedCharToPrintable returned NULL");
+ if (strlen(printable) < (size_t)1)
+ fail("unsignedCharToPrintable returned empty string");
+ }
+
+ // Two concrete samples
+ if (strcmp(unsignedCharToPrintable('A'), "A") != 0)
+ fail("unsignedCharToPrintable result mistaken");
+ if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0)
+ fail("unsignedCharToPrintable result mistaken");
+}
+END_TEST
+#endif // defined(XML_DTD)
+
static Suite *
make_suite(void) {
Suite *s = suite_create("basic");
@@ -11233,6 +11607,9 @@ make_suite(void) {
TCase *tc_misc = tcase_create("miscellaneous tests");
TCase *tc_alloc = tcase_create("allocation tests");
TCase *tc_nsalloc = tcase_create("namespace allocation tests");
+#if defined(XML_DTD)
+ TCase *tc_accounting = tcase_create("accounting tests");
+#endif
suite_add_tcase(s, tc_basic);
tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
@@ -11593,6 +11970,13 @@ make_suite(void) {
tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext);
tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element);
+#if defined(XML_DTD)
+ suite_add_tcase(s, tc_accounting);
+ tcase_add_test(tc_accounting, test_accounting_precision);
+ tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
+ tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
+#endif
+
return s;
}
diff --git a/expat/xmlwf/xmltchar.h b/expat/xmlwf/xmltchar.h
index 4843fbe7..30283d08 100644
--- a/expat/xmlwf/xmltchar.h
+++ b/expat/xmlwf/xmltchar.h
@@ -7,7 +7,7 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Licensed under the MIT license:
@@ -55,6 +55,8 @@
# define tmain wmain
# define tremove _wremove
# define tchar wchar_t
+# define tcstof wcstof
+# define tcstoull wcstoull
#else /* not XML_UNICODE */
# define T(x) x
# define ftprintf fprintf
@@ -72,4 +74,6 @@
# define tmain main
# define tremove remove
# define tchar char
+# define tcstof strtof
+# define tcstoull strtoull
#endif /* not XML_UNICODE */
diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c
index 4242e1c7..342d6c59 100644
--- a/expat/xmlwf/xmlwf.c
+++ b/expat/xmlwf/xmlwf.c
@@ -39,11 +39,15 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <expat_config.h>
+
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
+#include <math.h> /* for isnan */
+#include <errno.h>
#include "expat.h"
#include "codepage.h"
@@ -903,6 +907,12 @@ usage(const XML_Char *prog, int rc) {
T(" -t write no XML output for [t]iming of plain parsing\n")
T(" -N enable adding doctype and [n]otation declarations\n")
T("\n")
+ T("billion laughs attack protection:\n")
+ T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
+ T("\n")
+ T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
+ T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
+ T("\n")
T("info arguments:\n")
T(" -h show this [h]elp message and exit\n")
T(" -v show program's [v]ersion number and exit\n")
@@ -951,6 +961,11 @@ tmain(int argc, XML_Char **argv) {
int requireStandalone = 0;
int requiresNotations = 0;
int continueOnError = 0;
+
+ float attackMaximumAmplification = -1.0f; /* signaling "not set" */
+ unsigned long long attackThresholdBytes;
+ XML_Bool attackThresholdGiven = XML_FALSE;
+
int exitCode = XMLWF_EXIT_SUCCESS;
enum XML_ParamEntityParsing paramEntityParsing
= XML_PARAM_ENTITY_PARSING_NEVER;
@@ -1030,6 +1045,49 @@ tmain(int argc, XML_Char **argv) {
continueOnError = 1;
j++;
break;
+ case T('a'): {
+ const XML_Char *valueText = NULL;
+ XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
+
+ errno = 0;
+ XML_Char *afterValueText = (XML_Char *)valueText;
+ attackMaximumAmplification = tcstof(valueText, &afterValueText);
+ if ((errno != 0) || (afterValueText[0] != T('\0'))
+ || isnan(attackMaximumAmplification)
+ || (attackMaximumAmplification < 1.0f)) {
+ // This prevents tperror(..) from reporting misleading "[..]: Success"
+ errno = ERANGE;
+ tperror(T("invalid amplification limit") T(
+ " (needs a floating point number greater or equal than 1.0)"));
+ exit(XMLWF_EXIT_USAGE_ERROR);
+ }
+#ifndef XML_DTD
+ ftprintf(stderr, T("Warning: Given amplification limit ignored") T(
+ ", xmlwf has been compiled without DTD support.\n"));
+#endif
+ break;
+ }
+ case T('b'): {
+ const XML_Char *valueText = NULL;
+ XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
+
+ errno = 0;
+ XML_Char *afterValueText = (XML_Char *)valueText;
+ attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
+ if ((errno != 0) || (afterValueText[0] != T('\0'))) {
+ // This prevents tperror(..) from reporting misleading "[..]: Success"
+ errno = ERANGE;
+ tperror(T("invalid ignore threshold")
+ T(" (needs an integer from 0 to 2^64-1)"));
+ exit(XMLWF_EXIT_USAGE_ERROR);
+ }
+ attackThresholdGiven = XML_TRUE;
+#ifndef XML_DTD
+ ftprintf(stderr, T("Warning: Given attack threshold ignored") T(
+ ", xmlwf has been compiled without DTD support.\n"));
+#endif
+ break;
+ }
case T('\0'):
if (j > 1) {
i++;
@@ -1060,6 +1118,19 @@ tmain(int argc, XML_Char **argv) {
exit(XMLWF_EXIT_INTERNAL_ERROR);
}
+ if (attackMaximumAmplification != -1.0f) {
+#ifdef XML_DTD
+ XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parser, attackMaximumAmplification);
+#endif
+ }
+ if (attackThresholdGiven) {
+#ifdef XML_DTD
+ XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ parser, attackThresholdBytes);
+#endif
+ }
+
if (requireStandalone)
XML_SetNotStandaloneHandler(parser, notStandalone);
XML_SetParamEntityParsing(parser, paramEntityParsing);
diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py
index 8ec8d4ea..c2a527fd 100755
--- a/expat/xmlwf/xmlwf_helpgen.py
+++ b/expat/xmlwf/xmlwf_helpgen.py
@@ -73,6 +73,14 @@ output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not
output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing')
output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations')
+billion_laughs = parser.add_argument_group('billion laughs attack protection',
+ description='NOTE: '
+ 'If you ever need to increase these values '
+ 'for non-attack payload, please file a bug report.')
+billion_laughs.add_argument('-a', metavar='FACTOR',
+ help='set maximum tolerated [a]mplification factor (default: 100.0)')
+billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)')
+
parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)')
info = parser.add_argument_group('info arguments')