summaryrefslogtreecommitdiff
path: root/libarchive/archive_read_support_format_warc.c
diff options
context:
space:
mode:
authorMartin Matuska <martin@matuska.org>2017-02-04 01:30:53 +0100
committerMartin Matuska <martin@matuska.org>2017-02-04 01:30:53 +0100
commitc2c71a6dfaf0e79f2a08d9f221ef57f3a8359f58 (patch)
treefad994aeb85173a8c52cf6fb8e07b4761c9b9943 /libarchive/archive_read_support_format_warc.c
parentd77b577b2d5aa259fca06313c4940e1e61ab1e0e (diff)
downloadlibarchive-c2c71a6dfaf0e79f2a08d9f221ef57f3a8359f58.tar.gz
WARC reader: always check against valid line end
Change the way how WARC version number is determined. Reported-By: OSS-Fuzz issue 511, 526, 532
Diffstat (limited to 'libarchive/archive_read_support_format_warc.c')
-rw-r--r--libarchive/archive_read_support_format_warc.c139
1 files changed, 74 insertions, 65 deletions
diff --git a/libarchive/archive_read_support_format_warc.c b/libarchive/archive_read_support_format_warc.c
index 30cdaf9d..fe0f2e1a 100644
--- a/libarchive/archive_read_support_format_warc.c
+++ b/libarchive/archive_read_support_format_warc.c
@@ -134,8 +134,8 @@ static ssize_t _warc_rdlen(const char *buf, size_t bsz);
static time_t _warc_rdrtm(const char *buf, size_t bsz);
static time_t _warc_rdmtm(const char *buf, size_t bsz);
static const char *_warc_find_eoh(const char *buf, size_t bsz);
+static const char *_warc_find_eol(const char *buf, size_t bsz);
-
int
archive_read_support_format_warc(struct archive *_a)
{
@@ -577,51 +577,32 @@ out:
}
static unsigned int
-_warc_rdver(const char buf[10], size_t bsz)
+_warc_rdver(const char *buf, size_t bsz)
{
static const char magic[] = "WARC/";
- unsigned int ver;
-
- (void)bsz; /* UNUSED */
+ unsigned int ver = 99999U;
+ unsigned int end = 0U;
- if (memcmp(buf, magic, sizeof(magic) - 1U) != 0) {
- /* nope */
- return 99999U;
+ if (bsz < 12 || memcmp(buf, magic, sizeof(magic) - 1U) != 0) {
+ /* buffer too small or invalid magic */
+ return ver;
}
/* looks good so far, read the version number for a laugh */
buf += sizeof(magic) - 1U;
- /* most common case gets a quick-check here */
- if (memcmp(buf, "1.0\r\n", 5U) == 0) {
- ver = 10000U;
- } else {
- switch (*buf) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- if (buf[1U] == '.') {
- char *on;
-
- /* set up major version */
- ver = (buf[0U] - '0') * 10000U;
- /* minor version, anyone? */
- ver += (strtol(buf + 2U, &on, 10)) * 100U;
- /* don't parse anything else */
- if (on > buf + 2U) {
- break;
- }
- }
- /* FALLTHROUGH */
- case '9':
- default:
- /* just make the version ridiculously high */
- ver = 999999U;
- break;
+
+ if (isdigit(buf[0U]) && (buf[1U] == '.') && isdigit(buf[2U])) {
+ /* we support a maximum of 2 digits in the minor version */
+ if (isdigit(buf[3U]))
+ end = 1U;
+ if (memcmp(buf + 3U + end, "\r\n", 2U) == 0) {
+ /* set up major version */
+ ver = (buf[0U] - '0') * 10000U;
+ /* set up minor version */
+ if (end == 1U) {
+ ver += (buf[2U] - '0') * 1000U;
+ ver += (buf[3U] - '0') * 100U;
+ } else
+ ver += (buf[2U] - '0') * 100U;
}
}
return ver;
@@ -632,32 +613,37 @@ _warc_rdtyp(const char *buf, size_t bsz)
{
static const char _key[] = "\r\nWARC-Type:";
const char *const eob = buf + bsz;
- const char *val;
+ const char *val, *eol;
if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
/* no bother */
return WT_NONE;
}
- /* overread whitespace */
val += sizeof(_key) - 1U;
+ if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
+ /* no end of line */
+ return WT_NONE;
+ }
+
+ /* overread whitespace */
while (val < eob && isspace((unsigned char)*val))
++val;
if (val + 8U > eob) {
;
- } else if (memcmp(val, "resource", 8U) == 0) {
+ } else if (memcmp(val, "resource", 8U) == 0 && val + 8U == eol) {
return WT_RSRC;
- } else if (memcmp(val, "warcinfo", 8U) == 0) {
+ } else if (memcmp(val, "warcinfo", 8U) == 0 && val + 8U == eol) {
return WT_INFO;
- } else if (memcmp(val, "metadata", 8U) == 0) {
+ } else if (memcmp(val, "metadata", 8U) == 0 && val + 8U == eol) {
return WT_META;
- } else if (memcmp(val, "request", 7U) == 0) {
+ } else if (memcmp(val, "request", 7U) == 0 && val + 7U == eol) {
return WT_REQ;
- } else if (memcmp(val, "response", 8U) == 0) {
+ } else if (memcmp(val, "response", 8U) == 0 && val + 8U == eol) {
return WT_RSP;
- } else if (memcmp(val, "conversi", 8U) == 0) {
+ } else if (memcmp(val, "conversi", 8U) == 0 && val + 8U == eol) {
return WT_CONV;
- } else if (memcmp(val, "continua", 8U) == 0) {
+ } else if (memcmp(val, "continua", 8U) == 0 && val + 8U == eol) {
return WT_CONT;
}
return WT_NONE;
@@ -686,8 +672,10 @@ _warc_rduri(const char *buf, size_t bsz)
if ((uri = xmemmem(val, eob - val, "://", 3U)) == NULL) {
/* not touching that! */
return res;
- } else if ((eol = memchr(uri, '\n', eob - uri)) == NULL) {
- /* no end of line? :O */
+ }
+
+ if ((eol = _warc_find_eol(uri, eob - uri)) == NULL) {
+ /* no end of line */
return res;
}
@@ -720,7 +708,7 @@ static ssize_t
_warc_rdlen(const char *buf, size_t bsz)
{
static const char _key[] = "\r\nContent-Length:";
- const char *val;
+ const char *val, *eol;
char *on = NULL;
long int len;
@@ -728,14 +716,19 @@ _warc_rdlen(const char *buf, size_t bsz)
/* no bother */
return -1;
}
+ val += sizeof(_key) - 1U;
+ if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
+ /* no end of line */
+ return -1;
+ }
/* strtol kindly overreads whitespace for us, so use that */
- val += sizeof(_key) - 1U;
len = strtol(val, &on, 10);
- if (on == NULL || !isspace((unsigned char)*on)) {
- /* hm, can we trust that number? Best not. */
+ if (on != eol) {
+ /* line must end here */
return -1;
}
+
return (size_t)len;
}
@@ -743,7 +736,7 @@ static time_t
_warc_rdrtm(const char *buf, size_t bsz)
{
static const char _key[] = "\r\nWARC-Date:";
- const char *val;
+ const char *val, *eol;
char *on = NULL;
time_t res;
@@ -751,13 +744,17 @@ _warc_rdrtm(const char *buf, size_t bsz)
/* no bother */
return (time_t)-1;
}
+ val += sizeof(_key) - 1U;
+ if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) {
+ /* no end of line */
+ return -1;
+ }
/* xstrpisotime() kindly overreads whitespace for us, so use that */
- val += sizeof(_key) - 1U;
res = xstrpisotime(val, &on);
- if (on == NULL || !isspace((unsigned char)*on)) {
- /* hm, can we trust that number? Best not. */
- return (time_t)-1;
+ if (on != eol) {
+ /* line must end here */
+ return -1;
}
return res;
}
@@ -766,7 +763,7 @@ static time_t
_warc_rdmtm(const char *buf, size_t bsz)
{
static const char _key[] = "\r\nLast-Modified:";
- const char *val;
+ const char *val, *eol;
char *on = NULL;
time_t res;
@@ -774,13 +771,17 @@ _warc_rdmtm(const char *buf, size_t bsz)
/* no bother */
return (time_t)-1;
}
+ val += sizeof(_key) - 1U;
+ if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL ) {
+ /* no end of line */
+ return -1;
+ }
/* xstrpisotime() kindly overreads whitespace for us, so use that */
- val += sizeof(_key) - 1U;
res = xstrpisotime(val, &on);
- if (on == NULL || !isspace((unsigned char)*on)) {
- /* hm, can we trust that number? Best not. */
- return (time_t)-1;
+ if (on != eol) {
+ /* line must end here */
+ return -1;
}
return res;
}
@@ -797,4 +798,12 @@ _warc_find_eoh(const char *buf, size_t bsz)
return hit;
}
+static const char*
+_warc_find_eol(const char *buf, size_t bsz)
+{
+ static const char _marker[] = "\r\n";
+ const char *hit = xmemmem(buf, bsz, _marker, sizeof(_marker) - 1U);
+
+ return hit;
+}
/* archive_read_support_format_warc.c ends here */