diff options
author | Anatol Belski <ab@php.net> | 2020-07-04 17:28:32 +0200 |
---|---|---|
committer | Anatol Belski <ab@php.net> | 2020-08-29 02:05:56 +0200 |
commit | c3eeab01bac39234bbe09407f4bddf8f22b6af53 (patch) | |
tree | 99a9bb45811ee756c81409aea6b59c0af53f5224 /ext/fileinfo/libmagic | |
parent | 1902f730ee2bda60552f34c0643e2d7b47e4fb64 (diff) | |
download | php-git-c3eeab01bac39234bbe09407f4bddf8f22b6af53.tar.gz |
fileinfo: Upgrade to libmagic 5.39
Signed-off-by: Anatol Belski <ab@php.net>
Diffstat (limited to 'ext/fileinfo/libmagic')
-rw-r--r-- | ext/fileinfo/libmagic/apprentice.c | 207 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/ascmagic.c | 15 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/buffer.c | 6 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/cdf.c | 28 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/compress.c | 216 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/der.c | 78 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/encoding.c | 3 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/file.h | 115 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/fsmagic.c | 8 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/funcs.c | 130 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/is_csv.c | 197 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/is_json.c | 17 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/magic.c | 4 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/magic.h | 4 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/print.c | 29 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/readcdf.c | 2 | ||||
-rw-r--r-- | ext/fileinfo/libmagic/softmagic.c | 166 |
17 files changed, 986 insertions, 239 deletions
diff --git a/ext/fileinfo/libmagic/apprentice.c b/ext/fileinfo/libmagic/apprentice.c index f6ff1cf26e..3fc33a7c09 100644 --- a/ext/fileinfo/libmagic/apprentice.c +++ b/ext/fileinfo/libmagic/apprentice.c @@ -34,7 +34,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.283 2019/02/20 02:35:27 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $") #endif /* lint */ #include "magic.h" @@ -65,6 +65,8 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.283 2019/02/20 02:35:27 christos Exp $") #else #define MAXMAGIC_SIZE SSIZE_MAX #endif +#include <dirent.h> +#include <limits.h> #define EATAB {while (isascii(CAST(unsigned char, *l)) && \ @@ -139,6 +141,7 @@ private void apprentice_list(struct mlist *, int ); private struct magic_map *apprentice_load(struct magic_set *, const char *, int); private struct mlist *mlist_alloc(void); +private void mlist_free_all(struct magic_set *); private void mlist_free(struct mlist *); private void byteswap(struct magic *, uint32_t); private void bs1(struct magic *); @@ -146,17 +149,24 @@ private uint16_t swap2(uint16_t); private uint32_t swap4(uint32_t); private uint64_t swap8(uint64_t); private char *mkdbname(struct magic_set *, const char *, int); +private struct magic_map *apprentice_buf(struct magic_set *, struct magic *, + size_t); private struct magic_map *apprentice_map(struct magic_set *, const char *); +private int check_buffer(struct magic_set *, struct magic_map *, const char *); private void apprentice_unmap(struct magic_map *); private int apprentice_compile(struct magic_set *, struct magic_map *, const char *); private int check_format_type(const char *, int, const char **); private int check_format(struct magic_set *, struct magic *); private int get_op(char); -private int parse_mime(struct magic_set *, struct magic_entry *, const char *); -private int parse_strength(struct magic_set *, struct magic_entry *, const char *); -private int parse_apple(struct magic_set *, struct magic_entry *, const char *); -private int parse_ext(struct magic_set *, struct magic_entry *, const char *); +private int parse_mime(struct magic_set *, struct magic_entry *, const char *, + size_t); +private int parse_strength(struct magic_set *, struct magic_entry *, + const char *, size_t); +private int parse_apple(struct magic_set *, struct magic_entry *, const char *, + size_t); +private int parse_ext(struct magic_set *, struct magic_entry *, const char *, + size_t); private size_t magicsize = sizeof(struct magic); @@ -166,7 +176,8 @@ private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; private struct { const char *name; size_t len; - int (*fun)(struct magic_set *, struct magic_entry *, const char *); + int (*fun)(struct magic_set *, struct magic_entry *, const char *, + size_t); } bang[] = { #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } DECLARE_FIELD(mime), @@ -245,6 +256,8 @@ static const struct type_tbl_s type_tbl[] = { { XX("use"), FILE_USE, FILE_FMT_NONE }, { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, { XX("der"), FILE_DER, FILE_FMT_STR }, + { XX("guid"), FILE_GUID, FILE_FMT_STR }, + { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD }, { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, }; @@ -432,21 +445,23 @@ apprentice_1(struct magic_set *ms, const char *fn, int action) return apprentice_compile(ms, map, fn); } +#ifndef COMPILE_ONLY map = apprentice_map(ms, fn); - if (map == RCAST(struct magic_map *, -1)) - return -1; if (map == NULL) { - if (fn) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "using regular magic file `%s'", fn); - map = apprentice_load(ms, fn, action); - } + if (ms->flags & MAGIC_CHECK) + file_magwarn(ms, "using regular magic file `%s'", fn); + map = apprentice_load(ms, fn, action); if (map == NULL) return -1; } for (i = 0; i < MAGIC_SETS; i++) { if (add_mlist(ms->mlist[i], map, i) == -1) { + /* failed to add to any list, free explicitly */ + if (i == 0) + apprentice_unmap(map); + else + mlist_free_all(ms); file_oomem(ms, sizeof(*ml)); return -1; } @@ -462,6 +477,9 @@ apprentice_1(struct magic_set *ms, const char *fn, int action) } } return 0; +#else + return 0; +#endif /* COMPILE_ONLY */ } protected void @@ -500,6 +518,7 @@ file_ms_alloc(int flags) } ms->o.buf = ms->o.pbuf = NULL; + ms->o.blen = 0; len = (ms->c.len = 10) * sizeof(*ms->c.li); if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL) @@ -556,6 +575,17 @@ mlist_alloc(void) } private void +mlist_free_all(struct magic_set *ms) +{ + size_t i; + + for (i = 0; i < MAGIC_SETS; i++) { + mlist_free(ms->mlist[i]); + ms->mlist[i] = NULL; + } +} + +private void mlist_free_one(struct mlist *ml) { if (ml->map) @@ -571,20 +601,66 @@ mlist_free(struct mlist *mlist) if (mlist == NULL) return; - for (ml = mlist->next; ml != mlist; ml = next) { + for (ml = mlist->next; ml != mlist;) { next = ml->next; mlist_free_one(ml); + ml = next; } mlist_free_one(mlist); } +#ifndef COMPILE_ONLY +/* void **bufs: an array of compiled magic files */ +protected int +buffer_apprentice(struct magic_set *ms, struct magic **bufs, + size_t *sizes, size_t nbufs) +{ + size_t i, j; + struct mlist *ml; + struct magic_map *map; + + if (nbufs == 0) + return -1; + + (void)file_reset(ms, 0); + + init_file_tables(); + + for (i = 0; i < MAGIC_SETS; i++) { + mlist_free(ms->mlist[i]); + if ((ms->mlist[i] = mlist_alloc()) == NULL) { + file_oomem(ms, sizeof(*ms->mlist[i])); + goto fail; + } + } + + for (i = 0; i < nbufs; i++) { + map = apprentice_buf(ms, bufs[i], sizes[i]); + if (map == NULL) + goto fail; + + for (j = 0; j < MAGIC_SETS; j++) { + if (add_mlist(ms->mlist[j], map, j) == -1) { + file_oomem(ms, sizeof(*ml)); + goto fail; + } + } + } + + return 0; +fail: + mlist_free_all(ms); + return -1; +} +#endif + /* const char *fn: list of magic files and directories */ protected int file_apprentice(struct magic_set *ms, const char *fn, int action) { char *p, *mfn; int fileerr, errs = -1; - size_t i; + size_t i, j; (void)file_reset(ms, 0); @@ -618,9 +694,9 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) mlist_free(ms->mlist[i]); if ((ms->mlist[i] = mlist_alloc()) == NULL) { file_oomem(ms, sizeof(*ms->mlist[i])); - while (i-- > 0) { - mlist_free(ms->mlist[i]); - ms->mlist[i] = NULL; + for (j = 0; j < i; j++) { + mlist_free(ms->mlist[j]); + ms->mlist[j] = NULL; } efree(mfn); return -1; @@ -771,9 +847,14 @@ typesize(int type) case FILE_DOUBLE: case FILE_BEDOUBLE: case FILE_LEDOUBLE: + case FILE_OFFSET: return 8; + + case FILE_GUID: + return 16; + default: - return CAST(size_t, ~0); + return FILE_BADSIZE; } } @@ -827,8 +908,10 @@ apprentice_magic_strength(const struct magic *m) case FILE_DOUBLE: case FILE_BEDOUBLE: case FILE_LEDOUBLE: + case FILE_GUID: + case FILE_OFFSET: ts = typesize(m->type); - if (ts == CAST(size_t, ~0)) + if (ts == FILE_BADSIZE) abort(); val += ts * MULT; break; @@ -1019,6 +1102,8 @@ set_test_type(struct magic *mstart, struct magic *m) case FILE_BEDOUBLE: case FILE_LEDOUBLE: case FILE_DER: + case FILE_GUID: + case FILE_OFFSET: mstart->flag |= BINTEST; break; case FILE_STRING: @@ -1148,7 +1233,8 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs, continue; } if ((*bang[i].fun)(ms, &me, - line + bang[i].len + 2) != 0) { + line + bang[i].len + 2, + len - bang[i].len - 2) != 0) { (*errs)++; continue; } @@ -1448,6 +1534,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) case FILE_DOUBLE: case FILE_BEDOUBLE: case FILE_LEDOUBLE: + case FILE_OFFSET: v = CAST(int64_t, v); break; case FILE_STRING: @@ -1462,12 +1549,13 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) case FILE_USE: case FILE_CLEAR: case FILE_DER: + case FILE_GUID: break; default: if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "cannot happen: m->type=%d\n", m->type); - return ~0U; + return FILE_BADSIZE; } } return v; @@ -1869,12 +1957,17 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, } /* get offset, then skip over it */ + if (*l == '-') { + ++l; /* step over */ + m->flag |= OFFNEGATIVE; + } m->offset = CAST(int32_t, strtol(l, &t, 0)); if (l == t) { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "offset `%s' invalid", l); return -1; } + l = t; if (m->flag & INDIR) { @@ -2167,7 +2260,8 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line, * if valid */ private int -parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) +parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line, + size_t len __attribute__((__unused__))) { const char *l = line; char *el; @@ -2229,7 +2323,7 @@ goodchar(unsigned char x, const char *extra) private int parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, - zend_off_t off, size_t len, const char *name, const char *extra, int nt) + size_t llen, zend_off_t off, size_t len, const char *name, const char *extra, int nt) { size_t i; const char *l = line; @@ -2250,7 +2344,8 @@ parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, } EATAB; - for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++) + for (i = 0; *l && i < llen && i < len && goodchar(*l, extra); + buf[i++] = *l++) continue; if (i == len && *l) { @@ -2279,11 +2374,12 @@ parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, * magic[index - 1] */ private int -parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) +parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line, + size_t len) { struct magic *m = &me->mp[0]; - return parse_extra(ms, me, line, + return parse_extra(ms, me, line, len, CAST(off_t, offsetof(struct magic, apple)), sizeof(m->apple), "APPLE", "!+-./?", 0); } @@ -2292,11 +2388,12 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) * Parse a comma-separated list of extensions */ private int -parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line) +parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line, + size_t len) { struct magic *m = &me->mp[0]; - return parse_extra(ms, me, line, + return parse_extra(ms, me, line, len, CAST(off_t, offsetof(struct magic, ext)), sizeof(m->ext), "EXTENSION", ",!+-/@?_$", 0); } @@ -2306,12 +2403,13 @@ parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line) * if valid */ private int -parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) +parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line, + size_t len) { struct magic *m = &me->mp[0]; - return parse_extra(ms, me, line, - CAST(zend_off_t, offsetof(struct magic, mimetype)), + return parse_extra(ms, me, line, len, + CAST(off_t, offsetof(struct magic, mimetype)), sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1); } @@ -2632,6 +2730,11 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) if (errno == 0) *p = ep; return 0; + case FILE_GUID: + if (file_parse_guid(*p, m->value.guid) == -1) + return -1; + *p += FILE_GUID_SIZE - 1; + return 0; default: errno = 0; ull = CAST(uint64_t, strtoull(*p, &ep, 0)); @@ -2643,7 +2746,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) uint64_t x; const char *q; - if (ts == CAST(size_t, ~0)) { + if (ts == FILE_BADSIZE) { file_magwarn(ms, "Expected numeric type got `%s'", type_tbl[m->type].name); @@ -2840,8 +2943,12 @@ getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) out: *p = '\0'; m->vallen = CAST(unsigned char, (p - origp)); - if (m->type == FILE_PSTRING) - m->vallen += CAST(unsigned char, file_pstring_length_size(m)); + if (m->type == FILE_PSTRING) { + size_t l = file_pstring_length_size(ms, m); + if (l == FILE_BADSIZE) + return NULL; + m->vallen += CAST(unsigned char, l); + } return s; } @@ -2871,7 +2978,7 @@ file_showstr(FILE *fp, const char *s, size_t len) char c; for (;;) { - if (len == ~0U) { + if (len == FILE_BADSIZE) { c = *s++; if (c == '\0') break; @@ -2948,7 +3055,6 @@ eatsize(const char **p) *p = l; } -#if 0 /* * handle a buffer containing a compiled file. */ @@ -2970,7 +3076,6 @@ apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len) } return map; } -#endif /* * handle a compiled file. @@ -3119,7 +3224,6 @@ error: return NULL; } -#if 0 private int check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) { @@ -3175,7 +3279,6 @@ check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname) byteswap(map->magic[i], map->nmagic[i]); return 0; } -#endif /* * handle an mmaped file. @@ -3376,7 +3479,7 @@ bs1(struct magic *m) } protected size_t -file_pstring_length_size(const struct magic *m) +file_pstring_length_size(struct magic_set *ms, const struct magic *m) { switch (m->str_flags & PSTRING_LEN) { case PSTRING_1_LE: @@ -3388,12 +3491,15 @@ file_pstring_length_size(const struct magic *m) case PSTRING_4_BE: return 4; default: - abort(); /* Impossible */ - return 1; + file_error(ms, 0, "corrupt magic file " + "(bad pascal string length %d)", + m->str_flags & PSTRING_LEN); + return FILE_BADSIZE; } } protected size_t -file_pstring_get_length(const struct magic *m, const char *ss) +file_pstring_get_length(struct magic_set *ms, const struct magic *m, + const char *ss) { size_t len = 0; const unsigned char *s = RCAST(const unsigned char *, ss); @@ -3428,11 +3534,18 @@ file_pstring_get_length(const struct magic *m, const char *ss) len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3; break; default: - abort(); /* Impossible */ + file_error(ms, 0, "corrupt magic file " + "(bad pascal string length %d)", + m->str_flags & PSTRING_LEN); + return FILE_BADSIZE; } - if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) - len -= file_pstring_length_size(m); + if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) { + size_t l = file_pstring_length_size(ms, m); + if (l == FILE_BADSIZE) + return l; + len -= l; + } return len; } diff --git a/ext/fileinfo/libmagic/ascmagic.c b/ext/fileinfo/libmagic/ascmagic.c index 348f6d6bd7..2804f67f79 100644 --- a/ext/fileinfo/libmagic/ascmagic.c +++ b/ext/fileinfo/libmagic/ascmagic.c @@ -35,12 +35,11 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.104 2019/05/07 02:27:11 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $") #endif /* lint */ #include "magic.h" #include <string.h> -#include <memory.h> #include <ctype.h> #include <stdlib.h> #ifdef HAVE_UNISTD_H @@ -116,7 +115,6 @@ file_ascmagic_with_encoding(struct magic_set *ms, int need_separator = 0; const char *subtype = NULL; - const char *subtype_mime = NULL; int has_escapes = 0; int has_backspace = 0; @@ -165,8 +163,11 @@ file_ascmagic_with_encoding(struct magic_set *ms, goto done; } } - if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) - return 0; + + if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) { + rv = 0; + goto done; + } /* Now try to discover other details about the file. */ for (i = 0; i < ulen; i++) { @@ -223,10 +224,6 @@ file_ascmagic_with_encoding(struct magic_set *ms, } if (need_separator && file_separator(ms) == -1) goto done; - } - if (subtype_mime) { - if (file_printf(ms, "%s", subtype_mime) == -1) - goto done; } else { if (file_printf(ms, "text/plain") == -1) goto done; diff --git a/ext/fileinfo/libmagic/buffer.c b/ext/fileinfo/libmagic/buffer.c index c61e15c2e0..3a02308220 100644 --- a/ext/fileinfo/libmagic/buffer.c +++ b/ext/fileinfo/libmagic/buffer.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: buffer.c,v 1.6 2019/05/07 02:27:11 christos Exp $") +FILE_RCSID("@(#)$File: buffer.c,v 1.8 2020/02/16 15:52:49 christos Exp $") #endif /* lint */ #include "magic.h" @@ -68,7 +68,7 @@ buffer_fill(const struct buffer *bb) struct buffer *b = CCAST(struct buffer *, bb); if (b->elen != 0) - return b->elen == CAST(size_t, ~0) ? -1 : 0; + return b->elen == FILE_BADSIZE ? -1 : 0; if (!S_ISREG(b->st.st_mode)) goto out; @@ -89,6 +89,6 @@ buffer_fill(const struct buffer *bb) return 0; out: - b->elen = CAST(size_t, ~0); + b->elen = FILE_BADSIZE; return -1; } diff --git a/ext/fileinfo/libmagic/cdf.c b/ext/fileinfo/libmagic/cdf.c index cdf273eda6..1a08dd7c0d 100644 --- a/ext/fileinfo/libmagic/cdf.c +++ b/ext/fileinfo/libmagic/cdf.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: cdf.c,v 1.114 2019/02/20 02:35:27 christos Exp $") +FILE_RCSID("@(#)$File: cdf.c,v 1.116 2019/08/26 14:31:39 christos Exp $") #endif #include <assert.h> @@ -63,6 +63,10 @@ FILE_RCSID("@(#)$File: cdf.c,v 1.114 2019/02/20 02:35:27 christos Exp $") #define EFTYPE EINVAL #endif +#ifndef SIZE_T_MAX +#define SIZE_T_MAX CAST(size_t, ~0ULL) +#endif + #include "cdf.h" #ifdef CDF_DEBUG @@ -388,17 +392,29 @@ ssize_t cdf_read_sector(const cdf_info_t *info, void *buf, size_t offs, size_t len, const cdf_header_t *h, cdf_secid_t id) { - size_t pos = CDF_SEC_POS(h, id); - assert(CDF_SEC_SIZE(h) == len); - return cdf_read(info, CAST(zend_off_t, pos), RCAST(char *, buf) + offs, len); + size_t ss = CDF_SEC_SIZE(h); + size_t pos; + + if (SIZE_T_MAX / ss < CAST(size_t, id)) + return -1; + + pos = CDF_SEC_POS(h, id); + assert(ss == len); + return cdf_read(info, CAST(off_t, pos), RCAST(char *, buf) + offs, len); } ssize_t cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs, size_t len, const cdf_header_t *h, cdf_secid_t id) { - size_t pos = CDF_SHORT_SEC_POS(h, id); - assert(CDF_SHORT_SEC_SIZE(h) == len); + size_t ss = CDF_SHORT_SEC_SIZE(h); + size_t pos; + + if (SIZE_T_MAX / ss < CAST(size_t, id)) + return -1; + + pos = CDF_SHORT_SEC_POS(h, id); + assert(ss == len); if (pos + len > CDF_SEC_SIZE(h) * sst->sst_len) { DPRINTF(("Out of bounds read %" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", diff --git a/ext/fileinfo/libmagic/compress.c b/ext/fileinfo/libmagic/compress.c index e2a2b8ff9c..7bfa19f5dd 100644 --- a/ext/fileinfo/libmagic/compress.c +++ b/ext/fileinfo/libmagic/compress.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: compress.c,v 1.121 2019/05/07 02:27:11 christos Exp $") +FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $") #endif #include "magic.h" @@ -45,6 +45,8 @@ FILE_RCSID("@(#)$File: compress.c,v 1.121 2019/05/07 02:27:11 christos Exp $") #endif #include <string.h> #include <errno.h> +#include <ctype.h> +#include <stdarg.h> #include <signal.h> #ifndef HAVE_SIG_T typedef void (*sig_t)(int); @@ -70,6 +72,11 @@ typedef void (*sig_t)(int); #include <bzlib.h> #endif +#if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT) +#define BUILTIN_XZLIB +#include <lzma.h> +#endif + #ifdef DEBUG int tty = -1; #define DPRINTF(...) do { \ @@ -112,10 +119,21 @@ zlibcmp(const unsigned char *buf) } #endif +#ifdef PHP_FILEINFO_UNCOMPRESS + +static int +lzmacmp(const unsigned char *buf) +{ + if (buf[0] != 0x5d || buf[1] || buf[2]) + return 0; + if (buf[12] && buf[12] != 0xff) + return 0; + return 1; +} + #define gzip_flags "-cd" #define lrzip_flags "-do" #define lzip_flags gzip_flags -#ifdef PHP_FILEINFO_UNCOMPRESS static const char *gzip_args[] = { "gzip", gzip_flags, NULL @@ -146,30 +164,39 @@ static const char *zstd_args[] = { #define do_bzlib NULL private const struct { - const void *magic; - size_t maglen; + union { + const char *magic; + int (*func)(const unsigned char *); + } u; + int maglen; const char **argv; void *unused; } compr[] = { - { "\037\235", 2, gzip_args, NULL }, /* compressed */ - /* Uncompress can get stuck; so use gzip first if we have it - * Idea from Damien Clark, thanks! */ - { "\037\235", 2, uncompress_args, NULL }, /* compressed */ - { "\037\213", 2, gzip_args, do_zlib }, /* gzipped */ - { "\037\236", 2, gzip_args, NULL }, /* frozen */ - { "\037\240", 2, gzip_args, NULL }, /* SCO LZH */ - /* the standard pack utilities do not accept standard input */ - { "\037\036", 2, gzip_args, NULL }, /* packed */ - { "PK\3\4", 4, gzip_args, NULL }, /* pkzipped, */ - /* ...only first file examined */ - { "BZh", 3, bzip2_args, do_bzlib }, /* bzip2-ed */ - { "LZIP", 4, lzip_args, NULL }, /* lzip-ed */ - { "\3757zXZ\0", 6, xz_args, NULL }, /* XZ Utils */ - { "LRZI", 4, lrzip_args, NULL }, /* LRZIP */ - { "\004\"M\030",4, lz4_args, NULL }, /* LZ4 */ - { "\x28\xB5\x2F\xFD", 4, zstd_args, NULL }, /* zstd */ +#define METH_FROZEN 2 +#define METH_BZIP 7 +#define METH_XZ 9 +#define METH_LZMA 13 +#define METH_ZLIB 14 + { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ + /* Uncompress can get stuck; so use gzip first if we have it + * Idea from Damien Clark, thanks! */ + { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ + { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ + { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ + { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ + /* the standard pack utilities do not accept standard input */ + { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ + { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ + /* ...only first file examined */ + { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ + { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ + { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ + { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ + { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ + { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ + { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ #ifdef ZLIBSUPPORT - { RCAST(const void *, zlibcmp), 0, zlib_args, NULL }, /* zlib */ + { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ #endif }; @@ -190,7 +217,11 @@ private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, #endif #ifdef BUILTIN_BZLIB private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, - size_t *, int); + size_t *); +#endif +#ifdef BUILTIN_XZLIB +private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, + size_t *); #endif static int makeerror(unsigned char **, size_t *, const char *, ...); @@ -233,15 +264,14 @@ file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) for (i = 0; i < ncompr; i++) { int zm; - if (nbytes < compr[i].maglen) + if (nbytes < CAST(size_t, abs(compr[i].maglen))) continue; -#ifdef ZLIBSUPPORT - if (compr[i].maglen == 0) - zm = (RCAST(int (*)(const unsigned char *), - CCAST(void *, compr[i].magic)))(buf); - else -#endif - zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0; + if (compr[i].maglen < 0) { + zm = (*compr[i].u.func)(buf); + } else { + zm = memcmp(buf, compr[i].u.magic, + CAST(size_t, compr[i].maglen)) == 0; + } if (!zm) continue; @@ -434,6 +464,7 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, #else { int te; + mode_t ou = umask(0); tfd = mkstemp(buf); (void)umask(ou); te = errno; @@ -569,6 +600,90 @@ err: } #endif +#ifdef BUILTIN_BZLIB +private int +uncompressbzlib(const unsigned char *old, unsigned char **newch, + size_t bytes_max, size_t *n) +{ + int rc; + bz_stream bz; + + memset(&bz, 0, sizeof(bz)); + rc = BZ2_bzDecompressInit(&bz, 0, 0); + if (rc != BZ_OK) + goto err; + + if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) + return makeerror(newch, n, "No buffer, %s", strerror(errno)); + + bz.next_in = CCAST(char *, RCAST(const char *, old)); + bz.avail_in = CAST(uint32_t, *n); + bz.next_out = RCAST(char *, *newch); + bz.avail_out = CAST(unsigned int, bytes_max); + + rc = BZ2_bzDecompress(&bz); + if (rc != BZ_OK && rc != BZ_STREAM_END) + goto err; + + /* Assume byte_max is within 32bit */ + /* assert(bz.total_out_hi32 == 0); */ + *n = CAST(size_t, bz.total_out_lo32); + rc = BZ2_bzDecompressEnd(&bz); + if (rc != BZ_OK) + goto err; + + /* let's keep the nul-terminate tradition */ + (*newch)[*n] = '\0'; + + return OKDATA; +err: + snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc); + *n = strlen(RCAST(char *, *newch)); + return ERRDATA; +} +#endif + +#ifdef BUILTIN_XZLIB +private int +uncompressxzlib(const unsigned char *old, unsigned char **newch, + size_t bytes_max, size_t *n) +{ + int rc; + lzma_stream xz; + + memset(&xz, 0, sizeof(xz)); + rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); + if (rc != LZMA_OK) + goto err; + + if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) + return makeerror(newch, n, "No buffer, %s", strerror(errno)); + + xz.next_in = CCAST(const uint8_t *, old); + xz.avail_in = CAST(uint32_t, *n); + xz.next_out = RCAST(uint8_t *, *newch); + xz.avail_out = CAST(unsigned int, bytes_max); + + rc = lzma_code(&xz, LZMA_RUN); + if (rc != LZMA_OK && rc != LZMA_STREAM_END) + goto err; + + *n = CAST(size_t, xz.total_out); + + lzma_end(&xz); + + /* let's keep the nul-terminate tradition */ + (*newch)[*n] = '\0'; + + return OKDATA; +err: + snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc); + *n = strlen(RCAST(char *, *newch)); + return ERRDATA; +} +#endif + + static int makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) { @@ -675,12 +790,24 @@ filter_error(unsigned char *ubuf, ssize_t n) private const char * methodname(size_t method) { + switch (method) { #ifdef BUILTIN_DECOMPRESS - /* FIXME: This doesn't cope with bzip2 */ - if (method == 2 || compr[method].maglen == 0) - return "zlib"; + case METH_FROZEN: + case METH_ZLIB: + return "zlib"; +#endif +#ifdef BUILTIN_BZLIB + case METH_BZIP: + return "bzlib"; #endif - return compr[method].argv[0]; +#ifdef BUILTIN_XZLIB + case METH_XZ: + case METH_LZMA: + return "xzlib"; +#endif + default: + return compr[method].argv[0]; + } } private int @@ -694,13 +821,26 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, size_t i; ssize_t r; + switch (method) { #ifdef BUILTIN_DECOMPRESS - /* FIXME: This doesn't cope with bzip2 */ - if (method == 2) + case METH_FROZEN: return uncompressgzipped(old, newch, bytes_max, n); - if (compr[method].maglen == 0) + case METH_ZLIB: return uncompresszlib(old, newch, bytes_max, n, 1); #endif +#ifdef BUILTIN_BZLIB + case METH_BZIP: + return uncompressbzlib(old, newch, bytes_max, n); +#endif +#ifdef BUILTIN_XZLIB + case METH_XZ: + case METH_LZMA: + return uncompressxzlib(old, newch, bytes_max, n); +#endif + default: + break; + } + (void)fflush(stdout); (void)fflush(stderr); diff --git a/ext/fileinfo/libmagic/der.c b/ext/fileinfo/libmagic/der.c index 067575bbd9..f6cc80051c 100644 --- a/ext/fileinfo/libmagic/der.c +++ b/ext/fileinfo/libmagic/der.c @@ -35,8 +35,11 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: der.c,v 1.16 2019/02/20 02:35:27 christos Exp $") +FILE_RCSID("@(#)$File: der.c,v 1.20 2020/06/07 19:10:37 christos Exp $") #endif +#else +#define SIZE_T_FORMAT "z" +#define CAST(a, b) ((a)(b)) #endif #include <sys/types.h> @@ -64,13 +67,13 @@ FILE_RCSID("@(#)$File: der.c,v 1.16 2019/02/20 02:35:27 christos Exp $") #define DER_CLASS_APPLICATION 1 #define DER_CLASS_CONTEXT 2 #define DER_CLASS_PRIVATE 3 -#ifdef DEBUG_DER +#if defined(DEBUG_DER) || defined(TEST_DER) static const char der_class[] = "UACP"; #endif #define DER_TYPE_PRIMITIVE 0 #define DER_TYPE_CONSTRUCTED 1 -#ifdef DEBUG_DER +#if defined(DEBUG_DER) || defined(TEST_DER) static const char der_type[] = "PC"; #endif @@ -88,7 +91,7 @@ static const char der_type[] = "PC"; #define DER_TAG_EMBEDDED_PDV 0x0b #define DER_TAG_UTF8_STRING 0x0c #define DER_TAG_RELATIVE_OID 0x0d -#define DER_TAG_RESERVED_1 0x0e +#define DER_TAG_TIME 0x0e #define DER_TAG_RESERVED_2 0x0f #define DER_TAG_SEQUENCE 0x10 #define DER_TAG_SET 0x11 @@ -105,16 +108,23 @@ static const char der_type[] = "PC"; #define DER_TAG_UNIVERSAL_STRING 0x1c #define DER_TAG_CHARACTER_STRING 0x1d #define DER_TAG_BMP_STRING 0x1e -#define DER_TAG_LONG 0x1f +#define DER_TAG_DATE 0x1f +#define DER_TAG_TIME_OF_DAY 0x20 +#define DER_TAG_DATE_TIME 0x21 +#define DER_TAG_DURATION 0x22 +#define DER_TAG_OID_IRI 0x23 +#define DER_TAG_RELATIVE_OID_IRI 0x24 +#define DER_TAG_LAST 0x25 static const char *der__tag[] = { "eoc", "bool", "int", "bit_str", "octet_str", "null", "obj_id", "obj_desc", "ext", "real", - "enum", "embed", "utf8_str", "oid", "res1", + "enum", "embed", "utf8_str", "rel_oid", "time", "res2", "seq", "set", "num_str", "prt_str", - "t61_str", "vid_str", "ia5_str", "utc_time", - "gen_time", "gr_str", "vis_str", "gen_str", - "char_str", "bmp_str", "long" + "t61_str", "vid_str", "ia5_str", "utc_time", "gen_time", + "gr_str", "vis_str", "gen_str", "univ_str", "char_str", + "bmp_str", "date", "tod", "datetime", "duration", + "oid-iri", "rel-oid-iri", }; #ifdef DEBUG_DER @@ -177,8 +187,10 @@ getlength(const uint8_t *c, size_t *p, size_t l) size_t len; int is_onebyte_result; - if (*p >= l) + if (*p >= l) { + DPRINTF(("%s:[1] %zu >= %zu\n", __func__, *p, l)); return DER_BAD; + } /* * Digits can either be 0b0 followed by the result, or 0b1 @@ -187,8 +199,10 @@ getlength(const uint8_t *c, size_t *p, size_t l) */ is_onebyte_result = (c[*p] & 0x80) == 0; digits = c[(*p)++] & 0x7f; - if (*p + digits >= l) + if (*p + digits >= l) { + DPRINTF(("%s:[2] %zu + %u >= %zu\n", __func__, *p, digits, l)); return DER_BAD; + } if (is_onebyte_result) return digits; @@ -201,15 +215,18 @@ getlength(const uint8_t *c, size_t *p, size_t l) for (i = 0; i < digits; i++) len = (len << 8) | c[(*p)++]; - if (len > UINT32_MAX - *p || *p + len >= l) + if (len > UINT32_MAX - *p || *p + len > l) { + DPRINTF(("%s:[3] bad len %zu + %zu >= %zu\n", + __func__, *p, len, l)); return DER_BAD; + } return CAST(uint32_t, len); } static const char * der_tag(char *buf, size_t len, uint32_t tag) { - if (tag < DER_TAG_LONG) + if (tag < DER_TAG_LAST) strlcpy(buf, der__tag[tag], len); else snprintf(buf, len, "%#x", tag); @@ -226,8 +243,14 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len) case DER_TAG_PRINTABLE_STRING: case DER_TAG_UTF8_STRING: case DER_TAG_IA5_STRING: - case DER_TAG_UTCTIME: return snprintf(buf, blen, "%.*s", len, RCAST(const char *, q)); + case DER_TAG_UTCTIME: + if (len < 12) + break; + return snprintf(buf, blen, + "20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2], + d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]); + break; default: break; } @@ -246,14 +269,18 @@ der_offs(struct magic_set *ms, struct magic *m, size_t nbytes) const uint8_t *b = RCAST(const uint8_t *, ms->search.s); size_t offs = 0, len = ms->search.s_len ? ms->search.s_len : nbytes; - if (gettag(b, &offs, len) == DER_BAD) + if (gettag(b, &offs, len) == DER_BAD) { + DPRINTF(("%s: bad tag 1\n", __func__)); return -1; + } DPRINTF(("%s1: %d %" SIZE_T_FORMAT "u %u\n", __func__, ms->offset, offs, m->offset)); uint32_t tlen = getlength(b, &offs, len); - if (tlen == DER_BAD) + if (tlen == DER_BAD) { + DPRINTF(("%s: bad tag 2\n", __func__)); return -1; + } DPRINTF(("%s2: %d %" SIZE_T_FORMAT "u %u\n", __func__, ms->offset, offs, tlen)); @@ -283,13 +310,22 @@ der_cmp(struct magic_set *ms, struct magic *m) uint32_t tag, tlen; char buf[128]; + DPRINTF(("%s: compare %zu bytes\n", __func__, len)); + tag = gettag(b, &offs, len); - if (tag == DER_BAD) + if (tag == DER_BAD) { + DPRINTF(("%s: bad tag 1\n", __func__)); return -1; + } + + DPRINTF(("%s1: %d %" SIZE_T_FORMAT "u %u\n", __func__, ms->offset, + offs, m->offset)); tlen = getlength(b, &offs, len); - if (tlen == DER_BAD) + if (tlen == DER_BAD) { + DPRINTF(("%s: bad tag 2\n", __func__)); return -1; + } der_tag(buf, sizeof(buf), tag); if ((ms->flags & MAGIC_DEBUG) != 0) @@ -345,6 +381,8 @@ printtag(uint32_t tag, const void *q, uint32_t len) switch (tag) { case DER_TAG_PRINTABLE_STRING: case DER_TAG_UTF8_STRING: + case DER_TAG_IA5_STRING: + case DER_TAG_UTCTIME: printf("%.*s\n", len, (const char *)q); return; default: @@ -368,8 +406,8 @@ printdata(size_t level, const void *v, size_t x, size_t l) uint8_t c = getclass(p[x]); uint8_t t = gettype(p[x]); ox = x; - if (x != 0) - printf("%.2x %.2x %.2x\n", p[x - 1], p[x], p[x + 1]); +// if (x != 0) +// printf("%.2x %.2x %.2x\n", p[x - 1], p[x], p[x + 1]); uint32_t tag = gettag(p, &x, ep - p + x); if (p + x >= ep) break; diff --git a/ext/fileinfo/libmagic/encoding.c b/ext/fileinfo/libmagic/encoding.c index 8d0e6012e5..37c182630e 100644 --- a/ext/fileinfo/libmagic/encoding.c +++ b/ext/fileinfo/libmagic/encoding.c @@ -35,12 +35,11 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: encoding.c,v 1.20 2019/04/15 16:48:41 christos Exp $") +FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $") #endif /* lint */ #include "magic.h" #include <string.h> -#include <memory.h> #include <stdlib.h> diff --git a/ext/fileinfo/libmagic/file.h b/ext/fileinfo/libmagic/file.h index c74c159569..72ba63d708 100644 --- a/ext/fileinfo/libmagic/file.h +++ b/ext/fileinfo/libmagic/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.206 2019/05/07 02:27:11 christos Exp $ + * @(#)$File: file.h,v 1.220 2020/06/08 17:38:27 christos Exp $ */ #ifndef __file_h__ @@ -35,36 +35,46 @@ #include "config.h" -#ifdef PHP_WIN32 - #ifdef _WIN64 - #define SIZE_T_FORMAT "I64" - #else - #define SIZE_T_FORMAT "" - #endif - #define INT64_T_FORMAT "I64" - #define INTMAX_T_FORMAT "I64" -#else - #define SIZE_T_FORMAT "z" - #define INT64_T_FORMAT "ll" - #define INTMAX_T_FORMAT "j" -#endif +#include "ext/standard/php_string.h" +#include "ext/pcre/php_pcre.h" -#include <stdio.h> /* Include that here, to make sure __P gets defined */ -#include <errno.h> -#include <fcntl.h> /* For open and flags */ +#include <stdint.h> +#include <inttypes.h> #ifndef __STDC_LIMIT_MACROS -# define __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS #endif #ifndef __STDC_FORMAT_MACROS -# define __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#ifdef _WIN32 +# ifdef PRIu32 +# ifdef _WIN64 +# define SIZE_T_FORMAT PRIu64 +# else +# define SIZE_T_FORMAT PRIu32 +# endif +# define INT64_T_FORMAT PRIi64 +# define INTMAX_T_FORMAT PRIiMAX +# else +# ifdef _WIN64 +# define SIZE_T_FORMAT "I64" +# else +# define SIZE_T_FORMAT "" +# endif +# define INT64_T_FORMAT "I64" +# define INTMAX_T_FORMAT "I64" +# endif +#else +# define SIZE_T_FORMAT "z" +# define INT64_T_FORMAT "ll" +# define INTMAX_T_FORMAT "j" #endif -#include <stdint.h> -#include <inttypes.h> -#include "php.h" -#include "ext/standard/php_string.h" -#include "ext/pcre/php_pcre.h" +#include <stdio.h> /* Include that here, to make sure __P gets defined */ +#include <errno.h> +#include <fcntl.h> /* For open and flags */ #include <sys/types.h> #ifdef PHP_WIN32 @@ -124,18 +134,16 @@ #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif -#ifndef FILE_BYTES_MAX -# define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */ -#endif -#define MAXMAGIS 8192 /* max entries in any one magic file - or directory */ +#define FILE_BADSIZE CAST(size_t, ~0ul) #define MAXDESC 64 /* max len of text description/MIME type */ #define MAXMIME 80 /* max len of text MIME type */ -#define MAXstring 96 /* max len of "string" types */ +#define MAXstring 128 /* max len of "string" types */ #define MAGICNO 0xF11E041C -#define VERSIONNO 14 -#define FILE_MAGICSIZE 344 +#define VERSIONNO 16 +#define FILE_MAGICSIZE 376 + +#define FILE_GUID_SIZE sizeof("XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") #define FILE_LOAD 0 #define FILE_CHECK 1 @@ -162,6 +170,7 @@ union VALUETYPE { uint8_t hq[8]; /* 8 bytes of a fixed-endian "quad" */ char s[MAXstring]; /* the search string or regex pattern */ unsigned char us[MAXstring]; + uint64_t guid[2]; float f; double d; }; @@ -178,6 +187,7 @@ struct magic { #define BINTEST 0x20 /* test is for a binary type (set only for top-level tests) */ #define TEXTTEST 0x40 /* for passing to file_softmagic */ +#define OFFNEGATIVE 0x80 /* relative to the end of file */ uint8_t factor; @@ -235,7 +245,9 @@ struct magic { #define FILE_USE 46 #define FILE_CLEAR 47 #define FILE_DER 48 -#define FILE_NAMES_SIZE 49 /* size of array to contain all names */ +#define FILE_GUID 49 +#define FILE_OFFSET 50 +#define FILE_NAMES_SIZE 51 /* size of array to contain all names */ #define IS_LIBMAGIC_STRING(t) \ ((t) == FILE_STRING || \ @@ -399,9 +411,10 @@ struct magic_set { } c; struct out { char *buf; /* Accumulation buffer */ + size_t blen; /* Length of buffer */ char *pbuf; /* Printable buffer */ } o; - uint32_t offset; /* a copy of m->offset while we */ + uint32_t offset; /* a copy of m->offset while we */ /* are working on the magic entry */ uint32_t eoffset; /* offset from end of file */ int error; @@ -430,11 +443,14 @@ struct magic_set { uint16_t elf_notes_max; uint16_t regex_max; size_t bytes_max; /* number of bytes to read from file */ -#define FILE_INDIR_MAX 50 -#define FILE_NAME_MAX 30 -#define FILE_ELF_SHNUM_MAX 32768 -#define FILE_ELF_PHNUM_MAX 2048 +#ifndef FILE_BYTES_MAX +# define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */ +#endif #define FILE_ELF_NOTES_MAX 256 +#define FILE_ELF_PHNUM_MAX 2048 +#define FILE_ELF_SHNUM_MAX 32768 +#define FILE_INDIR_MAX 50 +#define FILE_NAME_MAX 50 #define FILE_REGEX_MAX 8192 }; @@ -443,7 +459,7 @@ typedef unsigned long unicodechar; #define FILE_T_LOCAL 1 #define FILE_T_WINDOWS 2 -protected const char *file_fmttime(uint64_t, int, char *); +protected const char *file_fmttime(char *, size_t, uint64_t, int); protected struct magic_set *file_ms_alloc(int); protected void file_ms_free(struct magic_set *); protected int file_buffer(struct magic_set *, php_stream *, zend_stat_t *, const char *, const void *, @@ -451,7 +467,11 @@ protected int file_buffer(struct magic_set *, php_stream *, zend_stat_t *, const protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *); protected int file_pipe2file(struct magic_set *, int, const void *, size_t); protected int file_separator(struct magic_set *); +protected char *file_copystr(char *, size_t, size_t, const char *); +protected int file_checkfmt(char *, size_t, const char *); protected size_t file_printedlen(const struct magic_set *); +protected int file_print_guid(char *, size_t, const uint64_t *); +protected int file_parse_guid(const char *, uint64_t *); protected int file_replace(struct magic_set *, const char *, const char *); protected int file_printf(struct magic_set *, const char *, ...); protected int file_reset(struct magic_set *, int); @@ -468,6 +488,7 @@ protected int file_ascmagic_with_encoding(struct magic_set *, protected int file_encoding(struct magic_set *, const struct buffer *, unicodechar **, size_t *, const char **, const char **, const char **); protected int file_is_json(struct magic_set *, const struct buffer *); +protected int file_is_csv(struct magic_set *, const struct buffer *, int); protected int file_is_tar(struct magic_set *, const struct buffer *); protected int file_softmagic(struct magic_set *, const struct buffer *, uint16_t *, uint16_t *, int, int); @@ -483,6 +504,7 @@ protected void file_oomem(struct magic_set *, size_t); protected void file_error(struct magic_set *, int, const char *, ...); protected void file_magerror(struct magic_set *, const char *, ...); protected void file_magwarn(struct magic_set *, const char *, ...); +protected void file_mdump(struct magic *); protected void file_showstr(FILE *, const char *, size_t); protected size_t file_mbswidth(const char *); protected const char *file_getbuffer(struct magic_set *); @@ -490,8 +512,10 @@ protected ssize_t sread(int, void *, size_t, int); protected int file_check_mem(struct magic_set *, unsigned int); protected int file_looks_utf8(const unsigned char *, size_t, unicodechar *, size_t *); -protected size_t file_pstring_length_size(const struct magic *); -protected size_t file_pstring_get_length(const struct magic *, const char *); +protected size_t file_pstring_length_size(struct magic_set *, + const struct magic *); +protected size_t file_pstring_get_length(struct magic_set *, + const struct magic *, const char *); protected char * file_printable(char *, size_t, const char *, size_t); #ifdef __EMX__ protected int file_os2_apptype(struct magic_set *, const char *, const void *, @@ -507,14 +531,17 @@ public zend_string* convert_libmagic_pattern(char *val, size_t len, uint32_t opt typedef struct { char *buf; + size_t blen; uint32_t offset; } file_pushbuf_t; protected file_pushbuf_t *file_push_buffer(struct magic_set *); protected char *file_pop_buffer(struct magic_set *, file_pushbuf_t *); +#ifndef COMPILE_ONLY extern const char *file_names[]; extern const size_t file_nnames; +#endif #ifndef strlcpy size_t strlcpy(char *, const char *, size_t); @@ -560,6 +587,9 @@ static const char *rcsid(const char *p) { \ #else #define FILE_RCSID(id) #endif +#ifndef __RCSID +#define __RCSID(a) +#endif #ifdef PHP_WIN32 #ifdef _WIN64 @@ -572,8 +602,5 @@ static const char *rcsid(const char *p) { \ #define FINFO_LSEEK_FUNC lseek #define FINFO_READ_FUNC read #endif -#ifndef __RCSID -#define __RCSID(a) -#endif #endif /* __file_h__ */ diff --git a/ext/fileinfo/libmagic/fsmagic.c b/ext/fileinfo/libmagic/fsmagic.c index 938b526a37..a1b18d479f 100644 --- a/ext/fileinfo/libmagic/fsmagic.c +++ b/ext/fileinfo/libmagic/fsmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: fsmagic.c,v 1.80 2019/04/23 18:59:27 christos Exp $") +FILE_RCSID("@(#)$File: fsmagic.c,v 1.81 2019/07/16 13:30:32 christos Exp $") #endif /* lint */ #include "magic.h" @@ -246,5 +246,11 @@ file_fsmagic(struct magic_set *ms, const char *fn, zend_stat_t *sb) if (file_printf(ms, " ") == -1) return -1; } + /* + * If we were looking for extensions or apple (silent) it is not our + * job to print here, so don't count this as a match. + */ + if (ret == 1 && silent) + return 0; return ret; } diff --git a/ext/fileinfo/libmagic/funcs.c b/ext/fileinfo/libmagic/funcs.c index a21e85ffc0..2ab015b279 100644 --- a/ext/fileinfo/libmagic/funcs.c +++ b/ext/fileinfo/libmagic/funcs.c @@ -27,16 +27,21 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.104 2019/05/07 02:27:11 christos Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $") #endif /* lint */ #include "magic.h" +#include <assert.h> #include <stdarg.h> #include <stdlib.h> #include <string.h> #include <ctype.h> +#if defined(HAVE_WCHAR_H) #include <wchar.h> +#endif +#if defined(HAVE_WCTYPE_H) #include <wctype.h> +#endif #include <limits.h> #ifndef SIZE_MAX @@ -50,6 +55,77 @@ FILE_RCSID("@(#)$File: funcs.c,v 1.104 2019/05/07 02:27:11 christos Exp $") # define PREG_OFFSET_CAPTURE (1<<8) #endif +protected char * +file_copystr(char *buf, size_t blen, size_t width, const char *str) +{ + if (++width > blen) + width = blen; + strlcpy(buf, str, width); + return buf; +} + +private void +file_clearbuf(struct magic_set *ms) +{ + efree(ms->o.buf); + ms->o.buf = NULL; + ms->o.blen = 0; +} + +private int +file_checkfield(char *msg, size_t mlen, const char *what, const char **pp) +{ + const char *p = *pp; + int fw = 0; + + while (*p && isdigit((unsigned char)*p)) + fw = fw * 10 + (*p++ - '0'); + + *pp = p; + + if (fw < 1024) + return 1; + if (msg) + snprintf(msg, mlen, "field %s too large: %d", what, fw); + + return 0; +} + +protected int +file_checkfmt(char *msg, size_t mlen, const char *fmt) +{ + for (const char *p = fmt; *p; p++) { + if (*p != '%') + continue; + if (*++p == '%') + continue; + // Skip uninteresting. + while (strchr("0.'+- ", *p) != NULL) + p++; + if (*p == '*') { + if (msg) + snprintf(msg, mlen, "* not allowed in format"); + return -1; + } + + if (!file_checkfield(msg, mlen, "width", &p)) + return -1; + + if (*p == '.') { + p++; + if (!file_checkfield(msg, mlen, "precision", &p)) + return -1; + } + + if (!isalpha((unsigned char)*p)) { + if (msg) + snprintf(msg, mlen, "bad format char: %c", *p); + return -1; + } + } + return 0; +} + protected int file_printf(struct magic_set *ms, const char *fmt, ...) { @@ -256,6 +332,7 @@ file_buffer(struct magic_set *ms, php_stream *stream, zend_stat_t *st, #endif #if PHP_FILEINFO_UNCOMPRESS + /* try compression stuff */ if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { m = file_zmagic(ms, &b, inname); if ((ms->flags & MAGIC_DEBUG) != 0) @@ -287,6 +364,17 @@ file_buffer(struct magic_set *ms, php_stream *stream, zend_stat_t *st, } } + /* Check if we have a CSV file */ + if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) { + m = file_is_csv(ms, &b, looks_text); + if ((ms->flags & MAGIC_DEBUG) != 0) + (void)fprintf(stderr, "[try csv %d]\n", m); + if (m) { + if (checkdone(ms, &rv)) + goto done; + } + } + /* Check if we have a CDF file */ if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { m = file_trycdf(ms, &b); @@ -385,10 +473,7 @@ file_reset(struct magic_set *ms, int checkloaded) file_error(ms, 0, "no magic files loaded"); return -1; } - if (ms->o.buf) { - efree(ms->o.buf); - ms->o.buf = NULL; - } + file_clearbuf(ms); if (ms->o.pbuf) { efree(ms->o.pbuf); ms->o.pbuf = NULL; @@ -434,7 +519,7 @@ file_getbuffer(struct magic_set *ms) } ms->o.pbuf = pbuf; -#if defined(HAVE_WCWIDTH) +#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) { mbstate_t state; wchar_t nextchar; @@ -562,9 +647,11 @@ file_push_buffer(struct magic_set *ms) return NULL; pb->buf = ms->o.buf; + pb->blen = ms->o.blen; pb->offset = ms->offset; ms->o.buf = NULL; + ms->o.blen = 0; ms->offset = 0; return pb; @@ -584,6 +671,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) rbuf = ms->o.buf; ms->o.buf = pb->buf; + ms->o.blen = pb->blen; ms->offset = pb->offset; efree(pb); @@ -615,3 +703,33 @@ file_printable(char *buf, size_t bufsiz, const char *str, size_t slen) *ptr = '\0'; return buf; } + +struct guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +}; + +protected int +file_parse_guid(const char *s, uint64_t *guid) +{ + struct guid *g = CAST(struct guid *, guid); + return sscanf(s, + "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", + &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1], + &g->data4[2], &g->data4[3], &g->data4[4], &g->data4[5], + &g->data4[6], &g->data4[7]) == 11 ? 0 : -1; +} + +protected int +file_print_guid(char *str, size_t len, const uint64_t *guid) +{ + const struct guid *g = CAST(const struct guid *, guid); + + return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-" + "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX", + g->data1, g->data2, g->data3, g->data4[0], g->data4[1], + g->data4[2], g->data4[3], g->data4[4], g->data4[5], + g->data4[6], g->data4[7]); +} diff --git a/ext/fileinfo/libmagic/is_csv.c b/ext/fileinfo/libmagic/is_csv.c new file mode 100644 index 0000000000..0081088c80 --- /dev/null +++ b/ext/fileinfo/libmagic/is_csv.c @@ -0,0 +1,197 @@ +/*- + * Copyright (c) 2019 Christos Zoulas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Parse CSV object serialization format (RFC-4180, RFC-7111) + */ + +#ifndef TEST +#include "file.h" + +#ifndef lint +FILE_RCSID("@(#)$File: is_csv.c,v 1.4 2019/06/26 20:31:31 christos Exp $") +#endif + +#include <string.h> +#include "magic.h" +#else +#include <sys/types.h> +#endif + + +#ifdef DEBUG +#include <stdio.h> +#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__) +#else +#define DPRINTF(fmt, ...) +#endif + +/* + * if CSV_LINES == 0: + * check all the lines in the buffer + * otherwise: + * check only up-to the number of lines specified + * + * the last line count is always ignored if it does not end in CRLF + */ +#ifndef CSV_LINES +#define CSV_LINES 10 +#endif + +static int csv_parse(const unsigned char *, const unsigned char *); + +static const unsigned char * +eatquote(const unsigned char *uc, const unsigned char *ue) +{ + int quote = 0; + + while (uc < ue) { + unsigned char c = *uc++; + if (c != '"') { + // We already got one, done. + if (quote) { + return --uc; + } + continue; + } + if (quote) { + // quote-quote escapes + quote = 0; + continue; + } + // first quote + quote = 1; + } + return ue; +} + +static int +csv_parse(const unsigned char *uc, const unsigned char *ue) +{ + size_t nf = 0, tf = 0, nl = 0; + + while (uc < ue) { + unsigned char c; + switch (c = *uc++) { + case '"': + // Eat until the matching quote + uc = eatquote(uc, ue); + break; + case ',': + nf++; + break; + case '\n': + DPRINTF("%zu %zu %zu\n", nl, nf, tf); + nl++; +#if CSV_LINES + if (nl == CSV_LINES) + return tf != 0 && tf == nf; +#endif + if (tf == 0) { + // First time and no fields, give up + if (nf == 0) + return 0; + // First time, set the number of fields + tf = nf; + } else if (tf != nf) { + // Field number mismatch, we are done. + return 0; + } + nf = 0; + break; + default: + break; + } + } + return tf && nl > 2; +} + +#ifndef TEST +int +file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text) +{ + const unsigned char *uc = CAST(const unsigned char *, b->fbuf); + const unsigned char *ue = uc + b->flen; + int mime = ms->flags & MAGIC_MIME; + + if (!looks_text) + return 0; + + if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) + return 0; + + if (!csv_parse(uc, ue)) + return 0; + + if (mime == MAGIC_MIME_ENCODING) + return 1; + + if (mime) { + if (file_printf(ms, "application/csv") == -1) + return -1; + return 1; + } + + if (file_printf(ms, "CSV text") == -1) + return -1; + + return 1; +} + +#else + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <err.h> + +int +main(int argc, char *argv[]) +{ + int fd, rv; + struct stat st; + unsigned char *p; + + if ((fd = open(argv[1], O_RDONLY)) == -1) + err(EXIT_FAILURE, "Can't open `%s'", argv[1]); + + if (fstat(fd, &st) == -1) + err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); + + if ((p = malloc(st.st_size)) == NULL) + err(EXIT_FAILURE, "Can't allocate %jd bytes", + (intmax_t)st.st_size); + if (read(fd, p, st.st_size) != st.st_size) + err(EXIT_FAILURE, "Can't read %jd bytes", + (intmax_t)st.st_size); + printf("is csv %d\n", csv_parse(p, p + st.st_size)); + return 0; +} +#endif diff --git a/ext/fileinfo/libmagic/is_json.c b/ext/fileinfo/libmagic/is_json.c index 206ec3795e..0b12438ff2 100644 --- a/ext/fileinfo/libmagic/is_json.c +++ b/ext/fileinfo/libmagic/is_json.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $") +FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $") #endif #include <string.h> @@ -156,6 +156,7 @@ json_parse_string(const unsigned char **ucp, const unsigned char *ue) } case '"': *ucp = uc; + DPRINTF("Good string: ", uc, *ucp); return 1; default: continue; @@ -172,23 +173,24 @@ json_parse_array(const unsigned char **ucp, const unsigned char *ue, size_t *st, size_t lvl) { const unsigned char *uc = *ucp; - int more = 0; /* Array has more than 1 element */ DPRINTF("Parse array: ", uc, *ucp); while (uc < ue) { + if (*uc == ']') + goto done; if (!json_parse(&uc, ue, st, lvl + 1)) goto out; if (uc == ue) goto out; switch (*uc) { case ',': - more++; uc++; continue; case ']': - if (more) - st[JSON_ARRAYN]++; + done: + st[JSON_ARRAYN]++; *ucp = uc + 1; + DPRINTF("Good array: ", uc, *ucp); return 1; default: goto out; @@ -210,6 +212,10 @@ json_parse_object(const unsigned char **ucp, const unsigned char *ue, uc = json_skip_space(uc, ue); if (uc == ue) goto out; + if (*uc == '}') { + uc++; + goto done; + } if (*uc++ != '"') { DPRINTF("not string", uc, *ucp); goto out; @@ -236,6 +242,7 @@ json_parse_object(const unsigned char **ucp, const unsigned char *ue, case ',': continue; case '}': /* { */ + done: *ucp = uc; DPRINTF("Good object: ", uc, *ucp); return 1; diff --git a/ext/fileinfo/libmagic/magic.c b/ext/fileinfo/libmagic/magic.c index 56c8acfcac..8cd40119b5 100644 --- a/ext/fileinfo/libmagic/magic.c +++ b/ext/fileinfo/libmagic/magic.c @@ -28,7 +28,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: magic.c,v 1.111 2019/05/07 02:27:11 christos Exp $") +FILE_RCSID("@(#)$File: magic.c,v 1.112 2020/06/08 19:44:10 christos Exp $") #endif /* lint */ #include "magic.h" @@ -189,6 +189,7 @@ close_and_restore(const struct magic_set *ms, const char *name, int fd, } #endif +#ifndef COMPILE_ONLY /* * find type of descriptor @@ -312,6 +313,7 @@ magic_buffer(struct magic_set *ms, const void *buf, size_t nb) } return file_getbuffer(ms); } +#endif public const char * magic_error(struct magic_set *ms) diff --git a/ext/fileinfo/libmagic/magic.h b/ext/fileinfo/libmagic/magic.h index ee0aa13678..266e99a8cb 100644 --- a/ext/fileinfo/libmagic/magic.h +++ b/ext/fileinfo/libmagic/magic.h @@ -56,6 +56,7 @@ #define MAGIC_NO_CHECK_ELF 0x0010000 /* Don't check for elf details */ #define MAGIC_NO_CHECK_TEXT 0x0020000 /* Don't check for text files */ #define MAGIC_NO_CHECK_CDF 0x0040000 /* Don't check for cdf files */ +#define MAGIC_NO_CHECK_CSV 0x0080000 /* Don't check for CSV files */ #define MAGIC_NO_CHECK_TOKENS 0x0100000 /* Don't check tokens */ #define MAGIC_NO_CHECK_ENCODING 0x0200000 /* Don't check text encodings */ #define MAGIC_NO_CHECK_JSON 0x0400000 /* Don't check for JSON files */ @@ -68,6 +69,7 @@ MAGIC_NO_CHECK_APPTYPE | \ MAGIC_NO_CHECK_ELF | \ MAGIC_NO_CHECK_TEXT | \ + MAGIC_NO_CHECK_CSV | \ MAGIC_NO_CHECK_CDF | \ MAGIC_NO_CHECK_TOKENS | \ MAGIC_NO_CHECK_ENCODING | \ @@ -111,7 +113,7 @@ b\31transp_compression\0\ #define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */ #define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */ -#define MAGIC_VERSION 537 /* This implementation */ +#define MAGIC_VERSION 539 /* This implementation */ #ifdef __cplusplus diff --git a/ext/fileinfo/libmagic/print.c b/ext/fileinfo/libmagic/print.c index 94ff571bd0..edd4a6320a 100644 --- a/ext/fileinfo/libmagic/print.c +++ b/ext/fileinfo/libmagic/print.c @@ -33,7 +33,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: print.c,v 1.85 2019/03/12 20:43:05 christos Exp $") +FILE_RCSID("@(#)$File: print.c,v 1.88 2020/05/09 18:57:15 christos Exp $") #endif /* lint */ #include <string.h> @@ -53,7 +53,7 @@ protected void file_mdump(struct magic *m) { static const char optyp[] = { FILE_OPS }; - char tbuf[26]; + char tbuf[256]; (void) fprintf(stderr, "%u: %.*s %u", m->lineno, (m->cont_level & 7) + 1, ">>>>>>>>", m->offset); @@ -142,6 +142,7 @@ file_mdump(struct magic *m) case FILE_BEQUAD: case FILE_LEQUAD: case FILE_QUAD: + case FILE_OFFSET: (void) fprintf(stderr, "%" INT64_T_FORMAT "d", CAST(long long, m->value.q)); break; @@ -159,32 +160,35 @@ file_mdump(struct magic *m) case FILE_BEDATE: case FILE_MEDATE: (void)fprintf(stderr, "%s,", - file_fmttime(m->value.l, 0, tbuf)); + file_fmttime(tbuf, sizeof(tbuf), m->value.l, 0)); break; case FILE_LDATE: case FILE_LELDATE: case FILE_BELDATE: case FILE_MELDATE: (void)fprintf(stderr, "%s,", - file_fmttime(m->value.l, FILE_T_LOCAL, tbuf)); + file_fmttime(tbuf, sizeof(tbuf), m->value.l, + FILE_T_LOCAL)); break; case FILE_QDATE: case FILE_LEQDATE: case FILE_BEQDATE: (void)fprintf(stderr, "%s,", - file_fmttime(m->value.q, 0, tbuf)); + file_fmttime(tbuf, sizeof(tbuf), m->value.q, 0)); break; case FILE_QLDATE: case FILE_LEQLDATE: case FILE_BEQLDATE: (void)fprintf(stderr, "%s,", - file_fmttime(m->value.q, FILE_T_LOCAL, tbuf)); + file_fmttime(tbuf, sizeof(tbuf), m->value.q, + FILE_T_LOCAL)); break; case FILE_QWDATE: case FILE_LEQWDATE: case FILE_BEQWDATE: (void)fprintf(stderr, "%s,", - file_fmttime(m->value.q, FILE_T_WINDOWS, tbuf)); + file_fmttime(tbuf, sizeof(tbuf), m->value.q, + FILE_T_WINDOWS)); break; case FILE_FLOAT: case FILE_BEFLOAT: @@ -204,6 +208,12 @@ file_mdump(struct magic *m) case FILE_DER: (void) fprintf(stderr, "'%s'", m->value.s); break; + case FILE_GUID: + (void) file_print_guid(tbuf, sizeof(tbuf), + m->value.guid); + (void) fprintf(stderr, "%s", tbuf); + break; + default: (void) fprintf(stderr, "*bad type %d*", m->type); break; @@ -233,7 +243,7 @@ file_magwarn(struct magic_set *ms, const char *f, ...) } protected const char * -file_fmttime(uint64_t v, int flags, char *buf) +file_fmttime(char *buf, size_t bsize, uint64_t v, int flags) { char *pp; time_t t; @@ -263,5 +273,6 @@ file_fmttime(uint64_t v, int flags, char *buf) pp[strcspn(pp, "\n")] = '\0'; return pp; out: - return strcpy(buf, "*Invalid time*"); + strlcpy(buf, "*Invalid time*", bsize); + return buf; } diff --git a/ext/fileinfo/libmagic/readcdf.c b/ext/fileinfo/libmagic/readcdf.c index be75e398cf..23bf62fc97 100644 --- a/ext/fileinfo/libmagic/readcdf.c +++ b/ext/fileinfo/libmagic/readcdf.c @@ -26,7 +26,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: readcdf.c,v 1.73 2019/03/12 20:43:05 christos Exp $") +FILE_RCSID("@(#)$File: readcdf.c,v 1.74 2019/09/11 15:46:30 christos Exp $") #endif #include <assert.h> diff --git a/ext/fileinfo/libmagic/softmagic.c b/ext/fileinfo/libmagic/softmagic.c index fa272f625d..d791d65ff4 100644 --- a/ext/fileinfo/libmagic/softmagic.c +++ b/ext/fileinfo/libmagic/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.286 2019/05/17 02:24:59 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $") #endif /* lint */ #include "magic.h" @@ -337,6 +337,13 @@ flush: if (msetoffset(ms, m, &bb, b, offset, cont_level) == -1) goto flush; if (m->flag & OFFADD) { + if (cont_level == 0) { + if ((ms->flags & MAGIC_DEBUG) != 0) + fprintf(stderr, + "direct *zero*" + " cont_level\n"); + return 0; + } ms->offset += ms->c.li[cont_level - 1].off; } @@ -631,6 +638,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_QUAD: case FILE_BEQUAD: case FILE_LEQUAD: + case FILE_OFFSET: v = file_signextend(ms, m, p->q); switch (check_fmt(ms, desc)) { case -1: @@ -688,8 +696,12 @@ mprint(struct magic_set *ms, struct magic *m) sizeof(p->s) - (str - p->s))) == -1) return -1; - if (m->type == FILE_PSTRING) - t += file_pstring_length_size(m); + if (m->type == FILE_PSTRING) { + size_t l = file_pstring_length_size(ms, m); + if (l == FILE_BADSIZE) + return -1; + t += l; + } } break; @@ -698,7 +710,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_LEDATE: case FILE_MEDATE: if (file_printf(ms, F(ms, desc, "%s"), - file_fmttime(p->l, 0, tbuf)) == -1) + file_fmttime(tbuf, sizeof(tbuf), p->l, 0)) == -1) return -1; t = ms->offset + sizeof(uint32_t); break; @@ -708,7 +720,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_LELDATE: case FILE_MELDATE: if (file_printf(ms, F(ms, desc, "%s"), - file_fmttime(p->l, FILE_T_LOCAL, tbuf)) == -1) + file_fmttime(tbuf, sizeof(tbuf), p->l, FILE_T_LOCAL)) == -1) return -1; t = ms->offset + sizeof(uint32_t); break; @@ -717,7 +729,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQDATE: case FILE_LEQDATE: if (file_printf(ms, F(ms, desc, "%s"), - file_fmttime(p->q, 0, tbuf)) == -1) + file_fmttime(tbuf, sizeof(tbuf), p->q, 0)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -726,7 +738,7 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQLDATE: case FILE_LEQLDATE: if (file_printf(ms, F(ms, desc, "%s"), - file_fmttime(p->q, FILE_T_LOCAL, tbuf)) == -1) + file_fmttime(tbuf, sizeof(tbuf), p->q, FILE_T_LOCAL)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -735,7 +747,8 @@ mprint(struct magic_set *ms, struct magic *m) case FILE_BEQWDATE: case FILE_LEQWDATE: if (file_printf(ms, F(ms, desc, "%s"), - file_fmttime(p->q, FILE_T_WINDOWS, tbuf)) == -1) + file_fmttime(tbuf, sizeof(tbuf), p->q, FILE_T_WINDOWS)) + == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -788,7 +801,7 @@ mprint(struct magic_set *ms, struct magic *m) cp = estrndup(RCAST(const char *, ms->search.s), ms->search.rm_len); rval = file_printf(ms, F(ms, desc, "%s"), - file_printable(sbuf, sizeof(sbuf), cp, ms->search.rm_len)); + file_printable(sbuf, sizeof(sbuf), cp, ms->search.rm_len)); efree(cp); if (rval == -1) @@ -820,6 +833,12 @@ mprint(struct magic_set *ms, struct magic *m) return -1; t = ms->offset; break; + case FILE_GUID: + (void) file_print_guid(buf, sizeof(buf), ms->ms_value.guid); + if (file_printf(ms, F(ms, desc, "%s"), buf) == -1) + return -1; + t = ms->offset; + break; default: file_magerror(ms, "invalid m->type (%d) in mprint()", m->type); return -1; @@ -870,9 +889,12 @@ moffset(struct magic_set *ms, struct magic *m, const struct buffer *b, if (*m->value.s == '\0') p->s[strcspn(p->s, "\r\n")] = '\0'; o = CAST(uint32_t, (ms->offset + strlen(p->s))); - if (m->type == FILE_PSTRING) - o += CAST(uint32_t, - file_pstring_length_size(m)); + if (m->type == FILE_PSTRING) { + size_t l = file_pstring_length_size(ms, m); + if (l == FILE_BADSIZE) + return -1; + o += CAST(uint32_t, l); + } } break; @@ -932,23 +954,26 @@ moffset(struct magic_set *ms, struct magic *m, const struct buffer *b, case FILE_CLEAR: case FILE_DEFAULT: case FILE_INDIRECT: + case FILE_OFFSET: o = ms->offset; break; case FILE_DER: - { - o = der_offs(ms, m, nbytes); - if (o == -1 || CAST(size_t, o) > nbytes) { - if ((ms->flags & MAGIC_DEBUG) != 0) { - (void)fprintf(stderr, - "Bad DER offset %d nbytes=%" - SIZE_T_FORMAT "u", o, nbytes); - } - *op = 0; - return 0; + o = der_offs(ms, m, nbytes); + if (o == -1 || CAST(size_t, o) > nbytes) { + if ((ms->flags & MAGIC_DEBUG) != 0) { + (void)fprintf(stderr, + "Bad DER offset %d nbytes=%" + SIZE_T_FORMAT "u", o, nbytes); } - break; + *op = 0; + return 0; } + break; + + case FILE_GUID: + o = CAST(int32_t, (ms->offset + 2 * sizeof(uint64_t))); + break; default: o = 0; @@ -1153,6 +1178,7 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) case FILE_QDATE: case FILE_QLDATE: case FILE_QWDATE: + case FILE_OFFSET: if (cvt_64(p, m) == -1) goto out; return 1; @@ -1164,9 +1190,15 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) return 1; } case FILE_PSTRING: { - size_t sz = file_pstring_length_size(m); - char *ptr1 = p->s, *ptr2 = ptr1 + sz; - size_t len = file_pstring_get_length(m, ptr1); + char *ptr1, *ptr2; + size_t len, sz = file_pstring_length_size(ms, m); + if (sz == FILE_BADSIZE) + return 0; + ptr1 = p->s; + ptr2 = ptr1 + sz; + len = file_pstring_get_length(ms, m, ptr1); + if (len == FILE_BADSIZE) + return 0; sz = sizeof(p->s) - sz; /* maximum length of string */ if (len >= sz) { /* @@ -1266,6 +1298,7 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) case FILE_NAME: case FILE_USE: case FILE_DER: + case FILE_GUID: return 1; default: file_magerror(ms, "invalid type %d in mconvert()", m->type); @@ -1392,6 +1425,12 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, } } + if (type == FILE_OFFSET) { + (void)memset(p, '\0', sizeof(*p)); + p->q = offset; + return 0; + } + if (offset >= nbytes) { (void)memset(p, '\0', sizeof(*p)); return 0; @@ -1456,7 +1495,9 @@ private int msetoffset(struct magic_set *ms, struct magic *m, struct buffer *bb, const struct buffer *b, size_t o, unsigned int cont_level) { - if (m->offset < 0) { + int32_t offset; + if (m->flag & OFFNEGATIVE) { + offset = -m->offset; if (cont_level > 0) { if (m->flag & (OFFADD|INDIROFFADD)) goto normal; @@ -1474,26 +1515,28 @@ msetoffset(struct magic_set *ms, struct magic *m, struct buffer *bb, "u at level %u", o, cont_level); return -1; } - if (CAST(size_t, -m->offset) > b->elen) + if (CAST(size_t, m->offset) > b->elen) return -1; buffer_init(bb, -1, NULL, b->ebuf, b->elen); - ms->eoffset = ms->offset = CAST(int32_t, b->elen + m->offset); + ms->eoffset = ms->offset = CAST(int32_t, b->elen - m->offset); } else { + offset = m->offset; if (cont_level == 0) { normal: // XXX: Pass real fd, then who frees bb? buffer_init(bb, -1, NULL, b->fbuf, b->flen); - ms->offset = m->offset; + ms->offset = offset; ms->eoffset = 0; } else { - ms->offset = ms->eoffset + m->offset; + ms->offset = ms->eoffset + offset; } } if ((ms->flags & MAGIC_DEBUG) != 0) { - fprintf(stderr, "bb=[%p,%" SIZE_T_FORMAT "u], %d [b=%p,%" - SIZE_T_FORMAT "u], [o=%#x, c=%d]\n", - bb->fbuf, bb->flen, ms->offset, b->fbuf, b->flen, - m->offset, cont_level); + fprintf(stderr, "bb=[%p,%" SIZE_T_FORMAT "u,%" + SIZE_T_FORMAT "u], %d [b=%p,%" + SIZE_T_FORMAT "u,%" SIZE_T_FORMAT "u], [o=%#x, c=%d]\n", + bb->fbuf, bb->flen, bb->elen, ms->offset, b->fbuf, + b->flen, b->elen, offset, cont_level); } return 0; } @@ -1540,6 +1583,9 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, *indir_count, *name_count); mdebug(offset, RCAST(char *, RCAST(void *, p)), sizeof(union VALUETYPE)); +#ifndef COMPILE_ONLY + file_mdump(m); +#endif } if (m->flag & INDIR) { @@ -1548,7 +1594,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, if (m->in_op & FILE_OPINDIRECT) { const union VALUETYPE *q = CAST(const union VALUETYPE *, RCAST(const void *, s + offset + off)); - switch (cvt_flip(m->in_type, flip)) { + int op; + switch (op = cvt_flip(m->in_type, flip)) { case FILE_BYTE: if (OFFSET_OOB(nbytes, offset + off, 1)) return 0; @@ -1602,7 +1649,9 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, off = SEXT(sgn,64,LE64(q)); break; default: - abort(); + if ((ms->flags & MAGIC_DEBUG) != 0) + fprintf(stderr, "bad op=%d\n", op); + return 0; } if ((ms->flags & MAGIC_DEBUG) != 0) fprintf(stderr, "indirect offs=%jd\n", off); @@ -1667,11 +1716,19 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, offset = do_ops(m, SEXT(sgn,64,BE64(p)), off); break; default: - break; + if ((ms->flags & MAGIC_DEBUG) != 0) + fprintf(stderr, "bad in_type=%d\n", in_type); + return 0; } if (m->flag & INDIROFFADD) { - offset += ms->c.li[cont_level-1].off; + if (cont_level == 0) { + if ((ms->flags & MAGIC_DEBUG) != 0) + fprintf(stderr, + "indirect *zero* cont_level\n"); + return 0; + } + offset += ms->c.li[cont_level - 1].off; if (offset == 0) { if ((ms->flags & MAGIC_DEBUG) != 0) fprintf(stderr, @@ -1688,6 +1745,9 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, if ((ms->flags & MAGIC_DEBUG) != 0) { mdebug(offset, RCAST(char *, RCAST(void *, p)), sizeof(union VALUETYPE)); +#ifndef COMPILE_ONLY + file_mdump(m); +#endif } } @@ -1731,6 +1791,11 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, return 0; break; + case FILE_GUID: + if (OFFSET_OOB(nbytes, offset, 16)) + return 0; + break; + case FILE_STRING: case FILE_PSTRING: case FILE_SEARCH: @@ -1825,7 +1890,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, } private uint64_t -file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) +file_strncmp(const char *s1, const char *s2, size_t len, size_t maxlen, + uint32_t flags) { /* * Convert the source args to unsigned here so that (1) the @@ -1835,7 +1901,9 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) */ const unsigned char *a = RCAST(const unsigned char *, s1); const unsigned char *b = RCAST(const unsigned char *, s2); - const unsigned char *eb = b + len; + uint32_t ws = flags & (STRING_COMPACT_WHITESPACE | + STRING_COMPACT_OPTIONAL_WHITESPACE); + const unsigned char *eb = b + (ws ? maxlen : len); uint64_t v; /* @@ -1893,7 +1961,8 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) } private uint64_t -file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags) +file_strncmp16(const char *a, const char *b, size_t len, size_t maxlen, + uint32_t flags) { /* * XXX - The 16-bit string compare probably needs to be done @@ -1901,7 +1970,7 @@ file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags) * At the moment, I am unsure. */ flags = 0; - return file_strncmp(a, b, len, flags); + return file_strncmp(a, b, len, maxlen, flags); } public zend_string* convert_libmagic_pattern(char *val, size_t len, uint32_t options) @@ -2006,6 +2075,7 @@ magiccheck(struct magic_set *ms, struct magic *m) case FILE_QWDATE: case FILE_BEQWDATE: case FILE_LEQWDATE: + case FILE_OFFSET: v = p->q; break; @@ -2084,14 +2154,14 @@ magiccheck(struct magic_set *ms, struct magic *m) case FILE_PSTRING: l = 0; v = file_strncmp(m->value.s, p->s, CAST(size_t, m->vallen), - m->str_flags); + sizeof(p->s), m->str_flags); break; case FILE_BESTRING16: case FILE_LESTRING16: l = 0; v = file_strncmp16(m->value.s, p->s, CAST(size_t, m->vallen), - m->str_flags); + sizeof(p->s), m->str_flags); break; case FILE_SEARCH: { /* search ms->search.s for the string m->value.s */ @@ -2126,7 +2196,7 @@ magiccheck(struct magic_set *ms, struct magic *m) return 0; v = file_strncmp(m->value.s, ms->search.s + idx, slen, - m->str_flags); + ms->search.s_len - idx, m->str_flags); if (v == 0) { /* found match */ ms->search.offset += idx; ms->search.rm_len = ms->search.s_len - idx; @@ -2223,6 +2293,10 @@ error_out: return 0; } return matched; + case FILE_GUID: + l = 0; + v = memcmp(m->value.guid, p->guid, sizeof(p->guid)); + break; default: file_magerror(ms, "invalid type %d in magiccheck()", m->type); return -1; |