summaryrefslogtreecommitdiff
path: root/src/apprentice.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/apprentice.c')
-rw-r--r--src/apprentice.c817
1 files changed, 561 insertions, 256 deletions
diff --git a/src/apprentice.c b/src/apprentice.c
index cd45bdc..47b4c87 100644
--- a/src/apprentice.c
+++ b/src/apprentice.c
@@ -32,7 +32,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.190 2013/02/17 22:29:40 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.229 2015/01/01 17:07:34 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -40,6 +40,9 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.190 2013/02/17 22:29:40 christos Exp $")
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
#include <string.h>
#include <assert.h>
#include <ctype.h>
@@ -48,6 +51,15 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.190 2013/02/17 22:29:40 christos Exp $")
#include <sys/mman.h>
#endif
#include <dirent.h>
+#if defined(HAVE_LIMITS_H)
+#include <limits.h>
+#endif
+
+#ifndef SSIZE_MAX
+#define MAXMAGIC_SIZE ((ssize_t)0x7fffffff)
+#else
+#define MAXMAGIC_SIZE SSIZE_MAX
+#endif
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
@@ -74,15 +86,26 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.190 2013/02/17 22:29:40 christos Exp $")
#define ALLOC_CHUNK (size_t)10
#define ALLOC_INCR (size_t)200
+#define MAP_TYPE_MMAP 0
+#define MAP_TYPE_MALLOC 1
+#define MAP_TYPE_USER 2
+
struct magic_entry {
struct magic *mp;
uint32_t cont_count;
uint32_t max_count;
};
+struct magic_entry_set {
+ struct magic_entry *me;
+ uint32_t count;
+ uint32_t max;
+};
+
struct magic_map {
void *p;
size_t len;
+ int type;
struct magic *magic[MAGIC_SETS];
uint32_t nmagic[MAGIC_SETS];
};
@@ -113,7 +136,10 @@ private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
+private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
+ size_t);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
+private int check_buffer(struct magic_set *, struct magic_map *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
const char *);
@@ -125,7 +151,6 @@ private int parse_strength(struct magic_set *, struct magic_entry *, const char
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
-private size_t maxmagic[MAGIC_SETS] = { 0 };
private size_t magicsize = sizeof(struct magic);
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
@@ -196,7 +221,7 @@ static const struct type_tbl_s type_tbl[] = {
{ XX("invalid"), FILE_INVALID, FILE_FMT_NONE },
{ XX("byte"), FILE_BYTE, FILE_FMT_NUM },
{ XX("short"), FILE_SHORT, FILE_FMT_NUM },
- { XX("default"), FILE_DEFAULT, FILE_FMT_STR },
+ { XX("default"), FILE_DEFAULT, FILE_FMT_NONE },
{ XX("long"), FILE_LONG, FILE_FMT_NUM },
{ XX("string"), FILE_STRING, FILE_FMT_STR },
{ XX("date"), FILE_DATE, FILE_FMT_STR },
@@ -240,6 +265,7 @@ static const struct type_tbl_s type_tbl[] = {
{ XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
{ XX("name"), FILE_NAME, FILE_FMT_NONE },
{ XX("use"), FILE_USE, FILE_FMT_NONE },
+ { XX("clear"), FILE_CLEAR, FILE_FMT_NONE },
{ XX_NULL, FILE_INVALID, FILE_FMT_NONE },
};
@@ -378,10 +404,11 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
{
struct mlist *ml;
+ mlp->map = idx == 0 ? map : NULL;
if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
return -1;
- ml->map = idx == 0 ? map : NULL;
+ ml->map = NULL;
ml->magic = map->magic[idx];
ml->nmagic = map->nmagic[idx];
@@ -398,9 +425,11 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
private int
apprentice_1(struct magic_set *ms, const char *fn, int action)
{
- struct mlist *ml;
struct magic_map *map;
+#ifndef COMPILE_ONLY
+ struct mlist *ml;
size_t i;
+#endif
if (magicsize != FILE_MAGICSIZE) {
file_error(ms, 0, "magic element size %lu != %lu",
@@ -429,20 +458,27 @@ apprentice_1(struct magic_set *ms, const char *fn, int action)
for (i = 0; i < MAGIC_SETS; i++) {
if (add_mlist(ms->mlist[i], map, i) == -1) {
file_oomem(ms, sizeof(*ml));
- apprentice_unmap(map);
- return -1;
+ goto fail;
}
}
if (action == FILE_LIST) {
for (i = 0; i < MAGIC_SETS; i++) {
- printf("Set %zu:\nBinary patterns:\n", i);
+ printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
+ i);
apprentice_list(ms->mlist[i], BINTEST);
printf("Text patterns:\n");
apprentice_list(ms->mlist[i], TEXTTEST);
}
}
-
+ return 0;
+fail:
+ for (i = 0; i < MAGIC_SETS; i++) {
+ mlist_free(ms->mlist[i]);
+ ms->mlist[i] = NULL;
+ }
+ return -1;
+#else
return 0;
#endif /* COMPILE_ONLY */
}
@@ -488,6 +524,11 @@ file_ms_alloc(int flags)
ms->mlist[i] = NULL;
ms->file = "unknown";
ms->line = 0;
+ ms->indir_max = FILE_INDIR_MAX;
+ ms->name_max = FILE_NAME_MAX;
+ ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
+ ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
+ ms->elf_notes_max = FILE_ELF_NOTES_MAX;
return ms;
free:
free(ms);
@@ -499,14 +540,22 @@ apprentice_unmap(struct magic_map *map)
{
if (map == NULL)
return;
- if (map->p == NULL)
- return;
+
+ switch (map->type) {
#ifdef QUICK
- if (map->len)
- (void)munmap(map->p, map->len);
- else
+ case MAP_TYPE_MMAP:
+ if (map->p)
+ (void)munmap(map->p, map->len);
+ break;
#endif
+ case MAP_TYPE_MALLOC:
free(map->p);
+ break;
+ case MAP_TYPE_USER:
+ break;
+ default:
+ abort();
+ }
free(map);
}
@@ -524,21 +573,70 @@ mlist_alloc(void)
private void
mlist_free(struct mlist *mlist)
{
- struct mlist *ml;
+ struct mlist *ml, *next;
if (mlist == NULL)
return;
- for (ml = mlist->next; ml != mlist;) {
- struct mlist *next = ml->next;
+ ml = mlist->next;
+ for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
if (ml->map)
apprentice_unmap(ml->map);
free(ml);
- ml = next;
+ if (ml == mlist)
+ break;
}
- free(ml);
}
+#ifndef COMPILE_ONLY
+/* void **bufs: an array of compiled magic files */
+protected int
+buffer_apprentice(struct magic_set *ms, struct magic **bufs,
+ size_t *sizes, size_t nbufs)
+{
+ size_t i, j;
+ struct mlist *ml;
+ struct magic_map *map;
+
+ if (nbufs == 0)
+ return -1;
+
+ if (ms->mlist[0] != NULL)
+ file_reset(ms);
+
+ init_file_tables();
+
+ for (i = 0; i < MAGIC_SETS; i++) {
+ mlist_free(ms->mlist[i]);
+ if ((ms->mlist[i] = mlist_alloc()) == NULL) {
+ file_oomem(ms, sizeof(*ms->mlist[i]));
+ goto fail;
+ }
+ }
+
+ for (i = 0; i < nbufs; i++) {
+ map = apprentice_buf(ms, bufs[i], sizes[i]);
+ if (map == NULL)
+ goto fail;
+
+ for (j = 0; j < MAGIC_SETS; j++) {
+ if (add_mlist(ms->mlist[j], map, j) == -1) {
+ file_oomem(ms, sizeof(*ml));
+ goto fail;
+ }
+ }
+ }
+
+ return 0;
+fail:
+ for (i = 0; i < MAGIC_SETS; i++) {
+ mlist_free(ms->mlist[i]);
+ ms->mlist[i] = NULL;
+ }
+ return -1;
+}
+#endif
+
/* const char *fn: list of magic files and directories */
protected int
file_apprentice(struct magic_set *ms, const char *fn, int action)
@@ -547,6 +645,9 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
int file_err, errs = -1;
size_t i;
+ if (ms->mlist[0] != NULL)
+ file_reset(ms);
+
if ((fn = magic_getpath(fn, action)) == NULL)
return -1;
@@ -561,11 +662,9 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
mlist_free(ms->mlist[i]);
if ((ms->mlist[i] = mlist_alloc()) == NULL) {
file_oomem(ms, sizeof(*ms->mlist[i]));
- if (i != 0) {
- --i;
- do
- mlist_free(ms->mlist[i]);
- while (i != 0);
+ while (i-- > 0) {
+ mlist_free(ms->mlist[i]);
+ ms->mlist[i] = NULL;
}
free(mfn);
return -1;
@@ -595,6 +694,10 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
return -1;
}
+#if 0
+ /*
+ * Always leave the database loaded
+ */
if (action == FILE_LOAD)
return 0;
@@ -602,8 +705,10 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
mlist_free(ms->mlist[i]);
ms->mlist[i] = NULL;
}
+#endif
switch (action) {
+ case FILE_LOAD:
case FILE_COMPILE:
case FILE_CHECK:
case FILE_LIST:
@@ -615,13 +720,62 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
}
/*
+ * Compute the real length of a magic expression, for the purposes
+ * of determining how "strong" a magic expression is (approximating
+ * how specific its matches are):
+ * - magic characters count 0 unless escaped.
+ * - [] expressions count 1
+ * - {} expressions count 0
+ * - regular characters or escaped magic characters count 1
+ * - 0 length expressions count as one
+ */
+private size_t
+nonmagic(const char *str)
+{
+ const char *p;
+ size_t rv = 0;
+
+ for (p = str; *p; p++)
+ switch (*p) {
+ case '\\': /* Escaped anything counts 1 */
+ if (!*++p)
+ p--;
+ rv++;
+ continue;
+ case '?': /* Magic characters count 0 */
+ case '*':
+ case '.':
+ case '+':
+ case '^':
+ case '$':
+ continue;
+ case '[': /* Bracketed expressions count 1 the ']' */
+ while (*p && *p != ']')
+ p++;
+ p--;
+ continue;
+ case '{': /* Braced expressions count 0 */
+ while (*p && *p != '}')
+ p++;
+ if (!*p)
+ p--;
+ continue;
+ default: /* Anything else counts 1 */
+ rv++;
+ continue;
+ }
+
+ return rv == 0 ? 1 : rv; /* Return at least 1 */
+}
+
+/*
* Get weight of this magic entry, for sorting purposes.
*/
private size_t
apprentice_magic_strength(const struct magic *m)
{
#define MULT 10
- size_t val = 2 * MULT; /* baseline strength */
+ size_t v, val = 2 * MULT; /* baseline strength */
switch (m->type) {
case FILE_DEFAULT: /* make sure this sorts last */
@@ -657,10 +811,14 @@ apprentice_magic_strength(const struct magic *m)
break;
case FILE_SEARCH:
- case FILE_REGEX:
val += m->vallen * MAX(MULT / m->vallen, 1);
break;
+ case FILE_REGEX:
+ v = nonmagic(m->value.s);
+ val += v * MAX(MULT / v, 1);
+ break;
+
case FILE_DATE:
case FILE_LEDATE:
case FILE_BEDATE:
@@ -699,7 +857,6 @@ apprentice_magic_strength(const struct magic *m)
break;
default:
- val = 0;
(void)fprintf(stderr, "Bad type %d\n", m->type);
abort();
}
@@ -896,24 +1053,24 @@ set_test_type(struct magic *mstart, struct magic *m)
private int
addentry(struct magic_set *ms, struct magic_entry *me,
- struct magic_entry **mentry, uint32_t *mentrycount)
+ struct magic_entry_set *mset)
{
size_t i = me->mp->type == FILE_NAME ? 1 : 0;
- if (mentrycount[i] == maxmagic[i]) {
+ if (mset[i].count == mset[i].max) {
struct magic_entry *mp;
- maxmagic[i] += ALLOC_INCR;
+ mset[i].max += ALLOC_INCR;
if ((mp = CAST(struct magic_entry *,
- realloc(mentry[i], sizeof(*mp) * maxmagic[i]))) ==
+ realloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
NULL) {
- file_oomem(ms, sizeof(*mp) * maxmagic[i]);
+ file_oomem(ms, sizeof(*mp) * mset[i].max);
return -1;
}
- (void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) *
+ (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
ALLOC_INCR);
- mentry[i] = mp;
+ mset[i].me = mp;
}
- mentry[i][mentrycount[i]++] = *me;
+ mset[i].me[mset[i].count++] = *me;
memset(me, 0, sizeof(*me));
return 0;
}
@@ -923,7 +1080,7 @@ addentry(struct magic_set *ms, struct magic_entry *me,
*/
private void
load_1(struct magic_set *ms, int action, const char *fn, int *errs,
- struct magic_entry **mentry, uint32_t *mentrycount)
+ struct magic_entry_set *mset)
{
size_t lineno = 0, llen = 0;
char *line = NULL;
@@ -990,7 +1147,7 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
case 0:
continue;
case 1:
- (void)addentry(ms, &me, mentry, mentrycount);
+ (void)addentry(ms, &me, mset);
goto again;
default:
(*errs)++;
@@ -999,7 +1156,7 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
}
}
if (me.mp)
- (void)addentry(ms, &me, mentry, mentrycount);
+ (void)addentry(ms, &me, mset);
free(line);
(void)fclose(f);
}
@@ -1110,19 +1267,21 @@ private struct magic_map *
apprentice_load(struct magic_set *ms, const char *fn, int action)
{
int errs = 0;
- struct magic_entry *mentry[MAGIC_SETS] = { NULL };
- uint32_t mentrycount[MAGIC_SETS] = { 0 };
uint32_t i, j;
size_t files = 0, maxfiles = 0;
char **filearr = NULL, *mfn;
struct stat st;
struct magic_map *map;
+ struct magic_entry_set mset[MAGIC_SETS];
DIR *dir;
struct dirent *d;
+ memset(mset, 0, sizeof(mset));
ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
- if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
+
+ if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
+ {
file_oomem(ms, sizeof(*map));
return NULL;
}
@@ -1168,36 +1327,35 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
closedir(dir);
qsort(filearr, files, sizeof(*filearr), cmpstrp);
for (i = 0; i < files; i++) {
- load_1(ms, action, filearr[i], &errs, mentry,
- mentrycount);
+ load_1(ms, action, filearr[i], &errs, mset);
free(filearr[i]);
}
free(filearr);
} else
- load_1(ms, action, fn, &errs, mentry, mentrycount);
+ load_1(ms, action, fn, &errs, mset);
if (errs)
goto out;
for (j = 0; j < MAGIC_SETS; j++) {
/* Set types of tests */
- for (i = 0; i < mentrycount[j]; ) {
- if (mentry[j][i].mp->cont_level != 0) {
+ for (i = 0; i < mset[j].count; ) {
+ if (mset[j].me[i].mp->cont_level != 0) {
i++;
continue;
}
- i = set_text_binary(ms, mentry[j], mentrycount[j], i);
+ i = set_text_binary(ms, mset[j].me, mset[j].count, i);
}
- qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]),
+ qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
apprentice_sort);
/*
* Make sure that any level 0 "default" line is last
* (if one exists).
*/
- set_last_default(ms, mentry[j], mentrycount[j]);
+ set_last_default(ms, mset[j].me, mset[j].count);
/* coalesce per file arrays into a single one */
- if (coalesce_entries(ms, mentry[j], mentrycount[j],
+ if (coalesce_entries(ms, mset[j].me, mset[j].count,
&map->magic[j], &map->nmagic[j]) == -1) {
errs++;
goto out;
@@ -1206,14 +1364,10 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
out:
for (j = 0; j < MAGIC_SETS; j++)
- magic_entry_free(mentry[j], mentrycount[j]);
+ magic_entry_free(mset[j].me, mset[j].count);
if (errs) {
- for (j = 0; j < MAGIC_SETS; j++) {
- if (map->magic[j])
- free(map->magic[j]);
- }
- free(map);
+ apprentice_unmap(map);
return NULL;
}
return map;
@@ -1233,7 +1387,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
* the sign extension must have happened.
*/
case FILE_BYTE:
- v = (char) v;
+ v = (signed char) v;
break;
case FILE_SHORT:
case FILE_BESHORT:
@@ -1284,6 +1438,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
case FILE_INDIRECT:
case FILE_NAME:
case FILE_USE:
+ case FILE_CLEAR:
break;
default:
if (ms->flags & MAGIC_CHECK)
@@ -1301,7 +1456,8 @@ string_modifier_check(struct magic_set *ms, struct magic *m)
if ((ms->flags & MAGIC_CHECK) == 0)
return 0;
- if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
+ if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
+ (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
file_magwarn(ms,
"'/BHhLl' modifiers are only allowed for pascal strings\n");
return -1;
@@ -1449,6 +1605,145 @@ check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
}
#endif /* ENABLE_CONDITIONALS */
+private int
+parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
+{
+ const char *l = *lp;
+
+ while (!isspace((unsigned char)*++l))
+ switch (*l) {
+ case CHAR_INDIRECT_RELATIVE:
+ m->str_flags |= INDIRECT_RELATIVE;
+ break;
+ default:
+ if (ms->flags & MAGIC_CHECK)
+ file_magwarn(ms, "indirect modifier `%c' "
+ "invalid", *l);
+ *lp = l;
+ return -1;
+ }
+ *lp = l;
+ return 0;
+}
+
+private void
+parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
+ int op)
+{
+ const char *l = *lp;
+ char *t;
+ uint64_t val;
+
+ ++l;
+ m->mask_op |= op;
+ val = (uint64_t)strtoull(l, &t, 0);
+ l = t;
+ m->num_mask = file_signextend(ms, m, val);
+ eatsize(&l);
+ *lp = l;
+}
+
+private int
+parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
+{
+ const char *l = *lp;
+ char *t;
+ int have_range = 0;
+
+ while (!isspace((unsigned char)*++l)) {
+ switch (*l) {
+ case '0': case '1': case '2':
+ case '3': case '4': case '5':
+ case '6': case '7': case '8':
+ case '9':
+ if (have_range && (ms->flags & MAGIC_CHECK))
+ file_magwarn(ms, "multiple ranges");
+ have_range = 1;
+ m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
+ if (m->str_range == 0)
+ file_magwarn(ms, "zero range");
+ l = t - 1;
+ break;
+ case CHAR_COMPACT_WHITESPACE:
+ m->str_flags |= STRING_COMPACT_WHITESPACE;
+ break;
+ case CHAR_COMPACT_OPTIONAL_WHITESPACE:
+ m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
+ break;
+ case CHAR_IGNORE_LOWERCASE:
+ m->str_flags |= STRING_IGNORE_LOWERCASE;
+ break;
+ case CHAR_IGNORE_UPPERCASE:
+ m->str_flags |= STRING_IGNORE_UPPERCASE;
+ break;
+ case CHAR_REGEX_OFFSET_START:
+ m->str_flags |= REGEX_OFFSET_START;
+ break;
+ case CHAR_BINTEST:
+ m->str_flags |= STRING_BINTEST;
+ break;
+ case CHAR_TEXTTEST:
+ m->str_flags |= STRING_TEXTTEST;
+ break;
+ case CHAR_TRIM:
+ m->str_flags |= STRING_TRIM;
+ break;
+ case CHAR_PSTRING_1_LE:
+#define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ SET_LENGTH(PSTRING_1_LE);
+ break;
+ case CHAR_PSTRING_2_BE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ SET_LENGTH(PSTRING_2_BE);
+ break;
+ case CHAR_PSTRING_2_LE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ SET_LENGTH(PSTRING_2_LE);
+ break;
+ case CHAR_PSTRING_4_BE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ SET_LENGTH(PSTRING_4_BE);
+ break;
+ case CHAR_PSTRING_4_LE:
+ switch (m->type) {
+ case FILE_PSTRING:
+ case FILE_REGEX:
+ break;
+ default:
+ goto bad;
+ }
+ SET_LENGTH(PSTRING_4_LE);
+ break;
+ case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
+ break;
+ default:
+ bad:
+ if (ms->flags & MAGIC_CHECK)
+ file_magwarn(ms, "string modifier `%c' "
+ "invalid", *l);
+ goto out;
+ }
+ /* allow multiple '/' for readability */
+ if (l[1] == '/' && !isspace((unsigned char)l[2]))
+ l++;
+ }
+ if (string_modifier_check(ms, m) == -1)
+ goto out;
+ *lp = l;
+ return 0;
+out:
+ *lp = l;
+ return -1;
+}
+
/*
* parse one line from magic file, put into magic[index++] if valid
*/
@@ -1667,7 +1962,7 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
*/
m->type = get_standard_integer_type(l, &l);
}
- // It's unsigned.
+ /* It's unsigned. */
if (m->type != FILE_INVALID)
m->flag |= UNSIGNED;
} else {
@@ -1718,113 +2013,27 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
m->str_range = 0;
m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
if ((op = get_op(*l)) != -1) {
- if (!IS_STRING(m->type)) {
- uint64_t val;
- ++l;
- m->mask_op |= op;
- val = (uint64_t)strtoull(l, &t, 0);
- l = t;
- m->num_mask = file_signextend(ms, m, val);
- eatsize(&l);
- }
- else if (op == FILE_OPDIVIDE) {
- int have_range = 0;
- while (!isspace((unsigned char)*++l)) {
- switch (*l) {
- case '0': case '1': case '2':
- case '3': case '4': case '5':
- case '6': case '7': case '8':
- case '9':
- if (have_range &&
- (ms->flags & MAGIC_CHECK))
- file_magwarn(ms,
- "multiple ranges");
- have_range = 1;
- m->str_range = CAST(uint32_t,
- strtoul(l, &t, 0));
- if (m->str_range == 0)
- file_magwarn(ms,
- "zero range");
- l = t - 1;
- break;
- case CHAR_COMPACT_WHITESPACE:
- m->str_flags |=
- STRING_COMPACT_WHITESPACE;
- break;
- case CHAR_COMPACT_OPTIONAL_WHITESPACE:
- m->str_flags |=
- STRING_COMPACT_OPTIONAL_WHITESPACE;
- break;
- case CHAR_IGNORE_LOWERCASE:
- m->str_flags |= STRING_IGNORE_LOWERCASE;
- break;
- case CHAR_IGNORE_UPPERCASE:
- m->str_flags |= STRING_IGNORE_UPPERCASE;
- break;
- case CHAR_REGEX_OFFSET_START:
- m->str_flags |= REGEX_OFFSET_START;
- break;
- case CHAR_BINTEST:
- m->str_flags |= STRING_BINTEST;
- break;
- case CHAR_TEXTTEST:
- m->str_flags |= STRING_TEXTTEST;
- break;
- case CHAR_TRIM:
- m->str_flags |= STRING_TRIM;
- break;
- case CHAR_PSTRING_1_LE:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
- break;
- case CHAR_PSTRING_2_BE:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
- break;
- case CHAR_PSTRING_2_LE:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
- break;
- case CHAR_PSTRING_4_BE:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
- break;
- case CHAR_PSTRING_4_LE:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
- break;
- case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
- if (m->type != FILE_PSTRING)
- goto bad;
- m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
- break;
- default:
- bad:
- if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms,
- "string extension `%c' "
- "invalid", *l);
- return -1;
- }
- /* allow multiple '/' for readability */
- if (l[1] == '/' &&
- !isspace((unsigned char)l[2]))
- l++;
+ if (IS_STRING(m->type)) {
+ int r;
+
+ if (op != FILE_OPDIVIDE) {
+ if (ms->flags & MAGIC_CHECK)
+ file_magwarn(ms,
+ "invalid string/indirect op: "
+ "`%c'", *t);
+ return -1;
}
- if (string_modifier_check(ms, m) == -1)
+
+ if (m->type == FILE_INDIRECT)
+ r = parse_indirect_modifier(ms, m, &l);
+ else
+ r = parse_string_modifier(ms, m, &l);
+ if (r == -1)
return -1;
- }
- else {
- if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "invalid string op: %c", *t);
- return -1;
- }
+ } else
+ parse_op_modifier(ms, m, &l, op);
}
+
/*
* We used to set mask to all 1's here, instead let's just not do
* anything if mask = 0 (unless you have a better idea)
@@ -1936,6 +2145,11 @@ parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
m->factor_op, m->factor);
return -1;
}
+ if (m->type == FILE_NAME) {
+ file_magwarn(ms, "%s: Strength setting is not supported in "
+ "\"name\" magic entries", m->value.s);
+ return -1;
+ }
EATAB;
switch (*l) {
case FILE_FACTOR_OP_NONE:
@@ -1972,39 +2186,70 @@ out:
return -1;
}
-/*
- * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
- * magic[index - 1]
- */
private int
-parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+goodchar(unsigned char x, const char *extra)
+{
+ return (isascii(x) && isalnum(x)) || strchr(extra, x);
+}
+
+private int
+parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
+ off_t off, size_t len, const char *name, const char *extra, int nt)
{
size_t i;
const char *l = line;
struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
+ char *buf = (char *)m + off;
- if (m->apple[0] != '\0') {
- file_magwarn(ms, "Current entry already has a APPLE type "
- "`%.8s', new type `%s'", m->mimetype, l);
+ if (buf[0] != '\0') {
+ len = nt ? strlen(buf) : len;
+ file_magwarn(ms, "Current entry already has a %s type "
+ "`%.*s', new type `%s'", name, (int)len, buf, l);
return -1;
}
+ if (*m->desc == '\0') {
+ file_magwarn(ms, "Current entry does not yet have a "
+ "description for adding a %s type", name);
+ return -1;
+ }
+
EATAB;
- for (i = 0; *l && ((isascii((unsigned char)*l) &&
- isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
- i < sizeof(m->apple); m->apple[i++] = *l++)
+ for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
continue;
- if (i == sizeof(m->apple) && *l) {
- /* We don't need to NUL terminate here, printing handles it */
+
+ if (i == len && *l) {
+ if (nt)
+ buf[len - 1] = '\0';
if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "APPLE type `%s' truncated %"
- SIZE_T_FORMAT "u", line, i);
+ file_magwarn(ms, "%s type `%s' truncated %"
+ SIZE_T_FORMAT "u", name, line, i);
+ } else {
+ if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
+ file_magwarn(ms, "%s type `%s' has bad char '%c'",
+ name, line, *l);
+ if (nt)
+ buf[i] = '\0';
}
if (i > 0)
return 0;
- else
- return -1;
+
+ file_magerror(ms, "Bad magic entry '%s'", line);
+ return -1;
+}
+
+/*
+ * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
+ * magic[index - 1]
+ */
+private int
+parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+{
+ struct magic *m = &me->mp[0];
+
+ return parse_extra(ms, me, line, offsetof(struct magic, apple),
+ sizeof(m->apple), "APPLE", "!+-./", 0);
}
/*
@@ -2014,49 +2259,50 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
private int
parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
{
- size_t i;
- const char *l = line;
- struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
-
- if (m->mimetype[0] != '\0') {
- file_magwarn(ms, "Current entry already has a MIME type `%s',"
- " new type `%s'", m->mimetype, l);
- return -1;
- }
-
- EATAB;
- for (i = 0; *l && ((isascii((unsigned char)*l) &&
- isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
- i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
- continue;
- if (i == sizeof(m->mimetype)) {
- m->mimetype[sizeof(m->mimetype) - 1] = '\0';
- if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "MIME type `%s' truncated %"
- SIZE_T_FORMAT "u", m->mimetype, i);
- } else
- m->mimetype[i] = '\0';
+ struct magic *m = &me->mp[0];
- if (i > 0)
- return 0;
- else
- return -1;
+ return parse_extra(ms, me, line, offsetof(struct magic, mimetype),
+ sizeof(m->mimetype), "MIME", "+-/.", 1);
}
private int
check_format_type(const char *ptr, int type)
{
- int quad = 0;
+ int quad = 0, h;
if (*ptr == '\0') {
/* Missing format string; bad */
return -1;
}
- switch (type) {
+ switch (file_formats[type]) {
case FILE_FMT_QUAD:
quad = 1;
/*FALLTHROUGH*/
case FILE_FMT_NUM:
+ if (quad == 0) {
+ switch (type) {
+ case FILE_BYTE:
+ h = 2;
+ break;
+ case FILE_SHORT:
+ case FILE_BESHORT:
+ case FILE_LESHORT:
+ h = 1;
+ break;
+ case FILE_LONG:
+ case FILE_BELONG:
+ case FILE_LELONG:
+ case FILE_MELONG:
+ case FILE_LEID3:
+ case FILE_BEID3:
+ case FILE_INDIRECT:
+ h = 0;
+ break;
+ default:
+ abort();
+ }
+ } else
+ h = 0;
if (*ptr == '-')
ptr++;
if (*ptr == '.')
@@ -2073,45 +2319,67 @@ check_format_type(const char *ptr, int type)
}
switch (*ptr++) {
+#ifdef STRICT_FORMAT /* "long" formats are int formats for us */
+ /* so don't accept the 'l' modifier */
case 'l':
switch (*ptr++) {
case 'i':
case 'd':
case 'u':
+ case 'o':
case 'x':
case 'X':
- return 0;
+ return h != 0 ? -1 : 0;
default:
return -1;
}
+ /*
+ * Don't accept h and hh modifiers. They make writing
+ * magic entries more complicated, for very little benefit
+ */
case 'h':
+ if (h-- <= 0)
+ return -1;
switch (*ptr++) {
case 'h':
+ if (h-- <= 0)
+ return -1;
switch (*ptr++) {
case 'i':
case 'd':
case 'u':
+ case 'o':
case 'x':
case 'X':
return 0;
default:
return -1;
}
+ case 'i':
case 'd':
- return 0;
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ return h != 0 ? -1 : 0;
default:
return -1;
}
-
- case 'i':
+#endif
case 'c':
+ return h != 2 ? -1 : 0;
+ case 'i':
case 'd':
case 'u':
+ case 'o':
case 'x':
case 'X':
+#ifdef STRICT_FORMAT
+ return h != 0 ? -1 : 0;
+#else
return 0;
-
+#endif
default:
return -1;
}
@@ -2198,7 +2466,7 @@ check_format(struct magic_set *ms, struct magic *m)
}
ptr++;
- if (check_format_type(ptr, file_formats[m->type]) == -1) {
+ if (check_format_type(ptr, m->type) == -1) {
/*
* TODO: this error message is unhelpful if the format
* string is not one character long
@@ -2245,6 +2513,16 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
m->value.s);
return -1;
}
+ if (m->type == FILE_REGEX) {
+ file_regex_t rx;
+ int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED);
+ if (rc) {
+ if (ms->flags & MAGIC_CHECK)
+ file_regerror(&rx, rc, ms);
+ }
+ file_regfree(&rx);
+ return rc ? -1 : 0;
+ }
return 0;
case FILE_FLOAT:
case FILE_BEFLOAT:
@@ -2545,6 +2823,28 @@ eatsize(const char **p)
}
/*
+ * handle a buffer containing a compiled file.
+ */
+private struct magic_map *
+apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
+{
+ struct magic_map *map;
+
+ if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
+ file_oomem(ms, sizeof(*map));
+ return NULL;
+ }
+ map->len = len;
+ map->p = buf;
+ map->type = MAP_TYPE_USER;
+ if (check_buffer(ms, map, "buffer") != 0) {
+ apprentice_unmap(map);
+ return NULL;
+ }
+ return map;
+}
+
+/*
* handle a compiled file.
*/
@@ -2553,12 +2853,8 @@ apprentice_map(struct magic_set *ms, const char *fn)
{
int fd;
struct stat st;
- uint32_t *ptr;
- uint32_t version, entries, nentries;
- int needsbyteswap;
char *dbname = NULL;
struct magic_map *map;
- size_t i;
fd = -1;
if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
@@ -2577,8 +2873,9 @@ apprentice_map(struct magic_set *ms, const char *fn)
file_error(ms, errno, "cannot stat `%s'", dbname);
goto error;
}
- if (st.st_size < 8) {
- file_error(ms, 0, "file `%s' is too small", dbname);
+ if (st.st_size < 8 || st.st_size > MAXMAGIC_SIZE) {
+ file_error(ms, 0, "file `%s' is too %s", dbname,
+ st.st_size < 8 ? "small" : "large");
goto error;
}
@@ -2589,6 +2886,7 @@ apprentice_map(struct magic_set *ms, const char *fn)
file_error(ms, errno, "cannot map `%s'", dbname);
goto error;
}
+ map->type = MAP_TYPE_MMAP;
#else
if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
file_oomem(ms, map->len);
@@ -2598,16 +2896,39 @@ apprentice_map(struct magic_set *ms, const char *fn)
file_badread(ms);
goto error;
}
- map->len = 0;
+ map->type = MAP_TYPE_MALLOC;
#define RET 1
#endif
(void)close(fd);
fd = -1;
+
+ if (check_buffer(ms, map, dbname) != 0)
+ goto error;
+
+ free(dbname);
+ return map;
+
+error:
+ if (fd != -1)
+ (void)close(fd);
+ apprentice_unmap(map);
+ free(dbname);
+ return NULL;
+}
+
+private int
+check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
+{
+ uint32_t *ptr;
+ uint32_t entries, nentries;
+ uint32_t version;
+ int i, needsbyteswap;
+
ptr = CAST(uint32_t *, map->p);
if (*ptr != MAGICNO) {
if (swap4(*ptr) != MAGICNO) {
file_error(ms, 0, "bad magic in `%s'", dbname);
- goto error;
+ return -1;
}
needsbyteswap = 1;
} else
@@ -2620,14 +2941,14 @@ apprentice_map(struct magic_set *ms, const char *fn)
file_error(ms, 0, "File %s supports only version %d magic "
"files. `%s' is version %d", VERSION,
VERSIONNO, dbname, version);
- goto error;
+ return -1;
}
- entries = (uint32_t)(st.st_size / sizeof(struct magic));
- if ((off_t)(entries * sizeof(struct magic)) != st.st_size) {
- file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
- dbname, (unsigned long long)st.st_size,
- sizeof(struct magic));
- goto error;
+ entries = (uint32_t)(map->len / sizeof(struct magic));
+ if ((entries * sizeof(struct magic)) != map->len) {
+ file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
+ "a multiple of %" SIZE_T_FORMAT "u",
+ dbname, map->len, sizeof(struct magic));
+ return -1;
}
map->magic[0] = CAST(struct magic *, map->p) + 1;
nentries = 0;
@@ -2643,26 +2964,14 @@ apprentice_map(struct magic_set *ms, const char *fn)
if (entries != nentries + 1) {
file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
dbname, entries, nentries + 1);
- goto error;
+ return -1;
}
if (needsbyteswap)
for (i = 0; i < MAGIC_SETS; i++)
byteswap(map->magic[i], map->nmagic[i]);
- free(dbname);
- return map;
-
-error:
- if (fd != -1)
- (void)close(fd);
- apprentice_unmap(map);
- free(dbname);
- return NULL;
+ return 0;
}
-private const uint32_t ar[] = {
- MAGICNO, VERSIONNO
-};
-
/*
* handle an mmaped file.
*/
@@ -2676,6 +2985,10 @@ apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
char *dbname;
int rv = -1;
uint32_t i;
+ union {
+ struct magic m;
+ uint32_t h[2 + MAGIC_SETS];
+ } hdr;
dbname = mkdbname(ms, fn, 1);
@@ -2687,24 +3000,16 @@ apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
file_error(ms, errno, "cannot open `%s'", dbname);
goto out;
}
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.h[0] = MAGICNO;
+ hdr.h[1] = VERSIONNO;
+ memcpy(hdr.h + 2, map->nmagic, nm);
- if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
- file_error(ms, errno, "error writing `%s'", dbname);
- goto out;
- }
-
- if (write(fd, map->nmagic, nm) != (ssize_t)nm) {
+ if (write(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
file_error(ms, errno, "error writing `%s'", dbname);
goto out;
}
- assert(nm + sizeof(ar) < m);
-
- if (lseek(fd, (off_t)m, SEEK_SET) != (off_t)m) {
- file_error(ms, errno, "error seeking `%s'", dbname);
- goto out;
- }
-
for (i = 0; i < MAGIC_SETS; i++) {
len = m * map->nmagic[i];
if (write(fd, map->magic[i], len) != (ssize_t)len) {