diff options
author | Christos Zoulas <christos@zoulas.com> | 2021-02-05 21:33:49 +0000 |
---|---|---|
committer | Christos Zoulas <christos@zoulas.com> | 2021-02-05 21:33:49 +0000 |
commit | f6dfbb86de9b377f4b8f273d54258b787241e7a2 (patch) | |
tree | 17728203f42df61315f1a9c10512dee05d1cfbb9 | |
parent | 9c74f7b258cfe17b8c7f6eaaf6bbbf4ed14017d0 (diff) | |
download | file-git-f6dfbb86de9b377f4b8f273d54258b787241e7a2.tar.gz |
PR/234: halaei: Add limit to the number of bytes to scan for encoding
file_buffer(3) passed the full size of the buffer to the encoding
determination function. If the file was too large, we ended up
allocating buffers of (2 * size + 4 * size) bytes to scan for encoding.
Now we limit the scanned size to 64K by default (tunable through the new
"encoding" parameter).
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | doc/file.man | 5 | ||||
-rw-r--r-- | src/apprentice.c | 3 | ||||
-rw-r--r-- | src/encoding.c | 5 | ||||
-rw-r--r-- | src/file.c | 4 | ||||
-rw-r--r-- | src/file.h | 4 | ||||
-rw-r--r-- | src/magic.c | 8 |
7 files changed, 26 insertions, 7 deletions
@@ -1,3 +1,7 @@ +2021-02-05 16:31 Christos Zoulas <christos@zoulas.com> + + * PR/234: Add limit to the number of bytes to scan for encoding + 2021-02-01 12:31 Christos Zoulas <christos@zoulas.com> * PR/77: Trim trailing separator. diff --git a/doc/file.man b/doc/file.man index 761a51cf..5c803fb2 100644 --- a/doc/file.man +++ b/doc/file.man @@ -1,5 +1,5 @@ -.\" $File: file.man,v 1.142 2021/01/03 20:51:48 christos Exp $ -.Dd January 3, 2021 +.\" $File: file.man,v 1.143 2021/02/05 21:33:49 christos Exp $ +.Dd February 5, 2021 .Dt FILE __CSECTION__ .Os .Sh NAME @@ -343,6 +343,7 @@ Set various parameter limits. .It Li elf_notes Ta 256 Ta max ELF notes processed .It Li elf_phnum Ta 2048 Ta max ELF program sections processed .It Li elf_shnum Ta 32768 Ta max ELF sections processed +.It Li encoding Ta 65536 Ta max number of bytes to scan for encoding evaluation .It Li indir Ta 50 Ta recursion limit for indirect magic .It Li name Ta 50 Ta use count limit for name/use magic .It Li regex Ta 8192 Ta length limit for regex searches diff --git a/src/apprentice.c b/src/apprentice.c index 9b732c50..d7f4a885 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.299 2020/12/16 23:38:39 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.300 2021/02/05 21:33:49 christos Exp $") #endif /* lint */ #include "magic.h" @@ -543,6 +543,7 @@ file_ms_alloc(int flags) ms->elf_notes_max = FILE_ELF_NOTES_MAX; ms->regex_max = FILE_REGEX_MAX; ms->bytes_max = FILE_BYTES_MAX; + ms->encoding_max = FILE_ENCODING_MAX; return ms; free: free(ms); diff --git a/src/encoding.c b/src/encoding.c index 8e04b3ca..c8b40439 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -35,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: encoding.c,v 1.26 2020/08/15 12:06:07 christos Exp $") +FILE_RCSID("@(#)$File: encoding.c,v 1.27 2021/02/05 21:33:49 christos Exp $") #endif /* lint */ #include "magic.h" @@ -94,6 
+94,9 @@ file_encoding(struct magic_set *ms, const struct buffer *b, *code = "unknown"; *code_mime = "binary"; + if (nbytes > ms->encoding_max) + nbytes = ms->encoding_max; + mlen = (nbytes + 1) * sizeof((*ubuf)[0]); *ubuf = CAST(file_unichar_t *, calloc(CAST(size_t, 1), mlen)); if (*ubuf == NULL) { @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: file.c,v 1.188 2020/12/06 14:58:50 christos Exp $") +FILE_RCSID("@(#)$File: file.c,v 1.189 2021/02/05 21:33:49 christos Exp $") #endif /* lint */ #include "magic.h" @@ -146,6 +146,8 @@ private struct { "max ELF prog sections processed" }, { "elf_shnum", MAGIC_PARAM_ELF_SHNUM_MAX, 0, 0, FILE_ELF_SHNUM_MAX, "max ELF sections processed" }, + { "encoding", MAGIC_PARAM_ENCODING_MAX, 0, 0, FILE_ENCODING_MAX, + "max bytes to scan for encoding" }, { "indir", MAGIC_PARAM_INDIR_MAX, 0, 0, FILE_INDIR_MAX, "recursion limit for indirection" }, { "name", MAGIC_PARAM_NAME_MAX, 0, 0, FILE_NAME_MAX, @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.223 2020/12/08 21:26:00 christos Exp $ + * @(#)$File: file.h,v 1.224 2021/02/05 21:33:49 christos Exp $ */ #ifndef __file_h__ @@ -462,6 +462,7 @@ struct magic_set { uint16_t elf_notes_max; uint16_t regex_max; size_t bytes_max; /* number of bytes to read from file */ + size_t encoding_max; /* bytes to look for encoding */ #ifndef FILE_BYTES_MAX # define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */ #endif @@ -471,6 +472,7 @@ struct magic_set { #define FILE_INDIR_MAX 50 #define FILE_NAME_MAX 50 #define FILE_REGEX_MAX 8192 +#define FILE_ENCODING_MAX (64 * 1024) }; /* Type for Unicode characters */ diff --git a/src/magic.c b/src/magic.c index 89f4e16c..81a0840f 100644 --- a/src/magic.c +++ b/src/magic.c @@ -33,7 +33,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: magic.c,v 1.113 2020/12/08 21:26:00 christos Exp $") +FILE_RCSID("@(#)$File: magic.c,v 1.114 2021/02/05 21:33:49 christos Exp $") #endif /* 
lint */ #include "magic.h" @@ -617,6 +617,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val) case MAGIC_PARAM_BYTES_MAX: ms->bytes_max = *CAST(const size_t *, val); return 0; + case MAGIC_PARAM_ENCODING_MAX: + ms->encoding_max = *CAST(const size_t *, val); + return 0; default: errno = EINVAL; return -1; @@ -650,6 +653,9 @@ magic_getparam(struct magic_set *ms, int param, void *val) case MAGIC_PARAM_BYTES_MAX: *CAST(size_t *, val) = ms->bytes_max; return 0; + case MAGIC_PARAM_ENCODING_MAX: + *CAST(size_t *, val) = ms->encoding_max; + return 0; default: errno = EINVAL; return -1; |