summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christos Zoulas <christos@zoulas.com> 2021-02-05 21:33:49 +0000
committer Christos Zoulas <christos@zoulas.com> 2021-02-05 21:33:49 +0000
commit f6dfbb86de9b377f4b8f273d54258b787241e7a2 (patch)
tree 17728203f42df61315f1a9c10512dee05d1cfbb9
parent 9c74f7b258cfe17b8c7f6eaaf6bbbf4ed14017d0 (diff)
download file-git-f6dfbb86de9b377f4b8f273d54258b787241e7a2.tar.gz
PR/234: halaei: Add limit to the number of bytes to scan for encoding
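file_buffer(3) passed the full size of the buffer to the encoding determination function. If the file was too large, we ended up allocating (2 * size + 4 * size) buffers to scan for encoding. Now we limit the number of bytes scanned for encoding to 64K by default; the limit can be changed through the new "encoding" parameter (MAGIC_PARAM_ENCODING_MAX).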
-rw-r--r-- ChangeLog 4
-rw-r--r-- doc/file.man 5
-rw-r--r-- src/apprentice.c 3
-rw-r--r-- src/encoding.c 5
-rw-r--r-- src/file.c 4
-rw-r--r-- src/file.h 4
-rw-r--r-- src/magic.c 8
7 files changed, 26 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 6604b2cb..6ae181f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-05 16:31 Christos Zoulas <christos@zoulas.com>
+
+ * PR/234: Add limit to the number of bytes to scan for encoding
+
2021-02-01 12:31 Christos Zoulas <christos@zoulas.com>
* PR/77: Trim trailing separator.
diff --git a/doc/file.man b/doc/file.man
index 761a51cf..5c803fb2 100644
--- a/doc/file.man
+++ b/doc/file.man
@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.142 2021/01/03 20:51:48 christos Exp $
-.Dd January 3, 2021
+.\" $File: file.man,v 1.143 2021/02/05 21:33:49 christos Exp $
+.Dd February 5, 2021
.Dt FILE __CSECTION__
.Os
.Sh NAME
@@ -343,6 +343,7 @@ Set various parameter limits.
.It Li elf_notes Ta 256 Ta max ELF notes processed
.It Li elf_phnum Ta 2048 Ta max ELF program sections processed
.It Li elf_shnum Ta 32768 Ta max ELF sections processed
+.It Li encoding Ta 65536 Ta max number of bytes to scan for encoding evaluation
.It Li indir Ta 50 Ta recursion limit for indirect magic
.It Li name Ta 50 Ta use count limit for name/use magic
.It Li regex Ta 8192 Ta length limit for regex searches
diff --git a/src/apprentice.c b/src/apprentice.c
index 9b732c50..d7f4a885 100644
--- a/src/apprentice.c
+++ b/src/apprentice.c
@@ -32,7 +32,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.299 2020/12/16 23:38:39 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.300 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -543,6 +543,7 @@ file_ms_alloc(int flags)
ms->elf_notes_max = FILE_ELF_NOTES_MAX;
ms->regex_max = FILE_REGEX_MAX;
ms->bytes_max = FILE_BYTES_MAX;
+ ms->encoding_max = FILE_ENCODING_MAX;
return ms;
free:
free(ms);
diff --git a/src/encoding.c b/src/encoding.c
index 8e04b3ca..c8b40439 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.26 2020/08/15 12:06:07 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.27 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -94,6 +94,9 @@ file_encoding(struct magic_set *ms, const struct buffer *b,
*code = "unknown";
*code_mime = "binary";
+ if (nbytes > ms->encoding_max)
+ nbytes = ms->encoding_max;
+
mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
*ubuf = CAST(file_unichar_t *, calloc(CAST(size_t, 1), mlen));
if (*ubuf == NULL) {
diff --git a/src/file.c b/src/file.c
index 90c6d36e..87b9ab1b 100644
--- a/src/file.c
+++ b/src/file.c
@@ -32,7 +32,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: file.c,v 1.188 2020/12/06 14:58:50 christos Exp $")
+FILE_RCSID("@(#)$File: file.c,v 1.189 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -146,6 +146,8 @@ private struct {
"max ELF prog sections processed" },
{ "elf_shnum", MAGIC_PARAM_ELF_SHNUM_MAX, 0, 0, FILE_ELF_SHNUM_MAX,
"max ELF sections processed" },
+ { "encoding", MAGIC_PARAM_ENCODING_MAX, 0, 0, FILE_ENCODING_MAX,
+ "max bytes to scan for encoding" },
{ "indir", MAGIC_PARAM_INDIR_MAX, 0, 0, FILE_INDIR_MAX,
"recursion limit for indirection" },
{ "name", MAGIC_PARAM_NAME_MAX, 0, 0, FILE_NAME_MAX,
diff --git a/src/file.h b/src/file.h
index 6c390047..86b51ead 100644
--- a/src/file.h
+++ b/src/file.h
@@ -27,7 +27,7 @@
*/
/*
* file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.223 2020/12/08 21:26:00 christos Exp $
+ * @(#)$File: file.h,v 1.224 2021/02/05 21:33:49 christos Exp $
*/
#ifndef __file_h__
@@ -462,6 +462,7 @@ struct magic_set {
uint16_t elf_notes_max;
uint16_t regex_max;
size_t bytes_max; /* number of bytes to read from file */
+ size_t encoding_max; /* bytes to look for encoding */
#ifndef FILE_BYTES_MAX
# define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */
#endif
@@ -471,6 +472,7 @@ struct magic_set {
#define FILE_INDIR_MAX 50
#define FILE_NAME_MAX 50
#define FILE_REGEX_MAX 8192
+#define FILE_ENCODING_MAX (64 * 1024)
};
/* Type for Unicode characters */
diff --git a/src/magic.c b/src/magic.c
index 89f4e16c..81a0840f 100644
--- a/src/magic.c
+++ b/src/magic.c
@@ -33,7 +33,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: magic.c,v 1.113 2020/12/08 21:26:00 christos Exp $")
+FILE_RCSID("@(#)$File: magic.c,v 1.114 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -617,6 +617,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val)
case MAGIC_PARAM_BYTES_MAX:
ms->bytes_max = *CAST(const size_t *, val);
return 0;
+ case MAGIC_PARAM_ENCODING_MAX:
+ ms->encoding_max = *CAST(const size_t *, val);
+ return 0;
default:
errno = EINVAL;
return -1;
@@ -650,6 +653,9 @@ magic_getparam(struct magic_set *ms, int param, void *val)
case MAGIC_PARAM_BYTES_MAX:
*CAST(size_t *, val) = ms->bytes_max;
return 0;
+ case MAGIC_PARAM_ENCODING_MAX:
+ *CAST(size_t *, val) = ms->encoding_max;
+ return 0;
default:
errno = EINVAL;
return -1;