summary | refs | log | tree | commit | diff
path: root/src/encoding.c
diff options
context:
space:
mode:
author: Christos Zoulas <christos@zoulas.com> 2020-06-27 15:57:39 +0000
committer: Christos Zoulas <christos@zoulas.com> 2020-06-27 15:57:39 +0000
commit: 67a62e415b7ce80e9fb8577f5b1b8a5ebf176ead (patch)
tree: 9dfa695745dd235c7e4a65847cce8492ec546940 /src/encoding.c
parent: b314cd76cfbaf1db9ba9a768cfe8d09d8fcdd652 (diff)
download: file-git-67a62e415b7ce80e9fb8577f5b1b8a5ebf176ead.tar.gz
Surrogate pairs are not valid utf-8 characters (Michael Liu)
Diffstat (limited to 'src/encoding.c')
-rw-r--r-- src/encoding.c 6
1 files changed, 5 insertions, 1 deletions
diff --git a/src/encoding.c b/src/encoding.c
index c3f33431..9438e55f 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.22 2020/06/27 15:57:39 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -376,6 +376,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *
c = (c << 6) + (buf[i] & 0x3f);
}
+ /* Surrogate pair characters are invalid */
+ if (c >= 0xd800 && c <=0xdfff)
+ return -1;
+
if (ubuf)
ubuf[(*ulen)++] = c;
gotone = 1;