diff options
author | Christos Zoulas <christos@zoulas.com> | 2020-06-27 15:57:39 +0000 |
---|---|---|
committer | Christos Zoulas <christos@zoulas.com> | 2020-06-27 15:57:39 +0000 |
commit | 67a62e415b7ce80e9fb8577f5b1b8a5ebf176ead (patch) | |
tree | 9dfa695745dd235c7e4a65847cce8492ec546940 /src/encoding.c | |
parent | b314cd76cfbaf1db9ba9a768cfe8d09d8fcdd652 (diff) | |
download | file-git-67a62e415b7ce80e9fb8577f5b1b8a5ebf176ead.tar.gz |
Surrogate pairs are not valid utf-8 characters (Michael Liu)
Diffstat (limited to 'src/encoding.c')
-rw-r--r-- | src/encoding.c | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/src/encoding.c b/src/encoding.c
index c3f33431..9438e55f 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.22 2020/06/27 15:57:39 christos Exp $")
 #endif /* lint */
 
 #include "magic.h"
@@ -376,6 +376,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *
 				c = (c << 6) + (buf[i] & 0x3f);
 			}
 
+			/* Surrogate pair characters are invalid */
+			if (c >= 0xd800 && c <=0xdfff)
+				return -1;
+
 			if (ubuf)
 				ubuf[(*ulen)++] = c;
 			gotone = 1;