summary refs log tree commit diff
path: root/src/encoding.c
diff options
context:
space:
mode:
author Reuben Thomas <rrt@sc3d.org> 2008-11-06 22:49:08 +0000
committer Reuben Thomas <rrt@sc3d.org> 2008-11-06 22:49:08 +0000
commit caaf816f93f1eb8e0af373b45714431889caacd1 (patch)
tree d84b3c72f3aa7dafe2b3571eed971b2085df9fa1 /src/encoding.c
parent 18445c384e1d32b9f16ad65d8ec0bdd6da2bc098 (diff)
download file-git-caaf816f93f1eb8e0af373b45714431889caacd1.tar.gz
Fixes to MIME detection.
Diffstat (limited to 'src/encoding.c')
-rw-r--r-- src/encoding.c 19
1 files changed, 7 insertions, 12 deletions
diff --git a/src/encoding.c b/src/encoding.c
index fd368353..223f6cbe 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -39,7 +39,7 @@
#include <stdlib.h>
#ifndef lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.68 2008/10/30 10:50:24 rrt Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.1 2008/11/06 21:17:45 rrt Exp $")
#endif /* lint */
private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
@@ -60,7 +60,7 @@ protected int
file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
{
size_t mlen;
- int rv = 0, ucs_type;
+ int rv = 1, ucs_type;
unsigned char *nbuf = NULL;
mlen = (nbytes + 1) * sizeof(nbuf[0]);
@@ -74,18 +74,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
goto done;
}
+ *type = "text";
if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
*code = "ASCII";
*code_mime = "us-ascii";
- *type = "text";
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
*code = "UTF-8 Unicode (with BOM)";
*code_mime = "utf-8";
- *type = "text";
} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
*code = "UTF-8 Unicode";
*code_mime = "utf-8";
- *type = "text";
} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
if (ucs_type == 1) {
*code = "Little-endian UTF-16 Unicode";
@@ -94,28 +92,25 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
*code = "Big-endian UTF-16 Unicode";
*code_mime = "utf-16be";
}
- *type = "character data";
} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
*code = "ISO-8859";
- *type = "text";
*code_mime = "iso-8859-1";
} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
*code = "Non-ISO extended-ASCII";
- *type = "text";
*code_mime = "unknown-8bit";
} else {
from_ebcdic(buf, nbytes, nbuf);
if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
*code = "EBCDIC";
- *type = "character data";
*code_mime = "ebcdic";
} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
*code = "International EBCDIC";
- *type = "character data";
*code_mime = "ebcdic";
- } else /* Doesn't look like text at all */
- rv = -1;
+ } else { /* Doesn't look like text at all */
+ rv = 0;
+ *type = "binary";
+ }
}
done: