summaryrefslogtreecommitdiff
path: root/magic/Magdir/compress
blob: 97a51939cfe5b486a3de5d4ffc5e86055ee6309c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
#------------------------------------------------------------------------------
# $File: compress,v 1.90 2023/04/24 17:26:56 christos Exp $
# compress:  file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.

# standard unix compress
# Two-byte signature 037 235 (octal) at the start of the file.
0	string		\037\235	compress'd data
!:mime	application/x-compress
!:apple	LZIVZIVU
!:ext	Z
# Byte 2 carries two fields: the top bit (0x80) is reported as block mode ...
>2	byte&0x80	>0		block compressed
# ... and the low five bits (0x1f) are always printed as a bit count.
>2	byte&0x1f	x		%d bits

# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# URL: https://en.wikipedia.org/wiki/Gzip
# Reference: https://tools.ietf.org/html/rfc1952
# Update: Joerg Jenderek, Apr 2019, Dec 2022
#   Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
#	* Original filename is only at offset 10 if "extra field" absent
#	* Produce shorter output - notably, only report compression methods
#         other than 8 ("deflate", the only method defined in RFC 1952).
# Note: find defs -iname '*.trid.xml' -exec grep -q '<Bytes>1F8B08' {} \; -ls
# TODO:
# FBR	Blueberry FlashBack screen Record	https://www.flashbackrecorder.com/
# KPR	KOffice/Calligra KPresenter		application/x-kpresenter
# KPT	KOffice/Calligra KPresenter template?	application/x-kpresenter
# SAV	Diggles Saved Game File			http://www.innonics.com
# SAV	FarCry (demo) saved game		http://www.farcry-thegame.com
# DAT	ZOAGZIP game data format		http://en.wikipedia.org/wiki/SD_Gundam_Capsule_Fighter
# The top-level test only checks the two signature bytes 037 213 (octal) and
# prints nothing itself; all output comes from the two branches below, which
# split on the FNAME/FCOMMENT bits (mask 0x18) of the flag byte at offset 3.
0       string          \037\213
# to display gzip compressed (strength=100=2*50) before other (strength=50)?
#!:strength * 2
# no FNAME and FCOMMENT bit implies no file name/comment. That means only binary
>3	byte&0x18	=0
# For binary gzipped no ASCII text should occur
#	mcd-monu-cad.trid.xml
>>10	string		MCD			Monu-Cad Drawing, Component or Font
#>>36	string		Created\ with\ MONU-CAD
#!:mime	application/octet-stream
# http://fileformats.archiveteam.org/wiki/Monu-CAD
#	http://www.monucad.com/downloads/FullDemo-2005.EXE
#	/HANDS96.MCC	Component
#	/DEMO_DD01.MCD	Drawing
#	/MCALF020.FNT	Font
!:ext	mcc/mcd/fnt
# http://www.generalcadd.com
>>10	string		GXD			General CADD, Drawing or Component
#!:mime	application/octet-stream
#	/gxc/BUILDINGEDGE.gxc			Component
#	/gxd/HOCKETT-STPAUL-WRHSE.gxd		Drawing
#	/gxd/POWERLAND-MILL-ADD-11.gxd		Drawing		v9.1.06
!:ext	gxc/gxd
#>>>13	ubyte		0			\b, version 0
>>>13	string		09			\b, version 9
# other gzipped binary like gzipped tar, VirtualBox extension package,...
# "default" is taken only when neither the MCD nor the GXD test at offset 10
# matched.
>>10	default		x		gzip compressed data
!:mime	application/gzip
# Header details are printed by the shared gzip-info sub-routine.
>>>0	use	gzip-info
# size of the original (uncompressed) input data modulo 2^32
# (negative offset: the little-endian long in the last four bytes of the file)
# TODO: check for GXD MCD cad the reported size
>>>-4	ulelong		x		\b, original size modulo 2^32 %u
# gzipped TAR or VirtualBox extension package
#!:mime	application/x-compressed-tar
#!:mime	application/x-virtualbox-vbox-extpack
# https://www.w3.org/TR/SVG/mimereg.html
#!:mime	image/svg+xml-compressed
#	zlib.3.gz
#	microcode-20180312.tgz
#	tpz same as tgz
#	lua-md5_1.2-1_i386_i486.ipk	https://en.wikipedia.org/wiki/Opkg
#	Oracle_VM_VirtualBox_Extension_Pack-5.0.12-104815.vbox-extpack
#	trees.blend			http://fileformats.archiveteam.org/wiki/BLEND
#	2020-07-19-Note-16-24.xoj	https://xournal.sourceforge.net/manual.html
#	MYgnucash-gz.gnucash		https://wiki.gnucash.org/wiki/GnuCash_XML_format
#	text-rotate.dia			https://en.wikipedia.org/wiki/Dia_(software)
#	MYrdata.RData			https://en.wikipedia.org/wiki/R_(programming_language)
!:ext	gz/tgz/tpz/ipk/vbox-extpack/svgz/blend/dia/gnucash/rdata/xoj
# FNAME/FCOMMENT bit implies file name/comment as iso-8859-1 text
>3	byte&0x18	>0		gzip compressed data
!:mime	application/gzip
# gzipped tar, gzipped Abiword document
#!:mime	application/x-compressed-tar
#!:mime	application/x-abiword-compressed
#!:mime	image/svg+xml-compressed
#	kleopatra_splashscreen.svgz	gzipped .svg
#	RSI-Mega-Demo_Disk1.adz		gzipped .adf	http://fileformats.archiveteam.org/wiki/ADF_(Amiga)
#	PostbankTest.kmy		gzipped XML	https://docs.kde.org/stable5/en/kmymoney/kmymoney/details.formats.compressed.html
#	Logo.xcfgz			gzipped .xcf	http://fileformats.archiveteam.org/wiki/XCF
!:ext	gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz
# Same header report as the binary branch above.
>>0	use	gzip-info
# size of the original (uncompressed) input data modulo 2^32
>>-4	ulelong		x		\b, original size modulo 2^32 %u
#	display information of gzip compressed files
# Named sub-routine, invoked with "use gzip-info" at offset 0 from both gzip
# branches, so the offsets below are offsets into the gzip header itself.
# It only appends ", ..." fragments to the caller's description.
0	name				gzip-info
#>2	byte		x		THIS iS GZIP
# Byte 2 is the compression method; 8 ("deflate") is deliberately not reported.
>2	byte		<8		\b, reserved method
>2	byte		>8		\b, unknown method
# Byte 3 is the flag byte; each "&mask" test fires when that bit is set.
>3	byte		&0x01		\b, ASCII
>3	byte		&0x02		\b, has CRC
>3	byte		&0x04		\b, extra field
# Masked with 0x0C and compared to 0x08: the 0x08 bit is set AND the
# "extra field" bit (0x04) is clear. Only then is the string at offset 10
# printed as the original file name.
>3	byte&0xC	=0x08
>>10	string		x		\b, was "%s"
>3	byte		&0x10		\b, has comment
>3	byte		&0x20		\b, encrypted
# Little-endian timestamp at offset 4; a zero value is not printed.
>4	ledate		>0		\b, last modified: %s
# Byte 8: only the values 2 and 4 are reported.
>8	byte		2		\b, max compression
>8	byte		4		\b, max speed
# Byte 9: originating system; values above 0x0D produce no output.
>9	byte		=0x00		\b, from FAT filesystem (MS-DOS, OS/2, NT)
>9	byte		=0x01		\b, from Amiga
>9	byte		=0x02		\b, from VMS
>9	byte		=0x03		\b, from Unix
>9	byte		=0x04		\b, from VM/CMS
>9	byte		=0x05		\b, from Atari
>9	byte		=0x06		\b, from HPFS filesystem (OS/2, NT)
>9	byte		=0x07		\b, from MacOS
>9	byte		=0x08		\b, from Z-System
>9	byte		=0x09		\b, from CP/M
>9	byte		=0x0A		\b, from TOPS/20
>9	byte		=0x0B		\b, from NTFS filesystem (NT)
>9	byte		=0x0C		\b, from QDOS
>9	byte		=0x0D		\b, from Acorn RISCOS
# size of the original (uncompressed) input data modulo 2^32
#>-4	ulelong		x		\b, original size modulo 2^32 %u
#ERROR: line 114: non zero offset 1048572 at level 1

# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
# Two-byte signature 037 036 (octal).
0	string		\037\036	packed data
!:mime	application/octet-stream
!:ext	z
# Big-endian long at offset 2 is printed as the original character count;
# the two rows differ only in singular/plural wording. Values below 1 print
# nothing.
>2	belong		>1		\b, %d characters originally
>2	belong		=1		\b, %d character originally
#
# This magic number is byte-order-independent.
# Both bytes of 0x1f1f are identical, so the native-order "short" test gives
# the same result on either byte order.
0	short		0x1f1f		old packed data
!:mime	application/octet-stream

# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
# Native-order short: which byte comes first depends on the host.
0	short		0x1fff		compacted data
!:mime	application/octet-stream
# This string is valid for SunOS (BE) and a matching "short" is listed
# in the Ultrix (LE) magic file.
# String test: fixed byte sequence 0377 037 (octal) regardless of host.
0	string		\377\037	compacted data
!:mime	application/octet-stream
# 0145405 is an octal constant (0xcb05), tested as a native-order short.
0	short		0145405		huf output
!:mime	application/octet-stream

# bzip2
# Three-character signature "BZh".
0	string		BZh		bzip2 compressed data
!:mime	application/x-bzip2
!:ext	bz2
# Byte 3 is printed as a character (%c) followed by the literal "00k";
# 47 is the character just before '0', so any digit passes the test.
>3	byte		>47		\b, block size = %c00k

# bzip	a block-sorting file compressor
#	by Julian Seward <sewardj@cs.man.ac.uk> and others
# Three-character signature "BZ0".
0	string		BZ0		bzip compressed data
!:mime	application/x-bzip
# Same block-size rendering as the bzip2 entry: byte 3 printed with %c
# followed by the literal "00k".
>3	byte		>47		\b, block size = %c00k

# lzip
# Four-character signature "LZIP".
0	string		LZIP		lzip compressed data
!:mime application/x-lzip
!:ext lz
# Byte 4 is always printed as the version number.
>4	byte		x		\b, version: %d

# squeeze and crunch
# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
# Three sibling formats distinguished by the low byte of a big-endian short
# (0x76FF / 0x76FE / 0x76FD). Each prints the string found after the header
# as the original name; note that the squeezed variant reads it from offset 4
# while the other two read it from offset 2.
0	beshort		0x76FF		squeezed data,
>4	string		x		original name %s
0	beshort		0x76FE		crunched data,
>2	string		x		original name %s
0	beshort		0x76FD		LZH compressed data,
>2	string		x		original name %s

# Freeze
# Two-byte signatures sharing the 037 lead byte; the second byte (octal 237
# or 236) selects the reported version.
0	string		\037\237	frozen file 2.1
0	string		\037\236	frozen file 1.0 (or gzip 0.5)

# SCO compress -H (LZH)
# Two-byte signature 037 240 (octal).
0	string		\037\240	SCO compress -H (LZH) data

# European GSM 06.10 is a provisional standard for full-rate speech
# transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
# excitation/long term prediction) coding at 13 kbit/s.
#
# There's only a magic nibble (4 bits); that nibble repeats every 33
# bytes.  This isn't suited for use, but maybe we can use it someday.
#
# This will cause very short GSM files to be declared as data and
# mismatches to be declared as data too!
#0	byte&0xF0	0xd0		data
#>33	byte&0xF0	0xd0
#>66	byte&0xF0	0xd0
#>99	byte&0xF0	0xd0
#>132	byte&0xF0	0xd0		GSM 06.10 compressed audio

# lzop from <markus.oberhumer@jk.uni-linz.ac.at>
# Nine-byte signature. The big-endian short at offset 9 is the version and
# selects one of two header layouts:
#   version <  0x0940: method byte at 13, OS byte at 14
#   version >= 0x0940: method byte at 15, OS byte at 17
# (the two range tests "<0x0940" and ">0x0939" are complementary).
0	string		\x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a	lzop compressed data
!:ext	lzo
>9	beshort		<0x0940
# High nibble of byte 9 gives the major version text; the low 12 bits of the
# short are appended as three hex digits.
>>9	byte&0xf0	=0x00		- version 0.
>>9	beshort&0x0fff	x		\b%03x,
>>13	byte		1		LZO1X-1,
>>13	byte		2		LZO1X-1(15),
>>13	byte		3		LZO1X-999,
## >>22	bedate		>0		last modified: %s,
>>14	byte		=0x00		os: MS-DOS
>>14	byte		=0x01		os: Amiga
>>14	byte		=0x02		os: VMS
>>14	byte		=0x03		os: Unix
>>14	byte		=0x05		os: Atari
>>14	byte		=0x06		os: OS/2
>>14	byte		=0x07		os: MacOS
>>14	byte		=0x0A		os: Tops/20
>>14	byte		=0x0B		os: WinNT
>>14	byte		=0x0E		os: Win32
>9	beshort		>0x0939
# Same version rendering as above, extended to major versions 1 and 2.
>>9	byte&0xf0	=0x00		- version 0.
>>9	byte&0xf0	=0x10		- version 1.
>>9	byte&0xf0	=0x20		- version 2.
>>9	beshort&0x0fff	x		\b%03x,
>>15	byte		1		LZO1X-1,
>>15	byte		2		LZO1X-1(15),
>>15	byte		3		LZO1X-999,
## >>25	bedate		>0		last modified: %s,
>>17	byte		=0x00		os: MS-DOS
>>17	byte		=0x01		os: Amiga
>>17	byte		=0x02		os: VMS
>>17	byte		=0x03		os: Unix
>>17	byte		=0x05		os: Atari
>>17	byte		=0x06		os: OS/2
>>17	byte		=0x07		os: MacOS
>>17	byte		=0x0A		os: Tops/20
>>17	byte		=0x0B		os: WinNT
>>17	byte		=0x0E		os: Win32

# 4.3BSD-Quasijarus Strong Compression
# https://minnie.tuhs.org/Quasijarus/compress.html
# Two-byte signature 037 241 (octal).
0	string		\037\241	Quasijarus strong compressed data

# From: Cory Dikkers <cdikkers@swbell.net>
# Four-character ASCII signatures.
0	string		XPKF		Amiga xpkf.library compressed data
0	string		PP11		Power Packer 1.1 compressed data
0	string		PP20		Power Packer 2.0 compressed data,
# For PP20 the big-endian long at offset 4 is matched against five known
# values, each reported as a compression level; other values print nothing.
>4	belong		0x09090909	fast compression
>4	belong		0x090A0A0A	mediocre compression
>4	belong		0x090A0B0B	good compression
>4	belong		0x090A0C0C	very good compression
>4	belong		0x090A0C0D	best compression

# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# https://www.7-zip.org or DOC/7zFormat.txt
#
# Six-byte signature: "7z" followed by octal 274 257 047 034.
0	string		7z\274\257\047\034	7-zip archive data,
# Bytes 6 and 7 are always printed, joined as "version <6>.<7>".
>6	byte		x			version %d
>7	byte		x			\b.%d
!:mime	application/x-7z-compressed
!:ext 7z/cb7

# Named sub-routine, called via "use lzma" by the LZMA detector below.
0	name		lzma			LZMA compressed data,
!:mime	application/x-lzma
!:ext	lzma
# Little-endian quad at offset 5: all ones is reported as "streamed";
# any other value is printed as the size.
>5	lequad		=0xffffffffffffffff	streamed
>5	lequad		!0xffffffffffffffff	non-streamed, size %lld

# Type: LZMA
# No text signature: the low 24 bits of the little-endian long at offset 0
# must equal 0x5d (byte 0 is 0x5d, bytes 1 and 2 are zero; byte 3 is ignored).
0	lelong&0xffffff	=0x5d
# As a second check the little-endian short at offset 12 must be 0xff or 0;
# either case hands off to the named "lzma" entry for the actual output.
>12	leshort		0xff
>>0	use		lzma
>12	leshort		0
>>0	use		lzma

# http://tukaani.org/xz/xz-file-format.txt
# Six-byte signature: 0xFD, "7zXZ", 0x00. The description ends in "checksum"
# so that the name selected below completes the phrase.
0	ustring		\xFD7zXZ\x00		XZ compressed data, checksum
# Doubles this entry's match strength.
!:strength * 2
!:mime	application/x-xz
!:ext	xz
# Low nibble of byte 7 selects the checksum name; unlisted values print
# nothing.
>7	byte&0xf	0x0			NONE
>7	byte&0xf	0x1			CRC32
>7	byte&0xf	0x4			CRC64
>7	byte&0xf	0xa			SHA-256

# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
# Four-character signature "LRZI".
0	string		LRZI			LRZIP compressed data
# Annotations bind to the magic line directly above them, so the MIME type
# belongs here, under the signature line. Placed after the conditional
# "encrypted" test it would only be reported for encrypted archives.
!:mime	application/x-lrzip
# Bytes 4 and 5 are always printed, joined as "version <4>.<5>".
>4	byte		x			- version %d
>5	byte		x			\b.%d
# Byte 22 equal to 1 is reported as encryption.
>22	byte		1			\b, encrypted

# https://fastcompression.blogspot.fi/2013/04/lz4-streaming-format-final.html
# Three generations of the format, each identified by its own little-endian
# 32-bit magic number at offset 0.
0	lelong		0x184d2204	LZ4 compressed data (v1.4+)
!:mime	application/x-lz4
!:ext	lz4
# Added by osm0sis@xda-developers.com
0 	lelong		0x184c2103	LZ4 compressed data (v1.0-v1.3)
!:mime	application/x-lz4
0	lelong		0x184c2102	LZ4 compressed data (v0.1-v0.9)
!:mime	application/x-lz4

# Zstandard/LZ4 skippable frames
# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# The low nibble is masked off, so any magic in 0x184D2A50..0x184D2A5F matches.
0         lelong&0xFFFFFFF0  0x184D2A50
# Indirect offset: read the little-endian long at offset 4, add 8, and
# re-run the magic tests at that position. Nothing is printed for the frame
# itself.
>(4.l+8)  indirect	x

# Zstandard Dictionary ID subroutine
# Named sub-routine. The Zstandard entries invoke it with
# ">4 use zstd-dictionary-id", so offset 0 here is byte 4 of the file.
# Byte 0 is a descriptor: bit 0x20 decides where the ID field starts
# (offset 1 when set, offset 2 when clear) and the low two bits decide its
# width (0 = absent, 1 = byte, 2 = little-endian short, 3 = little-endian long).
0     name        zstd-dictionary-id
# Single Segment = True
>0    byte        &0x20   \b, Dictionary ID:
>>0   byte&0x03   0       None
>>0   byte&0x03   1
>>>1  byte        x       %u
>>0   byte&0x03   2
>>>1  leshort     x       %u
>>0   byte&0x03   3
>>>1  lelong      x       %u
# Single Segment = False
# ("^0x20" matches when that bit is clear)
>0    byte        ^0x20   \b, Dictionary ID:
>>0   byte&0x03   0       None
>>0   byte&0x03   1
>>>2  byte        x       %u
>>0   byte&0x03   2
>>>2  leshort     x       %u
>>0   byte&0x03   3
>>>2  lelong      x       %u

# Zstandard compressed data
# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# One entry per format revision; the low byte of the little-endian magic
# number (0x22 .. 0x28) distinguishes them. Only the v0.7 and v0.8+ entries
# go on to report a dictionary ID via the zstd-dictionary-id sub-routine.
0     lelong       0xFD2FB522  Zstandard compressed data (v0.2)
!:mime  application/zstd
!:ext zst
0     lelong       0xFD2FB523  Zstandard compressed data (v0.3)
!:mime  application/zstd
!:ext zst
0     lelong       0xFD2FB524  Zstandard compressed data (v0.4)
!:mime  application/zstd
!:ext zst
0     lelong       0xFD2FB525  Zstandard compressed data (v0.5)
!:mime  application/zstd
!:ext zst
0     lelong       0xFD2FB526  Zstandard compressed data (v0.6)
!:mime  application/zstd
!:ext zst
0     lelong       0xFD2FB527  Zstandard compressed data (v0.7)
!:mime  application/zstd
!:ext zst
>4    use          zstd-dictionary-id
0     lelong       0xFD2FB528  Zstandard compressed data (v0.8+)
!:mime  application/zstd
!:ext zst
>4    use          zstd-dictionary-id

# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# Little-endian 32-bit magic number at offset 0.
0  lelong    0xEC30A437  Zstandard dictionary
# NOTE(review): the subtype is spelled "x-std-dictionary" (no "z") - confirm
# this is the intended MIME type before relying on it.
!:mime  application/x-std-dictionary
# The little-endian long at offset 4 is always printed as the ID.
>4 lelong    x           (ID %u)

# AFX compressed files (Wolfram Kleff)
# Note the signature is tested at offset 2, not at the start of the file.
2	string		-afx-		AFX compressed file data

# Supplementary magic data for the file(1) command to support
# rzip(1).  The format is described in magic(5).
#
# Copyright (C) 2003 by Andrew Tridgell.  You may do whatever you want with
# this file.
#
# Four-character signature "RZIP".
0	string		RZIP		rzip compressed data
# Bytes 4 and 5 are always printed, joined as "version <4>.<5>".
>4	byte		x		- version %d
>5	byte		x		\b.%d
# Big-endian long at offset 6 is always printed as a byte count.
>6	belong		x		(%d bytes)

# Four-byte signature: "ArC" followed by the byte 0x01.
0	string		ArC\x01		FreeArc archive <http://freearc.org>

# Type:	DACT compressed files
# "long" is read in the host's native byte order.
0	long	0x444354C3	DACT compressed data
# Bytes 4-6 are printed as a three-part version; "byte" is signed, so ">-1"
# accepts only values 0..127.
>4	byte	>-1		(version %i.
>5	byte	>-1		%i.
>6	byte	>-1		%i)
# Native-order longs at offsets 7 and 15; each is printed only when it
# exceeds the stated threshold.
>7	long	>0		, original size: %i bytes
>15	long	>30		, block size: %i bytes

# Valve Pack (VPK) files
# Little-endian 32-bit magic number at offset 0.
0	lelong	0x55aa1234	Valve Pak file
# The two little-endian longs that follow are always printed, as the
# version (offset 4) and the entry count (offset 8).
>0x4	lelong	x		\b, version %u
>0x8	lelong	x		\b, %u entries

# Snappy framing format
# https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
# Ten-byte signature: octal 377 006 000 000 followed by the text "sNaPpY".
0	string	\377\006\0\0sNaPpY	snappy framed data
!:mime	application/x-snappy-framed

# qpress, https://www.quicklz.com/
# Eight-character ASCII signature.
0	string	qpress10	qpress compressed data
!:mime	application/x-qpress

# Zlib https://www.ietf.org/rfc/rfc6713.txt
# There is no fixed signature, so the top-level test ("x") matches anything
# and the real detection is the chain of three checks below; the description
# is printed only when all three hold.
0	string/b	x
# The first two bytes, read as a big-endian number, must be a multiple of 31.
>0	beshort%31	=0
# The low nibble of byte 0 must be 8 ...
>>0	byte&0xf	=8
# ... and the top bit of byte 0 must be clear.
>>>0	byte&0x80 	=0	zlib compressed data
!:mime	application/zlib

# BWC compression
# "BWC" alone prints nothing; the byte after it must also be zero.
0	string		BWC
>3	byte		0	BWC compressed data

# UCL compression
# Eight-byte signature tested as a single big-endian quad.
0	bequad		0x00e955434cff011a	UCL compressed data

# Softlib archive
# Four-character signature "SLIB".
0	string		SLIB	Softlib archive
# Little-endian shorts at offsets 4 and 6 are always printed, as the version
# and the file count.
>4	leshort		x	\b, version %d
>6	leshort		x	(contains %d files)

# URL:  https://github.com/lzfse/lzfse/blob/master/src/lzfse_internal.h#L276
# From: Eric Hall <eric.hall@darkart.com>
# Four block tags sharing the "bvx" prefix; the fourth character selects the
# reported variant.
0	string	bvx-	lzfse encoded, no compression
0	string	bvx1	lzfse compressed, uncompressed tables
0	string	bvx2	lzfse compressed, compressed tables
0	string	bvxn	lzfse encoded, lzvn compressed

# pcxLib.exe compression program
# http://www.shikadi.net/moddingwiki/PCX_Library
# Signature "pcxLib" at offset 0; nothing is printed until the vendor
# copyright text at offset 0x0A is confirmed as well.
0	string/b	pcxLib
# \040 is the octal escape for a space (0x20). The earlier \020 is DLE (0x10)
# and can never match the plain-text copyright string.
>0x0A	string/b	Copyright\040(c)\040Genus\040Microprogramming,\040Inc.	pcxLib compressed

# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/SW/ORA/ORAFormatSpecification.htm
# Two-stage signature: little-endian short 0x7c49 at offset 0 (prints
# nothing), then little-endian long 0x80 at offset 2.
0	uleshort	0x7c49	
>2	lelong		0x80	ORA FASTQ compressed file
# Header fields from offset 6 onward, four bytes apart, are all printed
# unconditionally; the final field at offset 54 is a two-byte version.
>>6	ulelong		x	\b, DNA size %u
>>10	ulelong		x	\b, read names size %u
>>14	ulelong		x	\b, quality buffer 1 size %u
>>18	ulelong		x	\b, quality buffer 2 size %u
>>22	ulelong		x	\b, sequence buffer size %u
>>26	ulelong		x	\b, N-position buffer size %u
>>30	ulelong		x	\b, crypto buffer size %u
>>34	ulelong		x	\b, misc  buffer 1 size %u
>>38	ulelong		x	\b, misc  buffer 2 size %u
>>42	ulelong		x	\b, flags %#x
>>46	lelong		x	\b, read size %d
>>50	lelong		x	\b, number of reads %d
>>54	leshort		x	\b, version %d

# https://github.com/kspalaiologos/bzip3/blob/master/doc/file_format.md
0	string/b	BZ3v1	bzip3 compressed data
>5	ulelong		x	\b, blocksize %u


# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/\
# SW/ORA/ORAFormatSpecification.htm
# From Guillaume Rizk
# Explicit little-endian types are used so the match does not depend on the
# host's byte order; the other ORA entry in this file tests the same 0x7c49
# magic as a little-endian short.
# NOTE(review): assumes the footer fields use the same little-endian order
# as the header - confirm against the ORA format specification.
0	leshort	=0x7C49	DRAGEN ORA file,
# Negative offsets count back from the end of the file: the same magic value
# is expected again 261 bytes before the end, ahead of the metadata fields.
>-261	leshort	=0x7C49	with metadata:
>-125	ulequad	x	NB reads: %llu,
>-109	ulequad	x	NB bases: %llu.
# Bit 0x02 of the long at -219 is reported as interleaved paired reads.
>-219	ulelong&0x02	2	File contains interleaved paired reads