From 10429269fbeb9442be4c5592b9111ccedbfefd0d Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Sat, 2 Sep 2017 08:54:31 +0000 Subject: file-5.32 --- magic/Magdir/archive | 362 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 297 insertions(+), 65 deletions(-) (limited to 'magic/Magdir/archive') diff --git a/magic/Magdir/archive b/magic/Magdir/archive index 4ef73a7..abecf71 100644 --- a/magic/Magdir/archive +++ b/magic/Magdir/archive @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------ -# $File: archive,v 1.88 2014/08/16 10:42:17 christos Exp $ +# $File: archive,v 1.108 2017/08/30 13:45:10 christos Exp $ # archive: file(1) magic for archive formats (see also "msdos" for self- # extracting compressed archives) # @@ -246,7 +246,15 @@ # BA # TODO: idarc says "bytes 0-2 == bytes 3-5" # TTComp -0 string \0\6 TTComp archive data +# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive +# Update: Joerg Jenderek +# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others +0 string \0\6 +# look for first keyword of Panorama database *.pan +>12 search/261 DESIGN +# skip keyword with low entropy +>12 default x TTComp archive, binary, 4K dictionary +# (version 5.25) labeled the above entry as "TTComp archive data" # ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation? 0 string ESP ESP archive data # ZPack @@ -434,16 +442,34 @@ # AIN 0 string \x33\x18 AIN archive data 0 string \x33\x17 AIN archive data -# XPA32 -0 string xpa\0\1 XPA32 archive data +# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015 # SZip (TODO: doesn't catch all versions) 0 string SZ\x0a\4 SZip archive data # XPack DiskImage -0 string jm XPack DiskImage archive data +# *.XDI updated by Joerg Jenderek Sep 2015 +# ftp://ftp.sac.sk/pub/sac/pack/0index.txt +# GRR: this test is still too general as it catches also text files starting with jm +0 string jm +# only found examples with this additional characteristic 2 bytes +>2 string \x2\x4 Xpack DiskImage archive data +#!:ext xdi # XPack Data -0 string xpa XPack archive data +# *.xpa updated by Joerg Jenderek Sep 2015 +# ftp://ftp.elf.stuba.sk/pub/pc/pack/ +0 string xpa XPA +!:ext xpa +# XPA32 +# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip +# created by XPA32.EXE version 1.0.2 for Windows +>0 string xpa\0\1 \b32 archive data +# created by XPACK.COM version 1.67m or 1.67r with short 0x1800 +>3 ubeshort !0x0001 \bck archive data # XPack Single Data -0 string \xc3\x8d\ jm XPack single archive data +# changed by Joerg Jenderek Sep 2015 back to like in version 5.12 +# letter 'I'+ acute accent is equivalent to \xcd +0 string \xcd\ jm Xpack single archive data +#!:mime application/x-xpa-compressed +!:ext xpa # TODO: missing due to unknown magic/magic at end of file: #DWC @@ -526,55 +552,212 @@ >>0x36 string >\0 fstype %.8s # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) -2 string -lh0- LHarc 1.x/ARX archive data [lh0] -!:mime application/x-lharc -2 string -lh1- LHarc 1.x/ARX archive data [lh1] -!:mime application/x-lharc -2 string -lz4- LHarc 1.x archive data [lz4] -!:mime application/x-lharc -2 string -lz5- LHarc 1.x archive data [lz5] -!:mime application/x-lharc +# Update: Joerg Jenderek +# URL: https://en.wikipedia.org/wiki/LHA_(file_format) +# Reference: http://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html +# +# check and display information of lharc (LHa,PMarc) file +0 name lharc-file +# check 1st character of method id like -lz4- -lh5- or -pm2- +>2 string - +# check 5th character of method id +>>6 string - +# check header level 0 1 2 3 +>>>20 ubyte <4 +# check 2nd, 3th and 4th character of method id +>>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b +!:mime application/x-lzh-compressed +# creator type "LHA " +!:apple ????LHA +# display archive type name like "LHa/LZS archive data" or "LArc archive" +>>>>>2 string -lz \b +!:ext lzs +# already known -lzs- -lz4- -lz5- with old names +>>>>>>2 string -lzs LHa/LZS archive data +>>>>>>3 regex \^lz[45] LHarc 1.x archive data +# missing -lz?- with wikipedia names +>>>>>>3 regex \^lz[2378] LArc archive +# display archive type name like "LHa (2.x) archive data" +>>>>>2 string -lh \b +# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names +>>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data +# LHice archiver use ".ICE" as name extension instead usual one ".lzh" +# FOOBAR archiver use ".foo" as name extension instead usual one +# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment +>>>>>>>2 string -lh1 \b +!:ext lha/lzh/ice +>>>>>>3 regex \^lh[23d] LHa 2.x? archive data +>>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data +>>>>>>3 regex \^lh[456] LHa (2.x) archive data +>>>>>>>2 string -lh5 \b +# https://en.wikipedia.org/wiki/BIOS +# Some mainboard BIOS like Award use LHa compression. So archives with unusal extension are found like +# bios.rom , kd7_v14.bin, 1010.004, ... +!:ext lha/lzh/rom/bin +# missing -lh?- variants (Joe Jared) +>>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive +# UNLHA32 2.67a +>>>>>>2 string -lhx LHa (UNLHA32) archive +# lha archives with standard file name extensions ".lha" ".lzh" +>>>>>>3 regex !\^(lh1|lh5) \b +!:ext lha/lzh +# this should not happen if all -lh variants are described +>>>>>>2 default x LHa (unknown) archive +#!:ext lha +# PMarc +>>>>>3 regex \^pm[012] PMarc archive data +!:ext pma +# append method id without leading and trailing minus character +>>>>>3 string x [%3.3s] +>>>>>>0 use lharc-header +# +# check and display information of lharc header +0 name lharc-header +# header size 0x4 , 0x1b-0x61 +>0 ubyte x +# compressed data size != compressed file size +#>7 ulelong x \b, data size %d +# attribute: 0x2~?? 0x10~symlink|target 0x20~normal +#>19 ubyte x \b, 19_0x%x +# level identifier 0 1 2 3 +#>20 ubyte x \b, level %d +# time stamp +#>15 ubelong x DATE 0x%8.8x +# OS ID for level 1 +>20 ubyte 1 +# 0x20 types find for *.rom files +>>(21.b+24) ubyte <0x21 \b, 0x%x OS +# ascii type like M for MSDOS +>>(21.b+24) ubyte >0x20 \b, '%c' OS +# OS ID for level 2 +>20 ubyte 2 +#>>23 ubyte x \b, OS ID 0x%x +>>23 ubyte <0x21 \b, 0x%x OS +>>23 ubyte >0x20 \b, '%c' OS +# filename only for level 0 and 1 +>20 ubyte <2 +# length of filename +>>21 ubyte >0 \b, with +# filename +>>>21 pstring x "%s" +# +#2 string -lh0- LHarc 1.x/ARX archive data [lh0] +#!:mime application/x-lharc +2 string -lh0- +>0 use lharc-file +#2 string -lh1- LHarc 1.x/ARX archive data [lh1] +#!:mime application/x-lharc +2 string -lh1- +>0 use lharc-file +# NEW -lz2- ... -lz8- +2 string -lz2- +>0 use lharc-file +2 string -lz3- +>0 use lharc-file +2 string -lz4- +>0 use lharc-file +2 string -lz5- +>0 use lharc-file +2 string -lz7- +>0 use lharc-file +2 string -lz8- +>0 use lharc-file # [never seen any but the last; -lh4- reported in comp.compression:] -2 string -lzs- LHa/LZS archive data [lzs] -!:mime application/x-lha -2 string -lh\40- LHa 2.x? archive data [lh ] -!:mime application/x-lha -2 string -lhd- LHa 2.x? archive data [lhd] -!:mime application/x-lha -2 string -lh2- LHa 2.x? archive data [lh2] -!:mime application/x-lha -2 string -lh3- LHa 2.x? archive data [lh3] -!:mime application/x-lha -2 string -lh4- LHa (2.x) archive data [lh4] -!:mime application/x-lha -2 string -lh5- LHa (2.x) archive data [lh5] -!:mime application/x-lha -2 string -lh6- LHa (2.x) archive data [lh6] -!:mime application/x-lha -2 string -lh7- LHa (2.x)/LHark archive data [lh7] -!:mime application/x-lha ->20 byte x - header level %d +#2 string -lzs- LHa/LZS archive data [lzs] +2 string -lzs- +>0 use lharc-file +# According to wikipedia and others such a version does not exist +#2 string -lh\40- LHa 2.x? archive data [lh ] +#2 string -lhd- LHa 2.x? archive data [lhd] +2 string -lhd- +>0 use lharc-file +#2 string -lh2- LHa 2.x? archive data [lh2] +2 string -lh2- +>0 use lharc-file +#2 string -lh3- LHa 2.x? archive data [lh3] +2 string -lh3- +>0 use lharc-file +#2 string -lh4- LHa (2.x) archive data [lh4] +2 string -lh4- +>0 use lharc-file +#2 string -lh5- LHa (2.x) archive data [lh5] +2 string -lh5- +>0 use lharc-file +#2 string -lh6- LHa (2.x) archive data [lh6] +2 string -lh6- +>0 use lharc-file +#2 string -lh7- LHa (2.x)/LHark archive data [lh7] +2 string -lh7- +# !:mime application/x-lha +# >20 byte x - header level %d +>0 use lharc-file +# NEW -lh8- ... -lhe- , -lhx- +2 string -lh8- +>0 use lharc-file +2 string -lh9- +>0 use lharc-file +2 string -lha- +>0 use lharc-file +2 string -lhb- +>0 use lharc-file +2 string -lhc- +>0 use lharc-file +2 string -lhe- +>0 use lharc-file +2 string -lhx- +>0 use lharc-file # taken from idarc [JW] 2 string -lZ PUT archive data -2 string -lz LZS archive data +# already done by LHarc magics +# this should never happen if all sub types of LZS archive are identified +#2 string -lz LZS archive data 2 string -sw1- Swag archive data -# RAR archiver (Greg Roelofs, newt@uchicago.edu) -0 string Rar! RAR archive data, +0 name rar-file-header +>24 byte 15 \b, v1.5 +>24 byte 20 \b, v2.0 +>24 byte 29 \b, v4 +>15 byte 0 \b, os: MS-DOS +>15 byte 1 \b, os: OS/2 +>15 byte 2 \b, os: Win32 +>15 byte 3 \b, os: Unix +>15 byte 4 \b, os: Mac OS +>15 byte 5 \b, os: BeOS + +0 name rar-archive-header +>3 leshort&0x1ff >0 \b, flags: +>>3 leshort &0x01 ArchiveVolume +>>3 leshort &0x02 Commented +>>3 leshort &0x04 Locked +>>3 leshort &0x10 NewVolumeNaming +>>3 leshort &0x08 Solid +>>3 leshort &0x20 Authenticated +>>3 leshort &0x40 RecoveryRecordPresent +>>3 leshort &0x80 EncryptedBlockHeader +>>3 leshort &0x100 FirstVolume + +# RAR (Roshal Archive) archive +0 string Rar!\x1a\7\0 RAR archive data !:mime application/x-rar ->44 byte x v%0x, ->10 byte >0 flags: ->>10 byte &0x01 Archive volume, ->>10 byte &0x02 Commented, ->>10 byte &0x04 Locked, ->>10 byte &0x08 Solid, ->>10 byte &0x20 Authenticated, ->35 byte 0 os: MS-DOS ->35 byte 1 os: OS/2 ->35 byte 2 os: Win32 ->35 byte 3 os: Unix -# some old version? idarc says: -0 string RE\x7e\x5e RAR archive data +!:ext rar/cbr +# file header +>(0xc.l+9) byte 0x74 +>>(0xc.l+7) use rar-file-header +# subblock seems to share information with file header +>(0xc.l+9) byte 0x7a +>>(0xc.l+7) use rar-file-header +>9 byte 0x73 +>>7 use rar-archive-header + +0 string Rar!\x1a\7\1\0 RAR archive data, v5 +!:mime application/x-rar +!:ext rar + +# Very old RAR archive +# http://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf +0 string RE\x7e\x5e RAR archive data ((26.s+30) leshort 0xcafe Java archive data (JAR) !:mime application/java-archive +# iOS App +>(26.s+30) leshort !0xcafe +>>26 string !\x8\0\0\0mimetype +>>>30 string Payload/ +>>>>38 search/64 .app/ iOS App +!:mime application/x-ios-app + + # Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) # Next line excludes specialized formats: >(26.s+30) leshort !0xcafe @@ -714,7 +908,17 @@ >>>4 byte 0x0a \b, at least v1.0 to extract >>>4 byte 0x0b \b, at least v1.1 to extract >>>4 byte 0x14 \b, at least v2.0 to extract ->>>4 byte 0x2d \b, at least v3.0 to extract +>>>4 byte 0x15 \b, at least v2.1 to extract +>>>4 byte 0x19 \b, at least v2.5 to extract +>>>4 byte 0x1b \b, at least v2.7 to extract +>>>4 byte 0x2d \b, at least v4.5 to extract +>>>4 byte 0x2e \b, at least v4.6 to extract +>>>4 byte 0x32 \b, at least v5.0 to extract +>>>4 byte 0x33 \b, at least v5.1 to extract +>>>4 byte 0x34 \b, at least v5.2 to extract +>>>4 byte 0x3d \b, at least v6.1 to extract +>>>4 byte 0x3e \b, at least v6.2 to extract +>>>4 byte 0x3f \b, at least v6.3 to extract >>>0x161 string WINZIP \b, WinZIP self-extracting # StarView Metafile @@ -746,12 +950,24 @@ 0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data # # PMA (CP/M derivative of LHA) +# Update: Joerg Jenderek +# URL: https://en.wikipedia.org/wiki/LHA_(file_format) # -2 string -pm0- PMarc archive data [pm0] -2 string -pm1- PMarc archive data [pm1] -2 string -pm2- PMarc archive data [pm2] +#2 string -pm0- PMarc archive data [pm0] +2 string -pm0- +>0 use lharc-file +#2 string -pm1- PMarc archive data [pm1] +2 string -pm1- +>0 use lharc-file +#2 string -pm2- PMarc archive data [pm2] +2 string -pm2- +>0 use lharc-file 2 string -pms- PMarc SFX archive (CP/M, DOS) +#!:mime application/x-foobar-exec +!:ext com 5 string -pc1- PopCom compressed executable (CP/M) +#!:mime application/x- +#!:ext com # From Rafael Laboissiere # The Project Revision Control System (see @@ -784,6 +1000,9 @@ # Felix von Leitner 0 string d8:announce BitTorrent file !:mime application/x-bittorrent +# Durval Menezes, +0 string d13:announce-list BitTorrent file +!:mime application/x-bittorrent # Atari MSA archive - Teemu Hukkanen 0 beshort 0x0e0f Atari MSA archive data @@ -871,19 +1090,16 @@ # From "Nelson A. de Oliveira" 0 string MPQ\032 MoPaQ (MPQ) archive -# From: Dirk Jagdmann -# xar archive format: http://code.google.com/p/xar/ -0 string xar! xar archive ->6 beshort x - version %d - # From: "Nelson A. de Oliveira" # .kgb 0 string KGB_arch KGB Archiver file >10 string x with compression level %.1s # xar (eXtensible ARchiver) archive +# xar archive format: http://code.google.com/p/xar/ # From: "David Remahl" 0 string xar! xar archive +!:mime application/x-xar #>4 beshort x header size %d >6 beshort x version %d, #>8 quad x compressed TOC: %d, @@ -911,6 +1127,9 @@ # ZPAQ: http://mattmahoney.net/dc/zpaq.html 0 string zPQ ZPAQ stream >3 byte x \b, level %d +# From: Barry Carter +# http://encode.ru/threads/456-zpaq-updates/page32 +0 string 7kSt ZPAQ file # BBeB ebook, unencrypted (LRF format) # URL: http://www.sven.de/librie/Librie/LrfFormat @@ -936,12 +1155,12 @@ >3 ubyte 0 \b, no compression >3 ubyte 2 \b, fast compression (Z1) >3 ubyte 3 \b, medium compression (Z2) ->3 ubyte >3 +>3 ubyte >3 >>3 ubyte <11 \b, compression (Z%d-1) ->2 ubyte&0x08 0x00 +>2 ubyte&0x08 0x00 # ~ 30 byte password field only for *.gho >>12 ubequad !0 \b, password protected ->>44 ubyte !1 +>>44 ubyte !1 # 1~Image All, sector-by-sector only for *.gho >>>10 ubyte 1 \b, sector copy # 1~Image Boot track only for *.gho @@ -951,6 +1170,19 @@ # optional image description only *.gho >>0xff string >\0 "%-.254s" # look for DOS sector end sequence ->0xE08 search/7776 \x55\xAA ->>&-512 indirect x \b; contains - +>0xE08 search/7776 \x55\xAA +>>&-512 indirect x \b; contains + +# Google Chrome extensions +# https://developer.chrome.com/extensions/crx +# https://developer.chrome.com/extensions/hosting +0 string Cr24 Google Chrome extension +!:mime application/x-chrome-extension +>4 ulong x \b, version %u + +# SeqBox - Sequenced container +# ext: sbx, seqbox +# Marco Pontello marcopon@gmail.com +# reference: https://github.com/MarcoPon/SeqBox +0 string SBx SeqBox, +>3 byte x version %d -- cgit v1.2.1