diff options
author | Tom Tromey <tromey@gcc.gnu.org> | 2005-09-12 17:04:02 +0000 |
---|---|---|
committer | Tom Tromey <tromey@gcc.gnu.org> | 2005-09-12 17:04:02 +0000 |
commit | 3ec980b182de950223663d4a3ef65177fa887501 (patch) | |
tree | 253197a1e98aea7fe0dd20a442e82f1f30007b5a /zlib | |
parent | 303ae446cf2ab1469d83e83f4cc5168c899c3cee (diff) | |
download | gcc-3ec980b182de950223663d4a3ef65177fa887501.tar.gz |
This commit was generated by cvs2svn to compensate for changes in r104181,
which included commits to RCS files with non-trunk default branches.
From-SVN: r104182
Diffstat (limited to 'zlib')
45 files changed, 3906 insertions, 3348 deletions
diff --git a/zlib/as400/compile.clp b/zlib/as400/compile.clp index dcd24215d62..85549515006 100644 --- a/zlib/as400/compile.clp +++ b/zlib/as400/compile.clp @@ -118,6 +118,6 @@ &MODLIB/INFTREES &MODLIB/TREES + &MODLIB/UNCOMPR &MODLIB/ZUTIL) + SRCFILE(&SRCLIB/&CTLFILE) SRCMBR(BNDSRC) + - TEXT('ZLIB 1.2.1') TGTRLS(V4R4M0) + TEXT('ZLIB 1.2.3') TGTRLS(V4R4M0) ENDPGM diff --git a/zlib/as400/readme.txt b/zlib/as400/readme.txt index eef7cb2ec63..beae13f565b 100644 --- a/zlib/as400/readme.txt +++ b/zlib/as400/readme.txt @@ -1,4 +1,4 @@ - ZLIB version 1.2.1 for AS400 installation instructions + ZLIB version 1.2.3 for AS400 installation instructions I) From an AS400 *SAVF file: diff --git a/zlib/as400/zlib.inc b/zlib/as400/zlib.inc index 03c6cf089bb..7bbfb7e4171 100644 --- a/zlib/as400/zlib.inc +++ b/zlib/as400/zlib.inc @@ -1,7 +1,7 @@ * ZLIB.INC - Interface to the general purpose compression library * * ILE RPG400 version by Patrick Monnerat, DATASPHERE. - * Version 1.2.1 + * Version 1.2.3 * * * WARNING: @@ -20,8 +20,12 @@ * Constants ************************************************************************** * - D ZLIB_VERSION C '1.2.1' Header's version - D ZLIB_VERNUM C X'1210' + * Versioning information. + * + D ZLIB_VERSION C '1.2.3' + D ZLIB_VERNUM C X'1230' + * + * Other equates. * D Z_NO_FLUSH C 0 D Z_SYNC_FLUSH C 2 diff --git a/zlib/contrib/ada/mtest.adb b/zlib/contrib/ada/mtest.adb index 91a96cd1e9c..c4dfd080f0c 100644 --- a/zlib/contrib/ada/mtest.adb +++ b/zlib/contrib/ada/mtest.adb @@ -5,10 +5,10 @@ -- -- -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- Continuous test for ZLib multithreading. If the test is fail --- Wou should provide thread safe allocation routines for the Z_Stream. +-- Continuous test for ZLib multithreading. If the test would fail +-- we should provide thread safe allocation routines for the Z_Stream. 
-- --- $Id: mtest.adb,v 1.2 2003/08/12 12:11:05 vagul Exp $ +-- $Id: mtest.adb,v 1.4 2004/07/23 07:49:54 vagul Exp $ with ZLib; with Ada.Streams; @@ -148,6 +148,9 @@ procedure MTest is pragma Unreferenced (Test); + Dummy : Character; + begin - null; + Ada.Text_IO.Get_Immediate (Dummy); + Stop := True; end MTest; diff --git a/zlib/contrib/ada/read.adb b/zlib/contrib/ada/read.adb index 184ea00c318..1f2efbfeb80 100644 --- a/zlib/contrib/ada/read.adb +++ b/zlib/contrib/ada/read.adb @@ -6,7 +6,7 @@ -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- $Id: read.adb,v 1.7 2003/08/12 12:12:35 vagul Exp $ +-- $Id: read.adb,v 1.8 2004/05/31 10:53:40 vagul Exp $ -- Test/demo program for the generic read interface. @@ -68,7 +68,11 @@ procedure Read is -- ZLib.Read -- reading data from the File_In. - procedure Read is new ZLib.Read (Read, Read_Buffer, Read_First, Read_Last); + procedure Read is new ZLib.Read + (Read, + Read_Buffer, + Rest_First => Read_First, + Rest_Last => Read_Last); ---------- -- Read -- @@ -103,6 +107,7 @@ procedure Read is Pack_Size := 0; Offset := 1; Read_First := Read_Buffer'Last + 1; + Read_Last := Read_Buffer'Last; end Reset; begin diff --git a/zlib/contrib/ada/readme.txt b/zlib/contrib/ada/readme.txt index ad02c225ad0..ce4d2cadf0d 100644 --- a/zlib/contrib/ada/readme.txt +++ b/zlib/contrib/ada/readme.txt @@ -1,23 +1,34 @@ - - ZLib for Ada thick binding (ZLib.Ada) - Release 1.2 + ZLib for Ada thick binding (ZLib.Ada) + Release 1.3 ZLib.Ada is a thick binding interface to the popular ZLib data compression library, available at http://www.gzip.org/zlib/. It provides Ada-style access to the ZLib C library. - Here are the main changes since ZLib.Ada 1.1: + Here are the main changes since ZLib.Ada 1.2: + +- Attension: ZLib.Read generic routine have a initialization requirement + for Read_Last parameter now. 
It is a bit incompartible with previous version, + but extends functionality, we could use new parameters Allow_Read_Some and + Flush now. + +- Added Is_Open routines to ZLib and ZLib.Streams packages. -- The default header type has a name "Default" now. Auto is used only for - automatic GZip/ZLib header detection. +- Add pragma Assert to check Stream_Element is 8 bit. -- Added test for multitasking mtest.adb. +- Fix extraction to buffer with exact known decompressed size. Error reported by + Steve Sangwine. -- Added GNAT project file zlib.gpr. +- Fix definition of ULong (changed to unsigned_long), fix regression on 64 bits + computers. Patch provided by Pascal Obry. +- Add Status_Error exception definition. - How to build ZLib.Ada under GNAT +- Add pragma Assertion that Ada.Streams.Stream_Element size is 8 bit. + + + How to build ZLib.Ada under GNAT You should have the ZLib library already build on your computer, before building ZLib.Ada. Make the directory of ZLib.Ada sources current and @@ -30,7 +41,7 @@ Or use the GNAT project file build for GNAT 3.15 or later: gnatmake -Pzlib.gpr -L<directory where libz.a is> - How to build ZLib.Ada under Aonix ObjectAda for Win32 7.2.2 + How to build ZLib.Ada under Aonix ObjectAda for Win32 7.2.2 1. Make a project with all *.ads and *.adb files from the distribution. 2. Build the libz.a library from the ZLib C sources. @@ -40,7 +51,7 @@ Or use the GNAT project file build for GNAT 3.15 or later: 6. Build the executable using test.adb as a main procedure. - How to use ZLib.Ada + How to use ZLib.Ada The source files test.adb and read.adb are small demo programs that show the main functionality of ZLib.Ada. @@ -50,3 +61,5 @@ The routines from the package specifications are commented. 
Homepage: http://zlib-ada.sourceforge.net/ Author: Dmitriy Anisimkov <anisimkov@yahoo.com> + +Contributors: Pascal Obry <pascal@obry.org>, Steve Sangwine <sjs@essex.ac.uk> diff --git a/zlib/contrib/ada/zlib-streams.adb b/zlib/contrib/ada/zlib-streams.adb index d213b5c1766..b6497bae286 100644 --- a/zlib/contrib/ada/zlib-streams.adb +++ b/zlib/contrib/ada/zlib-streams.adb @@ -6,7 +6,7 @@ -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- $Id: zlib-streams.adb,v 1.9 2003/08/12 13:15:31 vagul Exp $ +-- $Id: zlib-streams.adb,v 1.10 2004/05/31 10:53:40 vagul Exp $ with Ada.Unchecked_Deallocation; @@ -90,6 +90,7 @@ package body ZLib.Streams is Stream.Buffer := new Buffer_Subtype; Stream.Rest_First := Stream.Buffer'Last + 1; + Stream.Rest_Last := Stream.Buffer'Last; end if; end Create; @@ -113,6 +114,15 @@ package body ZLib.Streams is end loop; end Flush; + ------------- + -- Is_Open -- + ------------- + + function Is_Open (Stream : Stream_Type) return Boolean is + begin + return Is_Open (Stream.Reader) or else Is_Open (Stream.Writer); + end Is_Open; + ---------- -- Read -- ---------- diff --git a/zlib/contrib/ada/zlib-streams.ads b/zlib/contrib/ada/zlib-streams.ads index 1d5e9048991..f0193c6baee 100644 --- a/zlib/contrib/ada/zlib-streams.ads +++ b/zlib/contrib/ada/zlib-streams.ads @@ -6,7 +6,7 @@ -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- $Id: zlib-streams.ads,v 1.11 2003/08/12 13:15:31 vagul Exp $ +-- $Id: zlib-streams.ads,v 1.12 2004/05/31 10:53:40 vagul Exp $ package ZLib.Streams is @@ -77,6 +77,8 @@ package ZLib.Streams is -- !!! When the Need_Header is False ZLib-Ada is using undocumented -- ZLib 1.1.4 functionality to do not create/wait for ZLib headers. 
+ function Is_Open (Stream : Stream_Type) return Boolean; + procedure Close (Stream : in out Stream_Type); private diff --git a/zlib/contrib/ada/zlib-thin.adb b/zlib/contrib/ada/zlib-thin.adb index 163bd5b972f..0ca4a712046 100644 --- a/zlib/contrib/ada/zlib-thin.adb +++ b/zlib/contrib/ada/zlib-thin.adb @@ -6,12 +6,11 @@ -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- $Id: zlib-thin.adb,v 1.6 2003/01/21 15:26:37 vagul Exp $ +-- $Id: zlib-thin.adb,v 1.8 2003/12/14 18:27:31 vagul Exp $ package body ZLib.Thin is - ZLIB_VERSION : constant Chars_Ptr := - Interfaces.C.Strings.New_String ("1.1.4"); + ZLIB_VERSION : constant Chars_Ptr := zlibVersion; Z_Stream_Size : constant Int := Z_Stream'Size / System.Storage_Unit; @@ -38,14 +37,6 @@ package body ZLib.Thin is ------------------ function Deflate_Init - (strm : in Z_Streamp; - level : in Int := Z_DEFAULT_COMPRESSION) - return Int is - begin - return deflateInit (strm, level, ZLIB_VERSION, Z_Stream_Size); - end Deflate_Init; - - function Deflate_Init (strm : Z_Streamp; level : Int; method : Int; @@ -69,16 +60,15 @@ package body ZLib.Thin is -- Inflate_Init -- ------------------ - function Inflate_Init (strm : Z_Streamp) return Int is - begin - return inflateInit (strm, ZLIB_VERSION, Z_Stream_Size); - end Inflate_Init; - function Inflate_Init (strm : Z_Streamp; windowBits : Int) return Int is begin return inflateInit2 (strm, windowBits, ZLIB_VERSION, Z_Stream_Size); end Inflate_Init; + ------------------------ + -- Last_Error_Message -- + ------------------------ + function Last_Error_Message (Strm : in Z_Stream) return String is use Interfaces.C.Strings; begin @@ -89,54 +79,28 @@ package body ZLib.Thin is end if; end Last_Error_Message; - ------------- - -- Need_In -- - ------------- - - function Need_In (strm : Z_Stream) return Boolean is - begin - return strm.Avail_In = 0; - end Need_In; - - -------------- - -- Need_Out -- - -------------- 
- - function Need_Out (strm : Z_Stream) return Boolean is - begin - return strm.Avail_Out = 0; - end Need_Out; - ------------ -- Set_In -- ------------ procedure Set_In (Strm : in out Z_Stream; - Buffer : in Byte_Access; - Size : in UInt) is + Buffer : in Voidp; + Size : in UInt) is begin Strm.Next_In := Buffer; Strm.Avail_In := Size; end Set_In; - procedure Set_In - (Strm : in out Z_Stream; - Buffer : in Voidp; - Size : in UInt) is - begin - Set_In (Strm, Bytes.To_Pointer (Buffer), Size); - end Set_In; - ------------------ -- Set_Mem_Func -- ------------------ procedure Set_Mem_Func (Strm : in out Z_Stream; - Opaque : in Voidp; - Alloc : in alloc_func; - Free : in free_func) is + Opaque : in Voidp; + Alloc : in alloc_func; + Free : in free_func) is begin Strm.opaque := Opaque; Strm.zalloc := Alloc; @@ -149,21 +113,13 @@ package body ZLib.Thin is procedure Set_Out (Strm : in out Z_Stream; - Buffer : in Byte_Access; - Size : in UInt) is + Buffer : in Voidp; + Size : in UInt) is begin Strm.Next_Out := Buffer; Strm.Avail_Out := Size; end Set_Out; - procedure Set_Out - (Strm : in out Z_Stream; - Buffer : in Voidp; - Size : in UInt) is - begin - Set_Out (Strm, Bytes.To_Pointer (Buffer), Size); - end Set_Out; - -------------- -- Total_In -- -------------- diff --git a/zlib/contrib/ada/zlib-thin.ads b/zlib/contrib/ada/zlib-thin.ads index c227374f64b..d4407eb800d 100644 --- a/zlib/contrib/ada/zlib-thin.ads +++ b/zlib/contrib/ada/zlib-thin.ads @@ -6,10 +6,11 @@ -- Open source license information is in the zlib.ads file. 
-- ---------------------------------------------------------------- --- $Id: zlib-thin.ads,v 1.8 2003/08/12 13:16:51 vagul Exp $ +-- $Id: zlib-thin.ads,v 1.11 2004/07/23 06:33:11 vagul Exp $ with Interfaces.C.Strings; -with System.Address_To_Access_Conversions; + +with System; private package ZLib.Thin is @@ -36,18 +37,18 @@ private package ZLib.Thin is -- zconf.h:216 type Int is new Interfaces.C.int; - type ULong is new Interfaces.C.unsigned; -- 32 bits or more - -- zconf.h:217 + type ULong is new Interfaces.C.unsigned_long; -- 32 bits or more + -- zconf.h:217 subtype Chars_Ptr is Interfaces.C.Strings.chars_ptr; type ULong_Access is access ULong; type Int_Access is access Int; - subtype Voidp is System.Address; -- zconf.h:232 - package Bytes is new System.Address_To_Access_Conversions (Byte); + subtype Voidp is System.Address; -- zconf.h:232 - subtype Byte_Access is Bytes.Object_Pointer; + subtype Byte_Access is Voidp; + Nul : constant Voidp := System.Null_Address; -- end from zconf Z_NO_FLUSH : constant := 8#0000#; -- zlib.h:125 @@ -251,12 +252,6 @@ private package ZLib.Thin is stream_size : Int) return Int; - function Deflate_Init - (strm : in Z_Streamp; - level : in Int := Z_DEFAULT_COMPRESSION) - return Int; - pragma Inline (Deflate_Init); - function deflateInit2 (strm : Z_Streamp; level : Int; @@ -284,9 +279,6 @@ private package ZLib.Thin is stream_size : Int) return Int; - function Inflate_Init (strm : Z_Streamp) return Int; - pragma Inline (Inflate_Init); - function inflateInit2 (strm : in Z_Streamp; windowBits : in Int; @@ -318,20 +310,6 @@ private package ZLib.Thin is -- has dropped to zero. The application must initialize zalloc, zfree and -- opaque before calling the init function. - function Need_In (strm : in Z_Stream) return Boolean; - -- return true when we do not need to setup Next_In and Avail_In fields. 
- pragma Inline (Need_In); - - function Need_Out (strm : in Z_Stream) return Boolean; - -- return true when we do not need to setup Next_Out and Avail_Out field. - pragma Inline (Need_Out); - - procedure Set_In - (Strm : in out Z_Stream; - Buffer : in Byte_Access; - Size : in UInt); - pragma Inline (Set_In); - procedure Set_In (Strm : in out Z_Stream; Buffer : in Voidp; @@ -340,12 +318,6 @@ private package ZLib.Thin is procedure Set_Out (Strm : in out Z_Stream; - Buffer : in Byte_Access; - Size : in UInt); - pragma Inline (Set_Out); - - procedure Set_Out - (Strm : in out Z_Stream; Buffer : in Voidp; Size : in UInt); pragma Inline (Set_Out); @@ -388,19 +360,13 @@ private package ZLib.Thin is function zlibCompileFlags return ULong; - function deflatePrime - (strm : Z_Streamp; - bits : Int; - value : Int) - return Int; - private type Z_Stream is record -- zlib.h:68 - Next_In : Byte_Access; -- next input byte + Next_In : Voidp := Nul; -- next input byte Avail_In : UInt := 0; -- number of bytes available at next_in Total_In : ULong := 0; -- total nb of input bytes read so far - Next_Out : Byte_Access; -- next output byte should be put there + Next_Out : Voidp := Nul; -- next output byte should be put there Avail_Out : UInt := 0; -- remaining free space at next_out Total_Out : ULong := 0; -- total nb of bytes output so far msg : Chars_Ptr; -- last error message, NULL if no error @@ -460,14 +426,13 @@ private pragma Import (C, inflateSyncPoint, "inflateSyncPoint"); pragma Import (C, get_crc_table, "get_crc_table"); - -- added in zlib 1.2.1: + -- since zlib 1.2.0: pragma Import (C, inflateCopy, "inflateCopy"); pragma Import (C, compressBound, "compressBound"); pragma Import (C, deflateBound, "deflateBound"); pragma Import (C, gzungetc, "gzungetc"); pragma Import (C, zlibCompileFlags, "zlibCompileFlags"); - pragma Import (C, deflatePrime, "deflatePrime"); pragma Import (C, inflateBackInit, "inflateBackInit_"); diff --git a/zlib/contrib/ada/zlib.adb 
b/zlib/contrib/ada/zlib.adb index 93bf8852f72..8b6fd686ac7 100644 --- a/zlib/contrib/ada/zlib.adb +++ b/zlib/contrib/ada/zlib.adb @@ -1,12 +1,12 @@ ---------------------------------------------------------------- -- ZLib for Ada thick binding. -- -- -- --- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- Copyright (C) 2002-2004 Dmitriy Anisimkov -- -- -- -- Open source license information is in the zlib.ads file. -- ---------------------------------------------------------------- --- $Id: zlib.adb,v 1.19 2003/07/13 16:02:19 vagul Exp $ +-- $Id: zlib.adb,v 1.31 2004/09/06 06:53:19 vagul Exp $ with Ada.Exceptions; with Ada.Unchecked_Conversion; @@ -34,7 +34,7 @@ package body ZLib is VERSION_ERROR); type Flate_Step_Function is access - function (Strm : Thin.Z_Streamp; flush : Thin.Int) return Thin.Int; + function (Strm : in Thin.Z_Streamp; Flush : in Thin.Int) return Thin.Int; pragma Convention (C, Flate_Step_Function); type Flate_End_Function is access @@ -82,13 +82,13 @@ package body ZLib is Flush_Finish : constant array (Boolean) of Flush_Mode := (True => Finish, False => No_Flush); - procedure Raise_Error (Stream : Z_Stream); + procedure Raise_Error (Stream : in Z_Stream); pragma Inline (Raise_Error); - procedure Raise_Error (Message : String); + procedure Raise_Error (Message : in String); pragma Inline (Raise_Error); - procedure Check_Error (Stream : Z_Stream; Code : Thin.Int); + procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int); procedure Free is new Ada.Unchecked_Deallocation (Z_Stream, Z_Stream_Access); @@ -118,7 +118,7 @@ package body ZLib is -- Check_Error -- ----------------- - procedure Check_Error (Stream : Z_Stream; Code : Thin.Int) is + procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int) is use type Thin.Int; begin if Code /= Thin.Z_OK then @@ -138,10 +138,11 @@ package body ZLib is is Code : Thin.Int; begin - Code := Flate (Filter.Compression).Done - (To_Thin_Access (Filter.Strm)); + if not Ignore_Error and then not Is_Open 
(Filter) then + raise Status_Error; + end if; - Filter.Opened := False; + Code := Flate (Filter.Compression).Done (To_Thin_Access (Filter.Strm)); if Ignore_Error or else Code = Thin.Z_OK then Free (Filter.Strm); @@ -154,7 +155,7 @@ package body ZLib is Ada.Exceptions.Raise_Exception (ZLib_Error'Identity, Return_Code_Enum'Image (Return_Code (Code)) - & ": " & Error_Message); + & ": " & Error_Message); end; end if; end Close; @@ -170,10 +171,9 @@ package body ZLib is is use Thin; begin - return Unsigned_32 (crc32 - (ULong (CRC), - Bytes.To_Pointer (Data'Address), - Data'Length)); + return Unsigned_32 (crc32 (ULong (CRC), + Data'Address, + Data'Length)); end CRC32; procedure CRC32 @@ -192,13 +192,17 @@ package body ZLib is Level : in Compression_Level := Default_Compression; Strategy : in Strategy_Type := Default_Strategy; Method : in Compression_Method := Deflated; - Window_Bits : in Window_Bits_Type := 15; - Memory_Level : in Memory_Level_Type := 8; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Memory_Level : in Memory_Level_Type := Default_Memory_Level; Header : in Header_Type := Default) is use type Thin.Int; Win_Bits : Thin.Int := Thin.Int (Window_Bits); begin + if Is_Open (Filter) then + raise Status_Error; + end if; + -- We allow ZLib to make header only in case of default header type. -- Otherwise we would either do header by ourselfs, or do not do -- header at all. 
@@ -216,10 +220,9 @@ package body ZLib is Filter.Offset := Simple_GZip_Header'Last + 1; end if; - Filter.Strm := new Z_Stream; + Filter.Strm := new Z_Stream; Filter.Compression := True; Filter.Stream_End := False; - Filter.Opened := True; Filter.Header := Header; if Thin.Deflate_Init @@ -255,18 +258,18 @@ package body ZLib is ----------------------- procedure Generic_Translate - (Filter : in out ZLib.Filter_Type; - In_Buffer_Size : Integer := Default_Buffer_Size; - Out_Buffer_Size : Integer := Default_Buffer_Size) + (Filter : in out ZLib.Filter_Type; + In_Buffer_Size : in Integer := Default_Buffer_Size; + Out_Buffer_Size : in Integer := Default_Buffer_Size) is - In_Buffer : Stream_Element_Array - (1 .. Stream_Element_Offset (In_Buffer_Size)); + In_Buffer : Stream_Element_Array + (1 .. Stream_Element_Offset (In_Buffer_Size)); Out_Buffer : Stream_Element_Array - (1 .. Stream_Element_Offset (Out_Buffer_Size)); - Last : Stream_Element_Offset; - In_Last : Stream_Element_Offset; - In_First : Stream_Element_Offset; - Out_Last : Stream_Element_Offset; + (1 .. Stream_Element_Offset (Out_Buffer_Size)); + Last : Stream_Element_Offset; + In_Last : Stream_Element_Offset; + In_First : Stream_Element_Offset; + Out_Last : Stream_Element_Offset; begin Main : loop Data_In (In_Buffer, Last); @@ -275,18 +278,21 @@ package body ZLib is loop Translate - (Filter, - In_Buffer (In_First .. Last), - In_Last, - Out_Buffer, - Out_Last, - Flush_Finish (Last < In_Buffer'First)); + (Filter => Filter, + In_Data => In_Buffer (In_First .. Last), + In_Last => In_Last, + Out_Data => Out_Buffer, + Out_Last => Out_Last, + Flush => Flush_Finish (Last < In_Buffer'First)); - Data_Out (Out_Buffer (Out_Buffer'First .. Out_Last)); + if Out_Buffer'First <= Out_Last then + Data_Out (Out_Buffer (Out_Buffer'First .. Out_Last)); + end if; exit Main when Stream_End (Filter); -- The end of in buffer. 
+ exit when In_Last = Last; In_First := In_Last + 1; @@ -301,7 +307,7 @@ package body ZLib is procedure Inflate_Init (Filter : in out Filter_Type; - Window_Bits : in Window_Bits_Type := 15; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; Header : in Header_Type := Default) is use type Thin.Int; @@ -320,6 +326,10 @@ package body ZLib is end Check_Version; begin + if Is_Open (Filter) then + raise Status_Error; + end if; + case Header is when None => Check_Version; @@ -344,10 +354,9 @@ package body ZLib is when Default => null; end case; - Filter.Strm := new Z_Stream; + Filter.Strm := new Z_Stream; Filter.Compression := False; Filter.Stream_End := False; - Filter.Opened := True; Filter.Header := Header; if Thin.Inflate_Init @@ -357,16 +366,25 @@ package body ZLib is end if; end Inflate_Init; + ------------- + -- Is_Open -- + ------------- + + function Is_Open (Filter : in Filter_Type) return Boolean is + begin + return Filter.Strm /= null; + end Is_Open; + ----------------- -- Raise_Error -- ----------------- - procedure Raise_Error (Message : String) is + procedure Raise_Error (Message : in String) is begin Ada.Exceptions.Raise_Exception (ZLib_Error'Identity, Message); end Raise_Error; - procedure Raise_Error (Stream : Z_Stream) is + procedure Raise_Error (Stream : in Z_Stream) is begin Raise_Error (Last_Error_Message (Stream)); end Raise_Error; @@ -378,21 +396,29 @@ package body ZLib is procedure Read (Filter : in out Filter_Type; Item : out Ada.Streams.Stream_Element_Array; - Last : out Ada.Streams.Stream_Element_Offset) + Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode := No_Flush) is In_Last : Stream_Element_Offset; Item_First : Ada.Streams.Stream_Element_Offset := Item'First; + V_Flush : Flush_Mode := Flush; begin pragma Assert (Rest_First in Buffer'First .. Buffer'Last + 1); + pragma Assert (Rest_Last in Buffer'First - 1 .. 
Buffer'Last); loop - if Rest_First > Buffer'Last then + if Rest_Last = Buffer'First - 1 then + V_Flush := Finish; + + elsif Rest_First > Rest_Last then Read (Buffer, Rest_Last); Rest_First := Buffer'First; - end if; - pragma Assert (Rest_Last in Buffer'First - 1 .. Buffer'Last); + if Rest_Last < Buffer'First then + V_Flush := Finish; + end if; + end if; Translate (Filter => Filter, @@ -400,11 +426,13 @@ package body ZLib is In_Last => In_Last, Out_Data => Item (Item_First .. Item'Last), Out_Last => Last, - Flush => Flush_Finish (Rest_Last < Rest_First)); + Flush => V_Flush); Rest_First := In_Last + 1; - exit when Last = Item'Last or else Stream_End (Filter); + exit when Stream_End (Filter) + or else Last = Item'Last + or else (Last >= Item'First and then Allow_Read_Some); Item_First := Last + 1; end loop; @@ -489,11 +517,11 @@ package body ZLib is Code : Thin.Int; begin - if Filter.Opened = False then - raise ZLib_Error; + if not Is_Open (Filter) then + raise Status_Error; end if; - if Out_Data'Length = 0 then + if Out_Data'Length = 0 and then In_Data'Length = 0 then raise Constraint_Error; end if; @@ -514,7 +542,6 @@ package body ZLib is - Stream_Element_Offset (Avail_In (Filter.Strm.all)); Out_Last := Out_Data'Last - Stream_Element_Offset (Avail_Out (Filter.Strm.all)); - end Translate_Auto; -------------------- @@ -529,7 +556,7 @@ package body ZLib is Out_Last : out Ada.Streams.Stream_Element_Offset; Flush : in Flush_Mode) is - Out_First : Stream_Element_Offset; + Out_First : Stream_Element_Offset; procedure Add_Data (Data : in Stream_Element_Array); -- Add data to stream from the Filter.Offset till necessary, @@ -596,7 +623,7 @@ package body ZLib is Add_Data (Simple_GZip_Header); Translate_Auto - (Filter => Filter, + (Filter => Filter, In_Data => In_Data, In_Last => In_Last, Out_Data => Out_Data (Out_First .. Out_Data'Last), @@ -604,7 +631,6 @@ package body ZLib is Flush => Flush); CRC32 (Filter.CRC, In_Data (In_Data'First .. 
In_Last)); - end if; if Filter.Stream_End and then Out_Last <= Out_Data'Last then @@ -642,10 +668,11 @@ package body ZLib is procedure Write (Filter : in out Filter_Type; Item : in Ada.Streams.Stream_Element_Array; - Flush : in Flush_Mode) + Flush : in Flush_Mode := No_Flush) is - Buffer : Stream_Element_Array (1 .. Buffer_Size); - In_Last, Out_Last : Stream_Element_Offset; + Buffer : Stream_Element_Array (1 .. Buffer_Size); + In_Last : Stream_Element_Offset; + Out_Last : Stream_Element_Offset; In_First : Stream_Element_Offset := Item'First; begin if Item'Length = 0 and Flush = No_Flush then @@ -654,7 +681,7 @@ package body ZLib is loop Translate - (Filter => Filter, + (Filter => Filter, In_Data => Item (In_First .. Item'Last), In_Last => In_Last, Out_Data => Buffer, diff --git a/zlib/contrib/ada/zlib.ads b/zlib/contrib/ada/zlib.ads index b72e4d2d5d7..79ffc4095cf 100644 --- a/zlib/contrib/ada/zlib.ads +++ b/zlib/contrib/ada/zlib.ads @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- ZLib for Ada thick binding. -- -- -- --- Copyright (C) 2002-2003 Dmitriy Anisimkov -- +-- Copyright (C) 2002-2004 Dmitriy Anisimkov -- -- -- -- This library is free software; you can redistribute it and/or modify -- -- it under the terms of the GNU General Public License as published by -- @@ -25,7 +25,7 @@ -- covered by the GNU Public License. -- ------------------------------------------------------------------------------ --- $Id: zlib.ads,v 1.17 2003/08/12 13:19:07 vagul Exp $ +-- $Id: zlib.ads,v 1.26 2004/09/06 06:53:19 vagul Exp $ with Ada.Streams; @@ -33,7 +33,8 @@ with Interfaces; package ZLib is - ZLib_Error : exception; + ZLib_Error : exception; + Status_Error : exception; type Compression_Level is new Integer range -1 .. 
9; @@ -55,12 +56,15 @@ package ZLib is subtype Count is Ada.Streams.Stream_Element_Count; + Default_Memory_Level : constant Memory_Level_Type := 8; + Default_Window_Bits : constant Window_Bits_Type := 15; + ---------------------------------- -- Compression method constants -- ---------------------------------- Deflated : constant Compression_Method; - -- Only one method allowed in this ZLib version. + -- Only one method allowed in this ZLib version --------------------------------- -- Compression level constants -- @@ -79,21 +83,29 @@ package ZLib is -- Regular way for compression, no flush Partial_Flush : constant Flush_Mode; - -- will be removed, use Z_SYNC_FLUSH instead + -- Will be removed, use Z_SYNC_FLUSH instead Sync_Flush : constant Flush_Mode; - -- all pending output is flushed to the output buffer and the output + -- All pending output is flushed to the output buffer and the output -- is aligned on a byte boundary, so that the decompressor can get all -- input data available so far. (In particular avail_in is zero after the -- call if enough output space has been provided before the call.) -- Flushing may degrade compression for some compression algorithms and so -- it should be used only when necessary. + Block_Flush : constant Flush_Mode; + -- Z_BLOCK requests that inflate() stop + -- if and when it get to the next deflate block boundary. When decoding the + -- zlib or gzip format, this will cause inflate() to return immediately + -- after the header and before the first block. When doing a raw inflate, + -- inflate() will go ahead and process the first block, and will return + -- when it gets to the end of that block, or when it runs out of data. 
+ Full_Flush : constant Flush_Mode; - -- all output is flushed as with SYNC_FLUSH, and the compression state + -- All output is flushed as with SYNC_FLUSH, and the compression state -- is reset so that decompression can restart from this point if previous -- compressed data has been damaged or if random access is desired. Using - -- FULL_FLUSH too often can seriously degrade the compression. + -- Full_Flush too often can seriously degrade the compression. Finish : constant Flush_Mode; -- Just for tell the compressor that input data is complete. @@ -111,7 +123,7 @@ package ZLib is Default_Buffer_Size : constant := 4096; - type Filter_Type is limited private; + type Filter_Type is tagged limited private; -- The filter is for compression and for decompression. -- The usage of the type is depend of its initialization. @@ -124,8 +136,8 @@ package ZLib is Level : in Compression_Level := Default_Compression; Strategy : in Strategy_Type := Default_Strategy; Method : in Compression_Method := Deflated; - Window_Bits : in Window_Bits_Type := 15; - Memory_Level : in Memory_Level_Type := 8; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; + Memory_Level : in Memory_Level_Type := Default_Memory_Level; Header : in Header_Type := Default); -- Compressor initialization. -- When Header parameter is Auto or Default, then default zlib header @@ -136,7 +148,7 @@ package ZLib is procedure Inflate_Init (Filter : in out Filter_Type; - Window_Bits : in Window_Bits_Type := 15; + Window_Bits : in Window_Bits_Type := Default_Window_Bits; Header : in Header_Type := Default); -- Decompressor initialization. -- Default header type mean that ZLib default header is expecting in the @@ -146,10 +158,14 @@ package ZLib is -- input compressed stream. -- Auto header type mean that header type (GZip or Native) would be -- detected automatically in the input stream. 
- -- Note that header types parameter values None, GZip and Auto is - -- supporting for inflate routine only in ZLib versions 1.2.0.2 and later. + -- Note that header types parameter values None, GZip and Auto are + -- supported for inflate routine only in ZLib versions 1.2.0.2 and later. -- Deflate_Init is supporting all header types. + function Is_Open (Filter : in Filter_Type) return Boolean; + pragma Inline (Is_Open); + -- Is the filter opened for compression or decompression. + procedure Close (Filter : in out Filter_Type; Ignore_Error : in Boolean := False); @@ -167,31 +183,31 @@ package ZLib is (Filter : in out Filter_Type; In_Buffer_Size : in Integer := Default_Buffer_Size; Out_Buffer_Size : in Integer := Default_Buffer_Size); - -- Compressing/decompressing data arrived from Data_In routine + -- Compress/decompress data fetch from Data_In routine and pass the result -- to the Data_Out routine. User should provide Data_In and Data_Out -- for compression/decompression data flow. - -- Compression or decompression depend on initialization of Filter. + -- Compression or decompression depend on Filter initialization. function Total_In (Filter : in Filter_Type) return Count; pragma Inline (Total_In); - -- Return total number of input bytes read so far. + -- Returns total number of input bytes read so far function Total_Out (Filter : in Filter_Type) return Count; pragma Inline (Total_Out); - -- Return total number of bytes output so far. + -- Returns total number of bytes output so far function CRC32 (CRC : in Unsigned_32; Data : in Ada.Streams.Stream_Element_Array) return Unsigned_32; pragma Inline (CRC32); - -- Calculate CRC32, it could be necessary for make gzip format. + -- Compute CRC32, it could be necessary for make gzip format procedure CRC32 (CRC : in out Unsigned_32; Data : in Ada.Streams.Stream_Element_Array); pragma Inline (CRC32); - -- Calculate CRC32, it could be necessary for make gzip format. 
+ -- Compute CRC32, it could be necessary for make gzip format ------------------------------------------------- -- Below is more complex low level routines. -- @@ -204,15 +220,11 @@ package ZLib is Out_Data : out Ada.Streams.Stream_Element_Array; Out_Last : out Ada.Streams.Stream_Element_Offset; Flush : in Flush_Mode); - -- Compressing/decompressing the datas from In_Data buffer to the - -- Out_Data buffer. - -- In_Data is incoming data portion, - -- In_Last is the index of last element from In_Data accepted by the - -- Filter. - -- Out_Data is the buffer for output data from the filter. - -- Out_Last is the last element of the received data from Filter. - -- To tell the filter that incoming data is complete put the - -- Flush parameter to FINISH. + -- Compress/decompress the In_Data buffer and place the result into + -- Out_Data. In_Last is the index of last element from In_Data accepted by + -- the Filter. Out_Last is the last element of the received data from + -- Filter. To tell the filter that incoming data are complete put the + -- Flush parameter to Finish. function Stream_End (Filter : in Filter_Type) return Boolean; pragma Inline (Stream_End); @@ -239,10 +251,9 @@ package ZLib is procedure Write (Filter : in out Filter_Type; Item : in Ada.Streams.Stream_Element_Array; - Flush : in Flush_Mode); - -- Compressing/Decompressing data from Item to the - -- generic parameter procedure Write. - -- Output buffer size could be set in Buffer_Size generic parameter. + Flush : in Flush_Mode := No_Flush); + -- Compress/Decompress data from Item to the generic parameter procedure + -- Write. Output buffer size could be set in Buffer_Size generic parameter. generic with procedure Read @@ -257,33 +268,41 @@ package ZLib is Rest_First, Rest_Last : in out Ada.Streams.Stream_Element_Offset; -- Rest_First have to be initialized to Buffer'Last + 1 + -- Rest_Last have to be initialized to Buffer'Last -- before usage. 
+ Allow_Read_Some : in Boolean := False; + -- Is it allowed to return Last < Item'Last before end of data. + procedure Read (Filter : in out Filter_Type; Item : out Ada.Streams.Stream_Element_Array; - Last : out Ada.Streams.Stream_Element_Offset); - -- Compressing/Decompressing data from generic parameter - -- procedure Read to the Item. - -- User should provide Buffer for the operation - -- and Rest_First variable first time initialized to the Buffer'Last + 1. + Last : out Ada.Streams.Stream_Element_Offset; + Flush : in Flush_Mode := No_Flush); + -- Compress/Decompress data from generic parameter procedure Read to the + -- Item. User should provide Buffer and initialized Rest_First, Rest_Last + -- indicators. If Allow_Read_Some is True, Read routines could return + -- Last < Item'Last only at end of stream. private use Ada.Streams; - type Flush_Mode is new Integer range 0 .. 4; + pragma Assert (Ada.Streams.Stream_Element'Size = 8); + pragma Assert (Ada.Streams.Stream_Element'Modulus = 2**8); + + type Flush_Mode is new Integer range 0 .. 5; type Compression_Method is new Integer range 8 .. 8; type Strategy_Type is new Integer range 0 .. 3; No_Flush : constant Flush_Mode := 0; + Partial_Flush : constant Flush_Mode := 1; Sync_Flush : constant Flush_Mode := 2; Full_Flush : constant Flush_Mode := 3; Finish : constant Flush_Mode := 4; - Partial_Flush : constant Flush_Mode := 1; - -- will be removed, use Z_SYNC_FLUSH instead + Block_Flush : constant Flush_Mode := 5; Filtered : constant Strategy_Type := 1; Huffman_Only : constant Strategy_Type := 2; @@ -296,7 +315,7 @@ private type Z_Stream_Access is access all Z_Stream; - type Filter_Type is record + type Filter_Type is tagged limited record Strm : Z_Stream_Access; Compression : Boolean; Stream_End : Boolean; @@ -304,8 +323,6 @@ private CRC : Unsigned_32; Offset : Stream_Element_Offset; -- Offset for gzip header/footer output. 
- - Opened : Boolean := False; end record; end ZLib; diff --git a/zlib/contrib/ada/zlib.gpr b/zlib/contrib/ada/zlib.gpr index 0f58985e30b..296b22aa966 100644 --- a/zlib/contrib/ada/zlib.gpr +++ b/zlib/contrib/ada/zlib.gpr @@ -3,10 +3,10 @@ project Zlib is for Languages use ("Ada"); for Source_Dirs use ("."); for Object_Dir use "."; - for Main use ("test.adb", "mtest.adb", "read.adb"); + for Main use ("test.adb", "mtest.adb", "read.adb", "buffer_demo"); package Compiler is - for Default_Switches ("ada") use ("-gnatwbcfilopru", "-gnatVcdfimorst", "-gnatyabcefhiklmnoprst"); + for Default_Switches ("ada") use ("-gnatwcfilopru", "-gnatVcdfimorst", "-gnatyabcefhiklmnoprst"); end Compiler; package Linker is @@ -18,4 +18,3 @@ project Zlib is end Builder; end Zlib; - diff --git a/zlib/contrib/delphi/ZLib.pas b/zlib/contrib/delphi/ZLib.pas index 61ffd08fd1a..3f2b8b4a5cd 100644 --- a/zlib/contrib/delphi/ZLib.pas +++ b/zlib/contrib/delphi/ZLib.pas @@ -152,7 +152,7 @@ procedure DecompressToUserBuf(const InBuf: Pointer; InBytes: Integer; const OutBuf: Pointer; BufSize: Integer); const - zlib_version = '1.2.1'; + zlib_version = '1.2.3'; type EZlibError = class(Exception); @@ -344,7 +344,7 @@ begin strm.avail_out := OutBytes; DCheck(inflateInit_(strm, zlib_version, sizeof(strm))); try - while DCheck(inflate(strm, Z_FINISH)) <> Z_STREAM_END do + while DCheck(inflate(strm, Z_NO_FLUSH)) <> Z_STREAM_END do begin P := OutBuf; Inc(OutBytes, BufInc); @@ -510,7 +510,7 @@ begin Result := Count - FZRec.avail_out; Exit; end; - FZRec.next_in := FBuffer; + FZRec.next_in := FBuffer; FStrmPos := FStrm.Position; Progress(Self); end; diff --git a/zlib/contrib/infback9/infback9.c b/zlib/contrib/infback9/infback9.c index 34a95fc0dac..f5ddde67da6 100644 --- a/zlib/contrib/infback9/infback9.c +++ b/zlib/contrib/infback9/infback9.c @@ -430,6 +430,9 @@ void FAR *out_desc; } } + /* handle error breaks in while */ + if (mode == BAD) break; + /* build code tables */ state->next = state->codes; lencode = 
(code const FAR *)(state->next); diff --git a/zlib/contrib/infback9/infback9.h b/zlib/contrib/infback9/infback9.h index 10bf58c368e..1073c0a38e6 100644 --- a/zlib/contrib/infback9/infback9.h +++ b/zlib/contrib/infback9/infback9.h @@ -16,6 +16,10 @@ * zlib.h must be included before this header file. */ +#ifdef __cplusplus +extern "C" { +#endif + ZEXTERN int ZEXPORT inflateBack9 OF((z_stream FAR *strm, in_func in, void FAR *in_desc, out_func out, void FAR *out_desc)); @@ -27,3 +31,7 @@ ZEXTERN int ZEXPORT inflateBack9Init_ OF((z_stream FAR *strm, #define inflateBack9Init(strm, window) \ inflateBack9Init_((strm), (window), \ ZLIB_VERSION, sizeof(z_stream)) + +#ifdef __cplusplus +} +#endif diff --git a/zlib/contrib/infback9/inftree9.c b/zlib/contrib/infback9/inftree9.c index fe76a1b1cd8..0993f75646c 100644 --- a/zlib/contrib/infback9/inftree9.c +++ b/zlib/contrib/infback9/inftree9.c @@ -1,5 +1,5 @@ /* inftree9.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2003 Mark Adler + * Copyright (C) 1995-2005 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -9,7 +9,7 @@ #define MAXBITS 15 const char inflate9_copyright[] = - " inflate9 1.2.1 Copyright 1995-2003 Mark Adler "; + " inflate9 1.2.3 Copyright 1995-2005 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. 
If for some reason you cannot @@ -64,7 +64,7 @@ unsigned short FAR *work; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, - 133, 133, 133, 133, 144, 76, 66}; + 133, 133, 133, 133, 144, 201, 196}; static const unsigned short dbase[32] = { /* Distance codes 0..31 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, @@ -128,7 +128,7 @@ unsigned short FAR *work; left -= count[len]; if (left < 0) return -1; /* over-subscribed */ } - if (left > 0 && (type == CODES || (codes - count[0] != 1))) + if (left > 0 && (type == CODES || max != 1)) return -1; /* incomplete set */ /* generate offsets into symbol table for each length for sorting */ diff --git a/zlib/contrib/infback9/inftree9.h b/zlib/contrib/infback9/inftree9.h index 6a699a798f5..a268084eea6 100644 --- a/zlib/contrib/infback9/inftree9.h +++ b/zlib/contrib/infback9/inftree9.h @@ -36,12 +36,12 @@ typedef struct { */ /* Maximum size of dynamic tree. The maximum found in a long but non- - exhaustive search was 1004 code structures (850 for length/literals - and 154 for distances, the latter actually the result of an + exhaustive search was 1444 code structures (852 for length/literals + and 592 for distances, the latter actually the result of an exhaustive search). The true maximum is not known, but the value below is more than safe. */ -#define ENOUGH 1440 -#define MAXD 154 +#define ENOUGH 2048 +#define MAXD 592 /* Type of code to build for inftable() */ typedef enum { diff --git a/zlib/contrib/inflate86/inffas86.c b/zlib/contrib/inflate86/inffas86.c index 4534693a47d..6da76358ccb 100644 --- a/zlib/contrib/inflate86/inffas86.c +++ b/zlib/contrib/inflate86/inffas86.c @@ -7,6 +7,15 @@ * Copyright (C) 2003 Chris Anderson <christop@charm.net> * Please use the copyright conditions above. 
* + * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also + * slightly quicker on x86 systems because, instead of using rep movsb to copy + * data, it uses rep movsw, which moves data in 2-byte chunks instead of single + * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates + * from http://fedora.linux.duke.edu/fc1_x86_64 + * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with + * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version, + * when decompressing mozilla-source-1.3.tar.gz. + * * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at * the moment. I have successfully compiled and tested this code with gcc2.96, @@ -65,33 +74,44 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ { struct inflate_state FAR *state; struct inffast_ar { - void *esp; /* esp save */ - unsigned char FAR *in; /* local strm->next_in */ - unsigned char FAR *last; /* while in < last, enough input available */ - unsigned char FAR *out; /* local strm->next_out */ - unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ - unsigned char FAR *end; /* while out < end, enough space available */ - unsigned wsize; /* window size or zero if not using window */ - unsigned write; /* window write index */ - unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - code const FAR *lcode; /* local strm->lencode */ - code const FAR *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned status; /* this is set when state changes */ +/* 64 32 x86 x86_64 */ +/* ar offset register */ +/* 0 0 */ void 
*esp; /* esp save */ +/* 8 4 */ void *ebp; /* ebp save */ +/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ +/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ +/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ +/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ +/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ +/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ +/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ +/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ +/* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */ +/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ +/* 92 48 */ unsigned wsize; /* window size */ +/* 96 52 */ unsigned write; /* window write index */ +/*100 56 */ unsigned lmask; /* r12 mask for lcode */ +/*104 60 */ unsigned dmask; /* r13 mask for dcode */ +/*108 64 */ unsigned len; /* r14 match length */ +/*112 68 */ unsigned dist; /* r15 match distance */ +/*116 72 */ unsigned status; /* set when state chng*/ } ar; +#if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) +#define PAD_AVAIL_IN 6 +#define PAD_AVAIL_OUT 258 +#else +#define PAD_AVAIL_IN 5 +#define PAD_AVAIL_OUT 257 +#endif + /* copy state to local variables */ state = (struct inflate_state FAR *)strm->state; ar.in = strm->next_in; - ar.last = ar.in + (strm->avail_in - 5); + ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); ar.out = strm->next_out; ar.beg = ar.out - (start - strm->avail_out); - ar.end = ar.out + (strm->avail_out - 257); + ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); ar.wsize = state->wsize; ar.write = state->write; ar.window = state->window; @@ -105,32 +125,368 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ /* decode literals and length/distances until end-of-block or not enough input data or output space */ - /* align in on 2 byte boundary */ - if (((unsigned long)(void *)ar.in & 0x1) != 0) { + /* align in on 1/2 hold size boundary */ + while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { ar.hold += (unsigned long)*ar.in++ << ar.bits; ar.bits += 8; } -#if defined( __GNUC__ ) || defined( __ICC ) +#if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) + __asm__ __volatile__ ( +" leaq %0, %%rax\n" +" movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */ +" movq %%rsp, (%%rax)\n" +" movq %%rax, %%rsp\n" /* make rsp point to &ar */ +" movq 16(%%rsp), %%rsi\n" /* rsi = in */ +" movq 32(%%rsp), %%rdi\n" /* rdi = out */ +" movq 24(%%rsp), %%r9\n" /* r9 = last */ +" movq 48(%%rsp), %%r10\n" /* r10 = end */ +" movq 64(%%rsp), %%rbp\n" /* rbp = lcode */ +" movq 72(%%rsp), %%r11\n" /* r11 = dcode */ +" movq 80(%%rsp), %%rdx\n" /* rdx = hold */ +" movl 88(%%rsp), %%ebx\n" /* ebx = bits */ +" movl 100(%%rsp), %%r12d\n" /* r12d = lmask */ +" movl 104(%%rsp), %%r13d\n" /* r13d = dmask */ + /* r14d = len */ + /* r15d = dist */ +" cld\n" +" cmpq %%rdi, %%r10\n" +" je .L_one_time\n" /* if only one decode left */ +" cmpq %%rsi, %%r9\n" +" je .L_one_time\n" +" jmp .L_do_loop\n" + +".L_one_time:\n" +" movq %%r12, %%r8\n" /* r8 = lmask */ +" cmpb $32, %%bl\n" +" ja .L_get_length_code_one_time\n" + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ +" jmp .L_get_length_code_one_time\n" + +".align 32,0x90\n" +".L_while_test:\n" +" cmpq %%rdi, %%r10\n" +" jbe .L_break_loop\n" +" cmpq %%rsi, %%r9\n" +" jbe .L_break_loop\n" + +".L_do_loop:\n" +" movq %%r12, %%r8\n" /* r8 = lmask */ +" cmpb $32, %%bl\n" +" ja .L_get_length_code\n" /* if (32 < bits) */ + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ + +".L_get_length_code:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ + +" movb %%ah, %%cl\n" /* cl = this.bits */ +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ + +" testb %%al, 
%%al\n" +" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ + +" movq %%r12, %%r8\n" /* r8 = lmask */ +" shrl $16, %%eax\n" /* output this.val char */ +" stosb\n" + +".L_get_length_code_one_time:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ + +".L_dolen:\n" +" movb %%ah, %%cl\n" /* cl = this.bits */ +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ + +" testb %%al, %%al\n" +" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ + +" shrl $16, %%eax\n" /* output this.val char */ +" stosb\n" +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_length_base:\n" +" movl %%eax, %%r14d\n" /* len = this */ +" shrl $16, %%r14d\n" /* len = this.val */ +" movb %%al, %%cl\n" + +" testb $16, %%al\n" +" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_decode_distance\n" /* if (!op) */ + +".L_add_bits_to_len:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrq %%cl, %%rdx\n" +" addl %%eax, %%r14d\n" /* len += hold & mask[op] */ + +".L_decode_distance:\n" +" movq %%r13, %%r8\n" /* r8 = dmask */ +" cmpb $32, %%bl\n" +" ja .L_get_distance_code\n" /* if (32 < bits) */ + +" lodsl\n" /* eax = *(uint *)in++ */ +" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ +" addb $32, %%bl\n" /* bits += 32 */ +" shlq %%cl, %%rax\n" +" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ + +".L_get_distance_code:\n" +" andq %%rdx, %%r8\n" /* r8 &= hold */ +" movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */ + +".L_dodist:\n" +" movl %%eax, %%r15d\n" /* dist = this */ +" shrl $16, %%r15d\n" /* dist = this.val */ +" movb %%ah, %%cl\n" +" subb %%ah, %%bl\n" /* bits -= this.bits */ +" shrq %%cl, %%rdx\n" /* hold >>= this.bits */ +" movb %%al, %%cl\n" /* cl = this.op */ + +" testb $16, 
%%al\n" /* if ((op & 16) == 0) */ +" jz .L_test_for_second_level_dist\n" +" andb $15, %%cl\n" /* op &= 15 */ +" jz .L_check_dist_one\n" + +".L_add_bits_to_dist:\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" /* (1 << op) - 1 */ +" andl %%edx, %%eax\n" /* eax &= hold */ +" shrq %%cl, %%rdx\n" +" addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */ + +".L_check_window:\n" +" movq %%rsi, %%r8\n" /* save in so from can use it's reg */ +" movq %%rdi, %%rax\n" +" subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */ + +" cmpl %%r15d, %%eax\n" +" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ + +" movl %%r14d, %%ecx\n" /* ecx = len */ +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ + +" sarl %%ecx\n" +" jnc .L_copy_two\n" /* if len % 2 == 0 */ + +" rep movsw\n" +" movb (%%rsi), %%al\n" +" movb %%al, (%%rdi)\n" +" incq %%rdi\n" + +" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ +" jmp .L_while_test\n" + +".L_copy_two:\n" +" rep movsw\n" +" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_check_dist_one:\n" +" cmpl $1, %%r15d\n" /* if dist 1, is a memset */ +" jne .L_check_window\n" +" cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */ +" je .L_check_window\n" + +" movl %%r14d, %%ecx\n" /* ecx = len */ +" movb -1(%%rdi), %%al\n" +" movb %%al, %%ah\n" + +" sarl %%ecx\n" +" jnc .L_set_two\n" +" movb %%al, (%%rdi)\n" +" incq %%rdi\n" + +".L_set_two:\n" +" rep stosw\n" +" jmp .L_while_test\n" + +".align 32,0x90\n" +".L_test_for_second_level_length:\n" +" testb $64, %%al\n" +" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" addl %%r14d, %%eax\n" /* eax += len */ +" movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ +" jmp .L_dolen\n" + +".align 32,0x90\n" 
+".L_test_for_second_level_dist:\n" +" testb $64, %%al\n" +" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ + +" xorl %%eax, %%eax\n" +" incl %%eax\n" +" shll %%cl, %%eax\n" +" decl %%eax\n" +" andl %%edx, %%eax\n" /* eax &= hold */ +" addl %%r15d, %%eax\n" /* eax += dist */ +" movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ +" jmp .L_dodist\n" + +".align 32,0x90\n" +".L_clip_window:\n" +" movl %%eax, %%ecx\n" /* ecx = nbytes */ +" movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */ +" negl %%ecx\n" /* nbytes = -nbytes */ + +" cmpl %%r15d, %%eax\n" +" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ + +" addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */ +" cmpl $0, 96(%%rsp)\n" +" jne .L_wrap_around_window\n" /* if (write != 0) */ + +" movq 56(%%rsp), %%rsi\n" /* from = window */ +" subl %%ecx, %%eax\n" /* eax -= nbytes */ +" addq %%rax, %%rsi\n" /* from += wsize - nbytes */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%r14d\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* eax -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = &out[ -dist ] */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_wrap_around_window:\n" +" movl 96(%%rsp), %%eax\n" /* eax = write */ +" cmpl %%eax, %%ecx\n" +" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ + +" movl 92(%%rsp), %%esi\n" /* from = wsize */ +" addq 56(%%rsp), %%rsi\n" /* from += window */ +" addq %%rax, %%rsi\n" /* from += write */ +" subq %%rcx, %%rsi\n" /* from -= nbytes */ +" subl %%eax, %%ecx\n" /* nbytes -= write */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movq 56(%%rsp), %%rsi\n" /* from = window */ +" movl 96(%%rsp), %%ecx\n" /* nbytes = write */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* 
len -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ +" jmp .L_do_copy\n" + +".align 32,0x90\n" +".L_contiguous_in_window:\n" +" movq 56(%%rsp), %%rsi\n" /* rsi = window */ +" addq %%rax, %%rsi\n" +" subq %%rcx, %%rsi\n" /* from += write - nbytes */ + +" movl %%r14d, %%eax\n" /* eax = len */ +" cmpl %%ecx, %%eax\n" +" jbe .L_do_copy\n" /* if (nbytes >= len) */ + +" subl %%ecx, %%eax\n" /* len -= nbytes */ +" rep movsb\n" +" movq %%rdi, %%rsi\n" +" subq %%r15, %%rsi\n" /* from = out - dist */ +" jmp .L_do_copy\n" /* if (nbytes >= len) */ + +".align 32,0x90\n" +".L_do_copy:\n" +" movl %%eax, %%ecx\n" /* ecx = len */ +" rep movsb\n" + +" movq %%r8, %%rsi\n" /* move in back to %esi, toss from */ +" jmp .L_while_test\n" + +".L_test_for_end_of_block:\n" +" testb $32, %%al\n" +" jz .L_invalid_literal_length_code\n" +" movl $1, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_literal_length_code:\n" +" movl $2, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_code:\n" +" movl $3, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_invalid_distance_too_far:\n" +" movl $4, 116(%%rsp)\n" +" jmp .L_break_loop_with_status\n" + +".L_break_loop:\n" +" movl $0, 116(%%rsp)\n" + +".L_break_loop_with_status:\n" +/* put in, out, bits, and hold back into ar and pop esp */ +" movq %%rsi, 16(%%rsp)\n" /* in */ +" movq %%rdi, 32(%%rsp)\n" /* out */ +" movl %%ebx, 88(%%rsp)\n" /* bits */ +" movq %%rdx, 80(%%rsp)\n" /* hold */ +" movq (%%rsp), %%rax\n" /* restore rbp and rsp */ +" movq 8(%%rsp), %%rbp\n" +" movq %%rax, %%rsp\n" + : + : "m" (ar) + : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" + ); +#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 ) __asm__ __volatile__ ( " leal %0, %%eax\n" -" pushf\n" -" pushl %%ebp\n" -" movl %%esp, (%%eax)\n" +" movl %%esp, (%%eax)\n" /* save esp, ebp */ +" movl 
%%ebp, 4(%%eax)\n" " movl %%eax, %%esp\n" -" movl 4(%%esp), %%esi\n" /* esi = in */ -" movl 12(%%esp), %%edi\n" /* edi = out */ -" movl 36(%%esp), %%edx\n" /* edx = hold */ -" movl 40(%%esp), %%ebx\n" /* ebx = bits */ -" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ +" movl 8(%%esp), %%esi\n" /* esi = in */ +" movl 16(%%esp), %%edi\n" /* edi = out */ +" movl 40(%%esp), %%edx\n" /* edx = hold */ +" movl 44(%%esp), %%ebx\n" /* ebx = bits */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ " cld\n" " jmp .L_do_loop\n" +".align 32,0x90\n" ".L_while_test:\n" -" cmpl %%edi, 20(%%esp)\n" +" cmpl %%edi, 24(%%esp)\n" /* out < end */ " jbe .L_break_loop\n" -" cmpl %%esi, 8(%%esp)\n" +" cmpl %%esi, 12(%%esp)\n" /* in < last */ " jbe .L_break_loop\n" ".L_do_loop:\n" @@ -145,7 +501,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ ".L_get_length_code:\n" -" movl 52(%%esp), %%eax\n" /* eax = lmask */ +" movl 56(%%esp), %%eax\n" /* eax = lmask */ " andl %%edx, %%eax\n" /* eax &= hold */ " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */ @@ -161,10 +517,11 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ " stosb\n" " jmp .L_while_test\n" +".align 32,0x90\n" ".L_test_for_length_base:\n" " movl %%eax, %%ecx\n" /* len = this */ " shrl $16, %%ecx\n" /* len = this.val */ -" movl %%ecx, 60(%%esp)\n" /* len = this */ +" movl %%ecx, 64(%%esp)\n" /* save len */ " movb %%al, %%cl\n" " testb $16, %%al\n" @@ -184,13 +541,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ " movb %%ch, %%cl\n" /* move op back to ecx */ ".L_add_bits_to_len:\n" -" movl $1, %%eax\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" " shll %%cl, %%eax\n" " decl %%eax\n" -" subb %%cl, %%bl\n" " andl %%edx, %%eax\n" /* eax &= hold */ " shrl %%cl, %%edx\n" -" addl %%eax, 60(%%esp)\n" /* len += hold & mask[op] */ +" addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] 
*/ ".L_decode_distance:\n" " cmpb $15, %%bl\n" @@ -204,8 +562,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ ".L_get_distance_code:\n" -" movl 56(%%esp), %%eax\n" /* eax = dmask */ -" movl 48(%%esp), %%ecx\n" /* ecx = dcode */ +" movl 60(%%esp), %%eax\n" /* eax = dmask */ +" movl 36(%%esp), %%ecx\n" /* ecx = dcode */ " andl %%edx, %%eax\n" /* eax &= hold */ " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ @@ -234,223 +592,228 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ " movb %%ch, %%cl\n" /* move op back to ecx */ ".L_add_bits_to_dist:\n" -" movl $1, %%eax\n" +" subb %%cl, %%bl\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" " shll %%cl, %%eax\n" " decl %%eax\n" /* (1 << op) - 1 */ -" subb %%cl, %%bl\n" " andl %%edx, %%eax\n" /* eax &= hold */ " shrl %%cl, %%edx\n" " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */ ".L_check_window:\n" -" movl %%esi, 4(%%esp)\n" /* save in so from can use it's reg */ +" movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */ " movl %%edi, %%eax\n" -" subl 16(%%esp), %%eax\n" /* nbytes = out - beg */ +" subl 20(%%esp), %%eax\n" /* nbytes = out - beg */ " cmpl %%ebp, %%eax\n" " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ -" movl 60(%%esp), %%ecx\n" +" movl 64(%%esp), %%ecx\n" /* ecx = len */ " movl %%edi, %%esi\n" " subl %%ebp, %%esi\n" /* from = out - dist */ -" subl $3, %%ecx\n" /* copy from to out */ +" sarl %%ecx\n" +" jnc .L_copy_two\n" /* if len % 2 == 0 */ + +" rep movsw\n" " movb (%%esi), %%al\n" " movb %%al, (%%edi)\n" -" movb 1(%%esi), %%al\n" -" movb 2(%%esi), %%ah\n" -" addl $3, %%esi\n" -" movb %%al, 1(%%edi)\n" -" movb %%ah, 2(%%edi)\n" -" addl $3, %%edi\n" -" rep movsb\n" +" incl %%edi\n" -" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */ -" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 
32(%%esp), %%ebp\n" /* ebp = lcode */ " jmp .L_while_test\n" +".L_copy_two:\n" +" rep movsw\n" +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ +" jmp .L_while_test\n" + +".align 32,0x90\n" ".L_check_dist_one:\n" " cmpl $1, %%ebp\n" /* if dist 1, is a memset */ " jne .L_check_window\n" -" cmpl %%edi, 16(%%esp)\n" -" je .L_check_window\n" +" cmpl %%edi, 20(%%esp)\n" +" je .L_check_window\n" /* out == beg, if outside window */ + +" movl 64(%%esp), %%ecx\n" /* ecx = len */ +" movb -1(%%edi), %%al\n" +" movb %%al, %%ah\n" + +" sarl %%ecx\n" +" jnc .L_set_two\n" +" movb %%al, (%%edi)\n" +" incl %%edi\n" -" decl %%edi\n" -" movl 60(%%esp), %%ecx\n" -" movb (%%edi), %%al\n" -" subl $3, %%ecx\n" - -" movb %%al, 1(%%edi)\n" /* memset out with from[-1] */ -" movb %%al, 2(%%edi)\n" -" movb %%al, 3(%%edi)\n" -" addl $4, %%edi\n" -" rep stosb\n" -" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ +".L_set_two:\n" +" rep stosw\n" +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ " jmp .L_while_test\n" +".align 32,0x90\n" ".L_test_for_second_level_length:\n" " testb $64, %%al\n" " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ -" movl $1, %%eax\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" " shll %%cl, %%eax\n" " decl %%eax\n" " andl %%edx, %%eax\n" /* eax &= hold */ -" addl 60(%%esp), %%eax\n" /* eax += this.val */ +" addl 64(%%esp), %%eax\n" /* eax += len */ " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ " jmp .L_dolen\n" +".align 32,0x90\n" ".L_test_for_second_level_dist:\n" " testb $64, %%al\n" " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ -" movl $1, %%eax\n" +" xorl %%eax, %%eax\n" +" incl %%eax\n" " shll %%cl, %%eax\n" " decl %%eax\n" " andl %%edx, %%eax\n" /* eax &= hold */ -" addl %%ebp, %%eax\n" /* eax += this.val */ -" movl 48(%%esp), %%ecx\n" /* ecx = dcode */ +" addl %%ebp, %%eax\n" /* eax += dist */ +" movl 36(%%esp), %%ecx\n" /* ecx = dcode */ " movl (%%ecx,%%eax,4), 
%%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ " jmp .L_dodist\n" +".align 32,0x90\n" ".L_clip_window:\n" " movl %%eax, %%ecx\n" -" movl 24(%%esp), %%eax\n" /* prepare for dist compare */ +" movl 48(%%esp), %%eax\n" /* eax = wsize */ " negl %%ecx\n" /* nbytes = -nbytes */ -" movl 32(%%esp), %%esi\n" /* from = window */ +" movl 28(%%esp), %%esi\n" /* from = window */ " cmpl %%ebp, %%eax\n" " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ -" cmpl $0, 28(%%esp)\n" +" cmpl $0, 52(%%esp)\n" " jne .L_wrap_around_window\n" /* if (write != 0) */ " subl %%ecx, %%eax\n" " addl %%eax, %%esi\n" /* from += wsize - nbytes */ -" movl 60(%%esp), %%eax\n" -" cmpl %%ecx, %%eax\n" -" jbe .L_do_copy1\n" /* if (nbytes >= len) */ - -" subl %%ecx, %%eax\n" /* len -= nbytes */ -" rep movsb\n" -" movl %%edi, %%esi\n" -" subl %%ebp, %%esi\n" /* from = out - dist */ -" jmp .L_do_copy1\n" - +" movl 64(%%esp), %%eax\n" /* eax = len */ " cmpl %%ecx, %%eax\n" -" jbe .L_do_copy1\n" /* if (nbytes >= len) */ +" jbe .L_do_copy\n" /* if (nbytes >= len) */ " subl %%ecx, %%eax\n" /* len -= nbytes */ " rep movsb\n" " movl %%edi, %%esi\n" " subl %%ebp, %%esi\n" /* from = out - dist */ -" jmp .L_do_copy1\n" +" jmp .L_do_copy\n" +".align 32,0x90\n" ".L_wrap_around_window:\n" -" movl 28(%%esp), %%eax\n" +" movl 52(%%esp), %%eax\n" /* eax = write */ " cmpl %%eax, %%ecx\n" " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ -" addl 24(%%esp), %%esi\n" -" addl %%eax, %%esi\n" -" subl %%ecx, %%esi\n" /* from += wsize + write - nbytes */ +" addl 48(%%esp), %%esi\n" /* from += wsize */ +" addl %%eax, %%esi\n" /* from += write */ +" subl %%ecx, %%esi\n" /* from -= nbytes */ " subl %%eax, %%ecx\n" /* nbytes -= write */ -" movl 60(%%esp), %%eax\n" +" movl 64(%%esp), %%eax\n" /* eax = len */ " cmpl %%ecx, %%eax\n" -" jbe .L_do_copy1\n" /* if (nbytes >= len) */ +" jbe .L_do_copy\n" /* if (nbytes >= len) */ " subl %%ecx, %%eax\n" /* len -= nbytes */ 
" rep movsb\n" -" movl 32(%%esp), %%esi\n" /* from = window */ -" movl 28(%%esp), %%ecx\n" /* nbytes = write */ +" movl 28(%%esp), %%esi\n" /* from = window */ +" movl 52(%%esp), %%ecx\n" /* nbytes = write */ " cmpl %%ecx, %%eax\n" -" jbe .L_do_copy1\n" /* if (nbytes >= len) */ +" jbe .L_do_copy\n" /* if (nbytes >= len) */ " subl %%ecx, %%eax\n" /* len -= nbytes */ " rep movsb\n" " movl %%edi, %%esi\n" " subl %%ebp, %%esi\n" /* from = out - dist */ -" jmp .L_do_copy1\n" +" jmp .L_do_copy\n" +".align 32,0x90\n" ".L_contiguous_in_window:\n" " addl %%eax, %%esi\n" " subl %%ecx, %%esi\n" /* from += write - nbytes */ -" movl 60(%%esp), %%eax\n" +" movl 64(%%esp), %%eax\n" /* eax = len */ " cmpl %%ecx, %%eax\n" -" jbe .L_do_copy1\n" /* if (nbytes >= len) */ +" jbe .L_do_copy\n" /* if (nbytes >= len) */ " subl %%ecx, %%eax\n" /* len -= nbytes */ " rep movsb\n" " movl %%edi, %%esi\n" " subl %%ebp, %%esi\n" /* from = out - dist */ +" jmp .L_do_copy\n" /* if (nbytes >= len) */ -".L_do_copy1:\n" +".align 32,0x90\n" +".L_do_copy:\n" " movl %%eax, %%ecx\n" " rep movsb\n" -" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */ -" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ +" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ +" movl 32(%%esp), %%ebp\n" /* ebp = lcode */ " jmp .L_while_test\n" ".L_test_for_end_of_block:\n" " testb $32, %%al\n" " jz .L_invalid_literal_length_code\n" -" movl $1, 68(%%esp)\n" +" movl $1, 72(%%esp)\n" " jmp .L_break_loop_with_status\n" ".L_invalid_literal_length_code:\n" -" movl $2, 68(%%esp)\n" +" movl $2, 72(%%esp)\n" " jmp .L_break_loop_with_status\n" ".L_invalid_distance_code:\n" -" movl $3, 68(%%esp)\n" +" movl $3, 72(%%esp)\n" " jmp .L_break_loop_with_status\n" ".L_invalid_distance_too_far:\n" -" movl 4(%%esp), %%esi\n" -" movl $4, 68(%%esp)\n" +" movl 8(%%esp), %%esi\n" +" movl $4, 72(%%esp)\n" " jmp .L_break_loop_with_status\n" ".L_break_loop:\n" -" movl $0, 68(%%esp)\n" +" movl $0, 72(%%esp)\n" 
".L_break_loop_with_status:\n" /* put in, out, bits, and hold back into ar and pop esp */ -" movl %%esi, 4(%%esp)\n" -" movl %%edi, 12(%%esp)\n" -" movl %%ebx, 40(%%esp)\n" -" movl %%edx, 36(%%esp)\n" +" movl %%esi, 8(%%esp)\n" /* save in */ +" movl %%edi, 16(%%esp)\n" /* save out */ +" movl %%ebx, 44(%%esp)\n" /* save bits */ +" movl %%edx, 40(%%esp)\n" /* save hold */ +" movl 4(%%esp), %%ebp\n" /* restore esp, ebp */ " movl (%%esp), %%esp\n" -" popl %%ebp\n" -" popf\n" : : "m" (ar) : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" ); -#elif defined( _MSC_VER ) +#elif defined( _MSC_VER ) && ! defined( _M_AMD64 ) __asm { lea eax, ar - pushfd - push ebp - mov [eax], esp + mov [eax], esp /* save esp, ebp */ + mov [eax+4], ebp mov esp, eax - mov esi, [esp+4] /* esi = in */ - mov edi, [esp+12] /* edi = out */ - mov edx, [esp+36] /* edx = hold */ - mov ebx, [esp+40] /* ebx = bits */ - mov ebp, [esp+44] /* ebp = lcode */ + mov esi, [esp+8] /* esi = in */ + mov edi, [esp+16] /* edi = out */ + mov edx, [esp+40] /* edx = hold */ + mov ebx, [esp+44] /* ebx = bits */ + mov ebp, [esp+32] /* ebp = lcode */ cld jmp L_do_loop +ALIGN 4 L_while_test: - cmp [esp+20], edi + cmp [esp+24], edi jbe L_break_loop - cmp [esp+8], esi + cmp [esp+12], esi jbe L_break_loop L_do_loop: @@ -465,7 +828,7 @@ L_do_loop: or edx, eax /* hold |= *((ushort *)in)++ << bits */ L_get_length_code: - mov eax, [esp+52] /* eax = lmask */ + mov eax, [esp+56] /* eax = lmask */ and eax, edx /* eax &= hold */ mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ @@ -481,10 +844,11 @@ L_dolen: stosb jmp L_while_test +ALIGN 4 L_test_for_length_base: mov ecx, eax /* len = this */ shr ecx, 16 /* len = this.val */ - mov [esp+60], ecx /* len = this */ + mov [esp+64], ecx /* save len */ mov cl, al test al, 16 @@ -504,13 +868,14 @@ L_test_for_length_base: mov cl, ch /* move op back to ecx */ L_add_bits_to_len: - mov eax, 1 + sub bl, cl + xor eax, eax + inc eax shl eax, cl dec eax - sub bl, cl and eax, edx /* eax 
&= hold */ shr edx, cl - add [esp+60], eax /* len += hold & mask[op] */ + add [esp+64], eax /* len += hold & mask[op] */ L_decode_distance: cmp bl, 15 @@ -524,8 +889,8 @@ L_decode_distance: or edx, eax /* hold |= *((ushort *)in)++ << bits */ L_get_distance_code: - mov eax, [esp+56] /* eax = dmask */ - mov ecx, [esp+48] /* ecx = dcode */ + mov eax, [esp+60] /* eax = dmask */ + mov ecx, [esp+36] /* ecx = dcode */ and eax, edx /* eax &= hold */ mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ @@ -554,200 +919,207 @@ L_dodist: mov cl, ch /* move op back to ecx */ L_add_bits_to_dist: - mov eax, 1 + sub bl, cl + xor eax, eax + inc eax shl eax, cl dec eax /* (1 << op) - 1 */ - sub bl, cl and eax, edx /* eax &= hold */ shr edx, cl add ebp, eax /* dist += hold & ((1 << op) - 1) */ L_check_window: - mov [esp+4], esi /* save in so from can use it's reg */ + mov [esp+8], esi /* save in so from can use it's reg */ mov eax, edi - sub eax, [esp+16] /* nbytes = out - beg */ + sub eax, [esp+20] /* nbytes = out - beg */ cmp eax, ebp jb L_clip_window /* if (dist > nbytes) 4.2% */ - mov ecx, [esp+60] + mov ecx, [esp+64] /* ecx = len */ mov esi, edi sub esi, ebp /* from = out - dist */ - sub ecx, 3 /* copy from to out */ + sar ecx, 1 + jnc L_copy_two + + rep movsw mov al, [esi] mov [edi], al - mov al, [esi+1] - mov ah, [esi+2] - add esi, 3 - mov [edi+1], al - mov [edi+2], ah - add edi, 3 - rep movsb + inc edi + + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ + jmp L_while_test - mov esi, [esp+4] /* move in back to %esi, toss from */ - mov ebp, [esp+44] /* ebp = lcode */ +L_copy_two: + rep movsw + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ jmp L_while_test +ALIGN 4 L_check_dist_one: cmp ebp, 1 /* if dist 1, is a memset */ jne L_check_window - cmp [esp+16], edi - je L_check_window - - dec edi - mov ecx, [esp+60] - mov al, [edi] - sub ecx, 3 - - mov [edi+1], al /* memset out with from[-1] 
*/ - mov [edi+2], al - mov [edi+3], al - add edi, 4 - rep stosb - mov ebp, [esp+44] /* ebp = lcode */ + cmp [esp+20], edi + je L_check_window /* out == beg, if outside window */ + + mov ecx, [esp+64] /* ecx = len */ + mov al, [edi-1] + mov ah, al + + sar ecx, 1 + jnc L_set_two + mov [edi], al /* memset out with from[-1] */ + inc edi + +L_set_two: + rep stosw + mov ebp, [esp+32] /* ebp = lcode */ jmp L_while_test +ALIGN 4 L_test_for_second_level_length: test al, 64 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ - mov eax, 1 + xor eax, eax + inc eax shl eax, cl dec eax and eax, edx /* eax &= hold */ - add eax, [esp+60] /* eax += this.val */ + add eax, [esp+64] /* eax += len */ mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ jmp L_dolen +ALIGN 4 L_test_for_second_level_dist: test al, 64 jnz L_invalid_distance_code /* if ((op & 64) != 0) */ - mov eax, 1 + xor eax, eax + inc eax shl eax, cl dec eax and eax, edx /* eax &= hold */ - add eax, ebp /* eax += this.val */ - mov ecx, [esp+48] /* ecx = dcode */ + add eax, ebp /* eax += dist */ + mov ecx, [esp+36] /* ecx = dcode */ mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ jmp L_dodist +ALIGN 4 L_clip_window: mov ecx, eax - mov eax, [esp+24] /* prepare for dist compare */ + mov eax, [esp+48] /* eax = wsize */ neg ecx /* nbytes = -nbytes */ - mov esi, [esp+32] /* from = window */ + mov esi, [esp+28] /* from = window */ cmp eax, ebp jb L_invalid_distance_too_far /* if (dist > wsize) */ add ecx, ebp /* nbytes = dist - nbytes */ - cmp dword ptr [esp+28], 0 + cmp dword ptr [esp+52], 0 jne L_wrap_around_window /* if (write != 0) */ sub eax, ecx add esi, eax /* from += wsize - nbytes */ - mov eax, [esp+60] - cmp eax, ecx - jbe L_do_copy1 /* if (nbytes >= len) */ - - sub eax, ecx /* len -= nbytes */ - rep movsb - mov esi, edi - sub esi, ebp /* from = out - dist */ - jmp L_do_copy1 - + mov eax, [esp+64] /* eax = len */ cmp eax, ecx - jbe L_do_copy1 /* if (nbytes >= len) */ + jbe L_do_copy /* if (nbytes 
>= len) */ sub eax, ecx /* len -= nbytes */ rep movsb mov esi, edi sub esi, ebp /* from = out - dist */ - jmp L_do_copy1 + jmp L_do_copy +ALIGN 4 L_wrap_around_window: - mov eax, [esp+28] + mov eax, [esp+52] /* eax = write */ cmp ecx, eax jbe L_contiguous_in_window /* if (write >= nbytes) */ - add esi, [esp+24] - add esi, eax - sub esi, ecx /* from += wsize + write - nbytes */ + add esi, [esp+48] /* from += wsize */ + add esi, eax /* from += write */ + sub esi, ecx /* from -= nbytes */ sub ecx, eax /* nbytes -= write */ - mov eax, [esp+60] + mov eax, [esp+64] /* eax = len */ cmp eax, ecx - jbe L_do_copy1 /* if (nbytes >= len) */ + jbe L_do_copy /* if (nbytes >= len) */ sub eax, ecx /* len -= nbytes */ rep movsb - mov esi, [esp+32] /* from = window */ - mov ecx, [esp+28] /* nbytes = write */ + mov esi, [esp+28] /* from = window */ + mov ecx, [esp+52] /* nbytes = write */ cmp eax, ecx - jbe L_do_copy1 /* if (nbytes >= len) */ + jbe L_do_copy /* if (nbytes >= len) */ sub eax, ecx /* len -= nbytes */ rep movsb mov esi, edi sub esi, ebp /* from = out - dist */ - jmp L_do_copy1 + jmp L_do_copy +ALIGN 4 L_contiguous_in_window: add esi, eax sub esi, ecx /* from += write - nbytes */ - mov eax, [esp+60] + mov eax, [esp+64] /* eax = len */ cmp eax, ecx - jbe L_do_copy1 /* if (nbytes >= len) */ + jbe L_do_copy /* if (nbytes >= len) */ sub eax, ecx /* len -= nbytes */ rep movsb mov esi, edi sub esi, ebp /* from = out - dist */ + jmp L_do_copy -L_do_copy1: +ALIGN 4 +L_do_copy: mov ecx, eax rep movsb - mov esi, [esp+4] /* move in back to %esi, toss from */ - mov ebp, [esp+44] /* ebp = lcode */ + mov esi, [esp+8] /* move in back to %esi, toss from */ + mov ebp, [esp+32] /* ebp = lcode */ jmp L_while_test L_test_for_end_of_block: test al, 32 jz L_invalid_literal_length_code - mov dword ptr [esp+68], 1 + mov dword ptr [esp+72], 1 jmp L_break_loop_with_status L_invalid_literal_length_code: - mov dword ptr [esp+68], 2 + mov dword ptr [esp+72], 2 jmp L_break_loop_with_status 
L_invalid_distance_code: - mov dword ptr [esp+68], 3 + mov dword ptr [esp+72], 3 jmp L_break_loop_with_status L_invalid_distance_too_far: - mov esi, [esp+4] - mov dword ptr [esp+68], 4 + mov esi, [esp+8] /* restore in (esi was reused for from); [esp+4] is now the saved ebp */ + mov dword ptr [esp+72], 4 jmp L_break_loop_with_status L_break_loop: - mov dword ptr [esp+68], 0 + mov dword ptr [esp+72], 0 L_break_loop_with_status: /* put in, out, bits, and hold back into ar and pop esp */ - mov [esp+4], esi - mov [esp+12], edi - mov [esp+40], ebx - mov [esp+36], edx + mov [esp+8], esi /* save in */ + mov [esp+16], edi /* save out */ + mov [esp+44], ebx /* save bits */ + mov [esp+40], edx /* save hold */ + mov ebp, [esp+4] /* restore esp, ebp */ mov esp, [esp] - pop ebp - popfd } +#else +#error "x86 architecture not defined" #endif if (ar.status > 1) { @@ -772,10 +1144,12 @@ L_break_loop_with_status: /* update state and return */ strm->next_in = ar.in; strm->next_out = ar.out; - strm->avail_in = (unsigned)(ar.in < ar.last ? 5 + (ar.last - ar.in) : - 5 - (ar.in - ar.last)); - strm->avail_out = (unsigned)(ar.out < ar.end ? 257 + (ar.end - ar.out) : - 257 - (ar.out - ar.end)); + strm->avail_in = (unsigned)(ar.in < ar.last ? + PAD_AVAIL_IN + (ar.last - ar.in) : + PAD_AVAIL_IN - (ar.in - ar.last)); + strm->avail_out = (unsigned)(ar.out < ar.end ? + PAD_AVAIL_OUT + (ar.end - ar.out) : + PAD_AVAIL_OUT - (ar.out - ar.end)); state->hold = ar.hold; state->bits = ar.bits; return; diff --git a/zlib/contrib/inflate86/inffast.S b/zlib/contrib/inflate86/inffast.S index f9bffd5eb9d..2245a2905bd 100644 --- a/zlib/contrib/inflate86/inffast.S +++ b/zlib/contrib/inflate86/inffast.S @@ -188,17 +188,8 @@ /* * typedef enum inflate_mode consts, in inflate.h */ -#ifndef NO_GUNZIP -#define GUNZIP -#endif - -#ifdef GUNZIP #define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */ #define INFLATE_MODE_BAD 26 -#else -#define INFLATE_MODE_TYPE 3 -#define INFLATE_MODE_BAD 17 -#endif #if ! defined( USE_MMX ) && !
defined( NO_MMX ) diff --git a/zlib/contrib/masm686/match.asm b/zlib/contrib/masm686/match.asm index 2287804d443..4b03a71abd5 100644 --- a/zlib/contrib/masm686/match.asm +++ b/zlib/contrib/masm686/match.asm @@ -1,408 +1,413 @@ - -; match.asm -- Pentium-Pro optimized version of longest_match() -; -; Updated for zlib 1.1.3 and converted to MASM 6.1x -; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> -; and Chuck Walbourn <chuckw@kinesoft.com> -; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> -; -; This is free software; you can redistribute it and/or modify it -; under the terms of the GNU General Public License. - -; Based on match.S -; Written for zlib 1.1.2 -; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> - - .686P - .MODEL FLAT - -;=========================================================================== -; EQUATES -;=========================================================================== - -MAX_MATCH EQU 258 -MIN_MATCH EQU 3 -MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) -MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) - -;=========================================================================== -; STRUCTURES -;=========================================================================== - -; This STRUCT assumes a 4-byte alignment - -DEFLATE_STATE STRUCT -ds_strm dd ? -ds_status dd ? -ds_pending_buf dd ? -ds_pending_buf_size dd ? -ds_pending_out dd ? -ds_pending dd ? -ds_wrap dd ? -ds_data_type db ? -ds_method db ? - db ? ; padding - db ? ; padding -ds_last_flush dd ? -ds_w_size dd ? ; used -ds_w_bits dd ? -ds_w_mask dd ? ; used -ds_window dd ? ; used -ds_window_size dd ? -ds_prev dd ? ; used -ds_head dd ? -ds_ins_h dd ? -ds_hash_size dd ? -ds_hash_bits dd ? -ds_hash_mask dd ? -ds_hash_shift dd ? -ds_block_start dd ? -ds_match_length dd ? ; used -ds_prev_match dd ? ; used -ds_match_available dd ? -ds_strstart dd ? ; used -ds_match_start dd ? ; used -ds_lookahead dd ? ; used -ds_prev_length dd ? ; used -ds_max_chain_length dd ? 
; used -ds_max_laxy_match dd ? -ds_level dd ? -ds_strategy dd ? -ds_good_match dd ? ; used -ds_nice_match dd ? ; used - -; Don't need anymore of the struct for match -DEFLATE_STATE ENDS - -;=========================================================================== -; CODE -;=========================================================================== -_TEXT SEGMENT - -;--------------------------------------------------------------------------- -; match_init -;--------------------------------------------------------------------------- - ALIGN 4 -PUBLIC _match_init -_match_init PROC - ; no initialization needed - ret -_match_init ENDP - -;--------------------------------------------------------------------------- -; uInt longest_match(deflate_state *deflatestate, IPos curmatch) -;--------------------------------------------------------------------------- - ALIGN 4 - -PUBLIC _longest_match -_longest_match PROC - -; Since this code uses EBP for a scratch register, the stack frame must -; be manually constructed and referenced relative to the ESP register. - -; Stack image -; Variables -chainlenwmask = 0 ; high word: current chain len - ; low word: s->wmask -window = 4 ; local copy of s->window -windowbestlen = 8 ; s->window + bestlen -scanend = 12 ; last two bytes of string -scanstart = 16 ; first two bytes of string -scanalign = 20 ; dword-misalignment of string -nicematch = 24 ; a good enough match size -bestlen = 28 ; size of best match so far -scan = 32 ; ptr to string wanting match -varsize = 36 ; number of bytes (also offset to last saved register) - -; Saved Registers (actually pushed into place) -ebx_save = 36 -edi_save = 40 -esi_save = 44 -ebp_save = 48 - -; Parameters -retaddr = 52 -deflatestate = 56 -curmatch = 60 - -; Save registers that the compiler may be using - push ebp - push edi - push esi - push ebx - -; Allocate local variable space - sub esp,varsize - -; Retrieve the function arguments. ecx will hold cur_match -; throughout the entire function. 
edx will hold the pointer to the -; deflate_state structure during the function's setup (before -; entering the main loop). - - mov edx, [esp+deflatestate] -ASSUME edx:PTR DEFLATE_STATE - - mov ecx, [esp+curmatch] - -; uInt wmask = s->w_mask; -; unsigned chain_length = s->max_chain_length; -; if (s->prev_length >= s->good_match) { -; chain_length >>= 2; -; } - - mov eax, [edx].ds_prev_length - mov ebx, [edx].ds_good_match - cmp eax, ebx - mov eax, [edx].ds_w_mask - mov ebx, [edx].ds_max_chain_length - jl SHORT LastMatchGood - shr ebx, 2 -LastMatchGood: - -; chainlen is decremented once beforehand so that the function can -; use the sign flag instead of the zero flag for the exit test. -; It is then shifted into the high word, to make room for the wmask -; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - mov [esp+chainlenwmask], ebx - -; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - mov eax, [edx].ds_nice_match - mov ebx, [edx].ds_lookahead - cmp ebx, eax - jl SHORT LookaheadLess - mov ebx, eax -LookaheadLess: - mov [esp+nicematch], ebx - -;/* register Bytef *scan = s->window + s->strstart; */ - - mov esi, [edx].ds_window - mov [esp+window], esi - mov ebp, [edx].ds_strstart - lea edi, [esi+ebp] - mov [esp+scan],edi - -;/* Determine how many bytes the scan ptr is off from being */ -;/* dword-aligned. */ - - mov eax, edi - neg eax - and eax, 3 - mov [esp+scanalign], eax - -;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ -;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ - - mov eax, [edx].ds_w_size - sub eax, MIN_LOOKAHEAD - sub ebp, eax - jg SHORT LimitPositive - xor ebp, ebp -LimitPositive: - -;/* int best_len = s->prev_length; */ - - mov eax, [edx].ds_prev_length - mov [esp+bestlen], eax - -;/* Store the sum of s->window + best_len in %esi locally, and in %esi. 
*/ - - add esi, eax - mov [esp+windowbestlen], esi - -;/* register ush scan_start = *(ushf*)scan; */ -;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ -;/* Posf *prev = s->prev; */ - - movzx ebx, WORD PTR[edi] - mov [esp+scanstart], ebx - movzx ebx, WORD PTR[eax+edi-1] - mov [esp+scanend], ebx - mov edi, [edx].ds_prev - -;/* Jump into the main loop. */ - - mov edx, [esp+chainlenwmask] - jmp SHORT LoopEntry - -;/* do { -; * match = s->window + cur_match; -; * if (*(ushf*)(match+best_len-1) != scan_end || -; * *(ushf*)match != scan_start) continue; -; * [...] -; * } while ((cur_match = prev[cur_match & wmask]) > limit -; * && --chain_length != 0); -; * -; * Here is the inner loop of the function. The function will spend the -; * majority of its time in this loop, and majority of that time will -; * be spent in the first ten instructions. -; * -; * Within this loop: -; * %ebx = scanend -; * %ecx = curmatch -; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -; * %esi = windowbestlen - i.e., (window + bestlen) -; * %edi = prev -; * %ebp = limit -; */ - - ALIGN 4 -LookupLoop: - and ecx, edx - movzx ecx, WORD PTR[edi+ecx*2] - cmp ecx, ebp - jbe LeaveNow - sub edx, 000010000H - js LeaveNow - -LoopEntry: - movzx eax, WORD PTR[esi+ecx-1] - cmp eax, ebx - jnz SHORT LookupLoop - - mov eax, [esp+window] - movzx eax, WORD PTR[eax+ecx] - cmp eax, [esp+scanstart] - jnz SHORT LookupLoop - -;/* Store the current value of chainlen. */ - - mov [esp+chainlenwmask], edx - -;/* Point %edi to the string under scrutiny, and %esi to the string we */ -;/* are hoping to match it up with. In actuality, %esi and %edi are */ -;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ -;/* initialized to -(MAX_MATCH_8 - scanalign). 
*/ - - mov esi, [esp+window] - mov edi, [esp+scan] - add esi, ecx - mov eax, [esp+scanalign] - mov edx, -MAX_MATCH_8 - lea edi, [edi+eax+MAX_MATCH_8] - lea esi, [esi+eax+MAX_MATCH_8] - -;/* Test the strings for equality, 8 bytes at a time. At the end, -; * adjust %edx so that it is offset to the exact byte that mismatched. -; * -; * We already know at this point that the first three bytes of the -; * strings match each other, and they can be safely passed over before -; * starting the compare loop. So what this code does is skip over 0-3 -; * bytes, as much as necessary in order to dword-align the %edi -; * pointer. (%esi will still be misaligned three times out of four.) -; * -; * It should be confessed that this loop usually does not represent -; * much of the total running time. Replacing it with a more -; * straightforward "rep cmpsb" would not drastically degrade -; * performance. -; */ - -LoopCmps: - mov eax, DWORD PTR[esi+edx] - xor eax, DWORD PTR[edi+edx] - jnz SHORT LeaveLoopCmps - - mov eax, DWORD PTR[esi+edx+4] - xor eax, DWORD PTR[edi+edx+4] - jnz SHORT LeaveLoopCmps4 - - add edx, 8 - jnz SHORT LoopCmps - jmp LenMaximum - ALIGN 4 - -LeaveLoopCmps4: - add edx, 4 - -LeaveLoopCmps: - test eax, 00000FFFFH - jnz SHORT LenLower - - add edx, 2 - shr eax, 16 - -LenLower: - sub al, 1 - adc edx, 0 - -;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ -;/* then automatically accept it as the best possible match and leave. */ - - lea eax, [edi+edx] - mov edi, [esp+scan] - sub eax, edi - cmp eax, MAX_MATCH - jge SHORT LenMaximum - -;/* If the length of the match is not longer than the best match we */ -;/* have so far, then forget it and return to the lookup loop. 
*/ - - mov edx, [esp+deflatestate] - mov ebx, [esp+bestlen] - cmp eax, ebx - jg SHORT LongerMatch - mov esi, [esp+windowbestlen] - mov edi, [edx].ds_prev - mov ebx, [esp+scanend] - mov edx, [esp+chainlenwmask] - jmp LookupLoop - ALIGN 4 - -;/* s->match_start = cur_match; */ -;/* best_len = len; */ -;/* if (len >= nice_match) break; */ -;/* scan_end = *(ushf*)(scan+best_len-1); */ - -LongerMatch: - mov ebx, [esp+nicematch] - mov [esp+bestlen], eax - mov [edx].ds_match_start, ecx - cmp eax, ebx - jge SHORT LeaveNow - mov esi, [esp+window] - add esi, eax - mov [esp+windowbestlen], esi - movzx ebx, WORD PTR[edi+eax-1] - mov edi, [edx].ds_prev - mov [esp+scanend], ebx - mov edx, [esp+chainlenwmask] - jmp LookupLoop - ALIGN 4 - -;/* Accept the current string, with the maximum possible length. */ - -LenMaximum: - mov edx, [esp+deflatestate] - mov DWORD PTR[esp+bestlen], MAX_MATCH - mov [edx].ds_match_start, ecx - -;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ -;/* return s->lookahead; */ - -LeaveNow: - mov edx, [esp+deflatestate] - mov ebx, [esp+bestlen] - mov eax, [edx].ds_lookahead - cmp ebx, eax - jg SHORT LookaheadRet - mov eax, ebx -LookaheadRet: - -; Restore the stack and return from whence we came. - - add esp, varsize - pop ebx - pop esi - pop edi - pop ebp - ret - -_longest_match ENDP - -_TEXT ENDS -END +
+; match.asm -- Pentium-Pro optimized version of longest_match()
+;
+; Updated for zlib 1.1.3 and converted to MASM 6.1x
+; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com>
+; and Chuck Walbourn <chuckw@kinesoft.com>
+; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro>
+;
+; This is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License.
+
+; Based on match.S
+; Written for zlib 1.1.2
+; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
+;
+; Modified by Gilles Vollant (2005) to add gzhead and gzindex
+
+ .686P
+ .MODEL FLAT
+
+;===========================================================================
+; EQUATES
+;===========================================================================
+
+; MAX_MATCH is the length stored in bestlen when longest_match accepts a
+; string "with the maximum possible length".
+MAX_MATCH EQU 258
+; MIN_MATCH is only used in this file to derive MIN_LOOKAHEAD.
+MIN_MATCH EQU 3
+; MIN_LOOKAHEAD is subtracted from s->w_size in longest_match to obtain the
+; maximum distance used when computing the chain limit.
+MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1)
+; MAX_MATCH rounded up to the next multiple of 8 (264); the string compare
+; loop in longest_match advances 8 bytes per iteration.
+MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7))
+
+;===========================================================================
+; STRUCTURES
+;===========================================================================
+
+; This STRUCT assumes a 4-byte alignment
+
+DEFLATE_STATE STRUCT
+; Field-by-field description of the start of the deflate_state structure
+; (see deflate.h) as longest_match sees it through "ASSUME edx:PTR
+; DEFLATE_STATE". Every member is a dword except the two byte members,
+; which are followed by two padding bytes so that the next member starts
+; on a dword boundary again. The member order and sizes must stay in step
+; with the C definition, otherwise every offset after the change is wrong.
+; The "used" tags are the original author's; not every tagged member is
+; referenced by the code in this file.
+ds_strm dd ?
+ds_status dd ?
+ds_pending_buf dd ?
+ds_pending_buf_size dd ?
+ds_pending_out dd ?
+ds_pending dd ?
+ds_wrap dd ?
+; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h)
+ds_gzhead dd ?
+ds_gzindex dd ?
+ds_data_type db ?
+ds_method db ?
+ db ? ; padding
+ db ? ; padding
+ds_last_flush dd ?
+ds_w_size dd ? ; used
+ds_w_bits dd ?
+ds_w_mask dd ? ; used
+ds_window dd ? ; used
+ds_window_size dd ?
+ds_prev dd ? ; used
+ds_head dd ?
+ds_ins_h dd ?
+ds_hash_size dd ?
+ds_hash_bits dd ?
+ds_hash_mask dd ?
+ds_hash_shift dd ?
+ds_block_start dd ?
+ds_match_length dd ? ; used
+ds_prev_match dd ? ; used
+ds_match_available dd ?
+ds_strstart dd ? ; used
+ds_match_start dd ? ; used
+ds_lookahead dd ? ; used
+ds_prev_length dd ? ; used
+ds_max_chain_length dd ? ; used
+ds_max_lazy_match dd ?
+ds_level dd ?
+ds_strategy dd ?
+ds_good_match dd ? ; used
+ds_nice_match dd ? ; used
+
+; The rest of the struct is not needed for match
+DEFLATE_STATE ENDS
+
+;===========================================================================
+; CODE
+;===========================================================================
+_TEXT SEGMENT
+
+;---------------------------------------------------------------------------
+; match_init
+;---------------------------------------------------------------------------
+ ALIGN 4
+PUBLIC _match_init
+; Exported initialization entry point. This implementation keeps no state
+; that has to be prepared, so the routine returns immediately without
+; touching any register or memory.
+_match_init PROC
+ ; no initialization needed
+ ret
+_match_init ENDP
+
+;---------------------------------------------------------------------------
+; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
+;---------------------------------------------------------------------------
+ ALIGN 4
+
+PUBLIC _longest_match
+_longest_match PROC
+
+; Follows the hash chain that starts at curmatch and compares each
+; candidate position in the window against the string at
+; s->window + s->strstart. Whenever a candidate matches for more bytes than
+; the best length so far, its position is stored in s->match_start and the
+; length in the local bestlen. The search stops when the chain falls to or
+; below the limit, the chain budget is used up, or a match of at least
+; nicematch (or MAX_MATCH) bytes is found.
+;
+; Arguments (on the stack): deflatestate, curmatch.
+; Returns in eax: the smaller of bestlen and s->lookahead.
+; ebx, esi, edi and ebp are saved on entry and restored on exit; the
+; plain "ret" leaves the arguments on the stack for the caller.
+
+; Since this code uses EBP for a scratch register, the stack frame must
+; be manually constructed and referenced relative to the ESP register.
+
+; Stack image
+; Variables
+chainlenwmask = 0 ; high word: current chain len
+ ; low word: s->wmask
+window = 4 ; local copy of s->window
+windowbestlen = 8 ; s->window + bestlen
+scanend = 12 ; last two bytes of string
+scanstart = 16 ; first two bytes of string
+scanalign = 20 ; dword-misalignment of string
+nicematch = 24 ; a good enough match size
+bestlen = 28 ; size of best match so far
+scan = 32 ; ptr to string wanting match
+varsize = 36 ; number of bytes (also offset to last saved register)
+
+; Saved Registers (actually pushed into place)
+; The push order below is ebp, edi, esi, ebx, so ebx ends up at the lowest
+; address, followed by esi, edi and ebp. These symbols document the frame;
+; the registers are restored with pop instructions, not through them.
+ebx_save = 36
+esi_save = 40
+edi_save = 44
+ebp_save = 48
+
+; Parameters
+retaddr = 52
+deflatestate = 56
+curmatch = 60
+
+; Save registers that the compiler may be using
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+; Allocate local variable space
+ sub esp,varsize
+
+; Retrieve the function arguments. ecx will hold cur_match
+; throughout the entire function. edx will hold the pointer to the
+; deflate_state structure during the function's setup (before
+; entering the main loop).
+
+ mov edx, [esp+deflatestate]
+ASSUME edx:PTR DEFLATE_STATE
+
+ mov ecx, [esp+curmatch]
+
+; uInt wmask = s->w_mask;
+; unsigned chain_length = s->max_chain_length;
+; if (s->prev_length >= s->good_match) {
+; chain_length >>= 2;
+; }
+
+; The flags set by cmp are still intact at the jl because the two mov
+; instructions in between do not modify flags. The jump is taken when
+; prev_length < good_match (signed compare), which skips the shift.
+ mov eax, [edx].ds_prev_length
+ mov ebx, [edx].ds_good_match
+ cmp eax, ebx
+ mov eax, [edx].ds_w_mask
+ mov ebx, [edx].ds_max_chain_length
+ jl SHORT LastMatchGood
+ shr ebx, 2
+LastMatchGood:
+
+; chainlen is decremented once beforehand so that the function can
+; use the sign flag instead of the zero flag for the exit test.
+; It is then shifted into the high word, to make room for the wmask
+; value, which it will always accompany.
+
+ dec ebx
+ shl ebx, 16
+ or ebx, eax
+ mov [esp+chainlenwmask], ebx
+
+; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+; ebx starts as lookahead and is replaced by nice_match unless
+; lookahead < nice_match, so nicematch = min(nice_match, lookahead).
+; NOTE(review): jl is a signed compare while the C text above casts to
+; uInt; the results agree as long as both values are below 2^31.
+ mov eax, [edx].ds_nice_match
+ mov ebx, [edx].ds_lookahead
+ cmp ebx, eax
+ jl SHORT LookaheadLess
+ mov ebx, eax
+LookaheadLess:
+ mov [esp+nicematch], ebx
+
+;/* register Bytef *scan = s->window + s->strstart; */
+
+ mov esi, [edx].ds_window
+ mov [esp+window], esi
+ mov ebp, [edx].ds_strstart
+ lea edi, [esi+ebp]
+ mov [esp+scan],edi
+
+;/* Determine how many bytes the scan ptr is off from being */
+;/* dword-aligned. */
+
+; (-scan) AND 3 is the number of bytes (0-3) from scan up to the next
+; dword boundary.
+ mov eax, edi
+ neg eax
+ and eax, 3
+ mov [esp+scanalign], eax
+
+;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
+;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
+
+; eax = w_size - MIN_LOOKAHEAD; ebp = strstart - eax, forced to 0 when the
+; difference is not positive.
+ mov eax, [edx].ds_w_size
+ sub eax, MIN_LOOKAHEAD
+ sub ebp, eax
+ jg SHORT LimitPositive
+ xor ebp, ebp
+LimitPositive:
+
+;/* int best_len = s->prev_length; */
+
+ mov eax, [edx].ds_prev_length
+ mov [esp+bestlen], eax
+
+;/* Store the sum of s->window + best_len in windowbestlen locally, */
+;/* and in %esi. */
+
+ add esi, eax
+ mov [esp+windowbestlen], esi
+
+;/* register ush scan_start = *(ushf*)scan; */
+;/* register ush scan_end = *(ushf*)(scan+best_len-1); */
+;/* Posf *prev = s->prev; */
+
+ movzx ebx, WORD PTR[edi]
+ mov [esp+scanstart], ebx
+ movzx ebx, WORD PTR[eax+edi-1]
+ mov [esp+scanend], ebx
+ mov edi, [edx].ds_prev
+
+;/* Jump into the main loop. */
+
+; From here on edx no longer points at the deflate_state structure; it is
+; reloaded from [esp+deflatestate] whenever the structure is needed.
+ mov edx, [esp+chainlenwmask]
+ jmp SHORT LoopEntry
+
+;/* do {
+; * match = s->window + cur_match;
+; * if (*(ushf*)(match+best_len-1) != scan_end ||
+; * *(ushf*)match != scan_start) continue;
+; * [...]
+; * } while ((cur_match = prev[cur_match & wmask]) > limit
+; * && --chain_length != 0);
+; *
+; * Here is the inner loop of the function. The function will spend the
+; * majority of its time in this loop, and majority of that time will
+; * be spent in the first ten instructions.
+; *
+; * Within this loop:
+; * %ebx = scanend
+; * %ecx = curmatch
+; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
+; * %esi = windowbestlen - i.e., (window + bestlen)
+; * %edi = prev
+; * %ebp = limit
+; */
+
+ ALIGN 4
+LookupLoop:
+; cur_match = prev[cur_match & wmask]; prev entries are 16-bit words.
+ and ecx, edx
+ movzx ecx, WORD PTR[edi+ecx*2]
+; Leave when cur_match <= limit (unsigned compare).
+ cmp ecx, ebp
+ jbe LeaveNow
+; Subtracting 10000h decrements the chain length kept in the high word;
+; the sign flag is set once it drops below zero.
+ sub edx, 000010000H
+ js LeaveNow
+
+LoopEntry:
+; Cheap rejection tests: first the two bytes ending at best_len, then the
+; first two bytes of the candidate.
+ movzx eax, WORD PTR[esi+ecx-1]
+ cmp eax, ebx
+ jnz SHORT LookupLoop
+
+ mov eax, [esp+window]
+ movzx eax, WORD PTR[eax+ecx]
+ cmp eax, [esp+scanstart]
+ jnz SHORT LookupLoop
+
+;/* Store the current value of chainlen. */
+
+ mov [esp+chainlenwmask], edx
+
+;/* Point %edi to the string under scrutiny, and %esi to the string we */
+;/* are hoping to match it up with. In actuality, %esi and %edi are */
+;/* both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and %edx is */
+;/* initialized to -MAX_MATCH_8, so the first compare reads at offset */
+;/* scanalign from the start of each string. */
+
+ mov esi, [esp+window]
+ mov edi, [esp+scan]
+ add esi, ecx
+ mov eax, [esp+scanalign]
+ mov edx, -MAX_MATCH_8
+ lea edi, [edi+eax+MAX_MATCH_8]
+ lea esi, [esi+eax+MAX_MATCH_8]
+
+;/* Test the strings for equality, 8 bytes at a time. At the end,
+; * adjust %edx so that it is offset to the exact byte that mismatched.
+; *
+; * We already know at this point that the first three bytes of the
+; * strings match each other, and they can be safely passed over before
+; * starting the compare loop. So what this code does is skip over 0-3
+; * bytes, as much as necessary in order to dword-align the %edi
+; * pointer. (%esi will still be misaligned three times out of four.)
+; *
+; * It should be confessed that this loop usually does not represent
+; * much of the total running time. Replacing it with a more
+; * straightforward "rep cmpsb" would not drastically degrade
+; * performance.
+; */
+
+LoopCmps:
+; eax = XOR of the two dwords; it is non-zero exactly when they differ.
+ mov eax, DWORD PTR[esi+edx]
+ xor eax, DWORD PTR[edi+edx]
+ jnz SHORT LeaveLoopCmps
+
+ mov eax, DWORD PTR[esi+edx+4]
+ xor eax, DWORD PTR[edi+edx+4]
+ jnz SHORT LeaveLoopCmps4
+
+; edx counts up from -MAX_MATCH_8; reaching zero means every compared
+; dword was equal, so the match is taken as maximal.
+ add edx, 8
+ jnz SHORT LoopCmps
+ jmp LenMaximum
+ ALIGN 4
+
+LeaveLoopCmps4:
+ add edx, 4
+
+LeaveLoopCmps:
+; Locate the first differing byte inside the dword. If the low 16 bits of
+; the XOR are zero the difference is in the upper half: skip two bytes and
+; bring the upper half down.
+ test eax, 00000FFFFH
+ jnz SHORT LenLower
+
+ add edx, 2
+ shr eax, 16
+
+LenLower:
+; sub al, 1 sets the carry flag only when al was zero (low byte equal);
+; adc then adds that carry, skipping one more matching byte.
+ sub al, 1
+ adc edx, 0
+
+;/* Calculate the length of the match. If it is at least MAX_MATCH, */
+;/* then automatically accept it as the best possible match and leave. */
+
+; edi+edx is the address of the first mismatching byte in the scan
+; string; subtracting scan gives the match length in eax.
+ lea eax, [edi+edx]
+ mov edi, [esp+scan]
+ sub eax, edi
+ cmp eax, MAX_MATCH
+ jge SHORT LenMaximum
+
+;/* If the length of the match is not longer than the best match we */
+;/* have so far, then forget it and return to the lookup loop. */
+
+ mov edx, [esp+deflatestate]
+ mov ebx, [esp+bestlen]
+ cmp eax, ebx
+ jg SHORT LongerMatch
+; Not an improvement: reload the registers the lookup loop expects.
+ mov esi, [esp+windowbestlen]
+ mov edi, [edx].ds_prev
+ mov ebx, [esp+scanend]
+ mov edx, [esp+chainlenwmask]
+ jmp LookupLoop
+ ALIGN 4
+
+;/* s->match_start = cur_match; */
+;/* best_len = len; */
+;/* if (len >= nice_match) break; */
+;/* scan_end = *(ushf*)(scan+best_len-1); */
+
+LongerMatch:
+; On entry: eax = new length, ecx = cur_match, edx = deflate_state pointer,
+; edi = scan.
+ mov ebx, [esp+nicematch]
+ mov [esp+bestlen], eax
+ mov [edx].ds_match_start, ecx
+ cmp eax, ebx
+ jge SHORT LeaveNow
+; Recompute windowbestlen and scanend for the new best length, then
+; reload the registers the lookup loop expects.
+ mov esi, [esp+window]
+ add esi, eax
+ mov [esp+windowbestlen], esi
+ movzx ebx, WORD PTR[edi+eax-1]
+ mov edi, [edx].ds_prev
+ mov [esp+scanend], ebx
+ mov edx, [esp+chainlenwmask]
+ jmp LookupLoop
+ ALIGN 4
+
+;/* Accept the current string, with the maximum possible length. */
+
+LenMaximum:
+ mov edx, [esp+deflatestate]
+ mov DWORD PTR[esp+bestlen], MAX_MATCH
+ mov [edx].ds_match_start, ecx
+
+;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
+;/* return s->lookahead; */
+
+LeaveNow:
+; eax starts as lookahead and is replaced by bestlen unless
+; bestlen > lookahead (signed compare).
+ mov edx, [esp+deflatestate]
+ mov ebx, [esp+bestlen]
+ mov eax, [edx].ds_lookahead
+ cmp ebx, eax
+ jg SHORT LookaheadRet
+ mov eax, ebx
+LookaheadRet:
+
+; Restore the stack and return from whence we came.
+
+; Pops mirror the pushes in reverse order (ebx, esi, edi, ebp).
+ add esp, varsize
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+_longest_match ENDP
+
+_TEXT ENDS
+END
diff --git a/zlib/contrib/masmx86/gvmat32.asm b/zlib/contrib/masmx86/gvmat32.asm index ec360e67ada..874bb2d482b 100644 --- a/zlib/contrib/masmx86/gvmat32.asm +++ b/zlib/contrib/masmx86/gvmat32.asm @@ -1,905 +1,972 @@ -; -; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 -; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. -; File written by Gilles Vollant, by modifiying the longest_match -; from Jean-loup Gailly in deflate.c -; It need wmask == 0x7fff -; (assembly code is faster with a fixed wmask) -; -; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) -; I compile with : "ml /coff /Zi /c gvmat32.asm" -; - -;uInt longest_match_7fff(s, cur_match) -; deflate_state *s; -; IPos cur_match; /* current match */ - - NbStack equ 76 - cur_match equ dword ptr[esp+NbStack-0] - str_s equ dword ptr[esp+NbStack-4] -; 5 dword on top (ret,ebp,esi,edi,ebx) - adrret equ dword ptr[esp+NbStack-8] - pushebp equ dword ptr[esp+NbStack-12] - pushedi equ dword ptr[esp+NbStack-16] - pushesi equ dword ptr[esp+NbStack-20] - pushebx equ dword ptr[esp+NbStack-24] - - chain_length equ dword ptr [esp+NbStack-28] - limit equ dword ptr [esp+NbStack-32] - best_len equ dword ptr [esp+NbStack-36] - window equ dword ptr [esp+NbStack-40] - prev equ dword ptr [esp+NbStack-44] - scan_start equ word ptr [esp+NbStack-48] - wmask equ dword ptr [esp+NbStack-52] - match_start_ptr equ dword ptr [esp+NbStack-56] - nice_match equ dword ptr [esp+NbStack-60] - scan equ dword ptr [esp+NbStack-64] - - windowlen equ dword ptr [esp+NbStack-68] - match_start equ dword ptr [esp+NbStack-72] - strend equ dword ptr [esp+NbStack-76] - NbStackAdd equ (NbStack-24) - - .386p - - name gvmatch - .MODEL FLAT - - - -; all the +4 offsets are due to the addition of pending_buf_size (in zlib -; in the deflate_state structure since the asm code was first written -; (if you compile with zlib 1.0.4 or older, remove the +4). 
-; Note : these value are good with a 8 bytes boundary pack structure - dep_chain_length equ 70h+4 - dep_window equ 2ch+4 - dep_strstart equ 60h+4 - dep_prev_length equ 6ch+4 - dep_nice_match equ 84h+4 - dep_w_size equ 20h+4 - dep_prev equ 34h+4 - dep_w_mask equ 28h+4 - dep_good_match equ 80h+4 - dep_match_start equ 64h+4 - dep_lookahead equ 68h+4 - - -_TEXT segment - -IFDEF NOUNDERLINE - public longest_match_7fff - public longest_match_686 -; public match_init -ELSE - public _longest_match_7fff - public _longest_match_686 -; public _match_init -ENDIF - - MAX_MATCH equ 258 - MIN_MATCH equ 3 - MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) - - - -IFDEF NOUNDERLINE -;match_init proc near -; ret -;match_init endp -ELSE -;_match_init proc near -; ret -;_match_init endp -ENDIF - - -IFDEF NOUNDERLINE -longest_match_7fff proc near -ELSE -_longest_match_7fff proc near -ENDIF - - mov edx,[esp+4] - - - - push ebp - push edi - push esi - push ebx - - sub esp,NbStackAdd - -; initialize or check the variables used in match.asm. 
- mov ebp,edx - -; chain_length = s->max_chain_length -; if (prev_length>=good_match) chain_length >>= 2 - mov edx,[ebp+dep_chain_length] - mov ebx,[ebp+dep_prev_length] - cmp [ebp+dep_good_match],ebx - ja noshr - shr edx,2 -noshr: -; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop - inc edx - mov edi,[ebp+dep_nice_match] - mov chain_length,edx - mov eax,[ebp+dep_lookahead] - cmp eax,edi -; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - jae nolookaheadnicematch - mov edi,eax -nolookaheadnicematch: -; best_len = s->prev_length - mov best_len,ebx - -; window = s->window - mov esi,[ebp+dep_window] - mov ecx,[ebp+dep_strstart] - mov window,esi - - mov nice_match,edi -; scan = window + strstart - add esi,ecx - mov scan,esi -; dx = *window - mov dx,word ptr [esi] -; bx = *(window+best_len-1) - mov bx,word ptr [esi+ebx-1] - add esi,MAX_MATCH-1 -; scan_start = *scan - mov scan_start,dx -; strend = scan + MAX_MATCH-1 - mov strend,esi -; bx = scan_end = *(window+best_len-1) - -; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? 
-; s->strstart - (IPos)MAX_DIST(s) : NIL; - - mov esi,[ebp+dep_w_size] - sub esi,MIN_LOOKAHEAD -; here esi = MAX_DIST(s) - sub ecx,esi - ja nodist - xor ecx,ecx -nodist: - mov limit,ecx - -; prev = s->prev - mov edx,[ebp+dep_prev] - mov prev,edx - -; - mov edx,dword ptr [ebp+dep_match_start] - mov bp,scan_start - mov eax,cur_match - mov match_start,edx - - mov edx,window - mov edi,edx - add edi,best_len - mov esi,prev - dec edi -; windowlen = window + best_len -1 - mov windowlen,edi - - jmp beginloop2 - align 4 - -; here, in the loop -; eax = ax = cur_match -; ecx = limit -; bx = scan_end -; bp = scan_start -; edi = windowlen (window + best_len -1) -; esi = prev - - -;// here; chain_length <=16 -normalbeg0add16: - add chain_length,16 - jz exitloop -normalbeg0: - cmp word ptr[edi+eax],bx - je normalbeg2noroll -rcontlabnoroll: -; cur_match = prev[cur_match & wmask] - and eax,7fffh - mov ax,word ptr[esi+eax*2] -; if cur_match > limit, go to exitloop - cmp ecx,eax - jnb exitloop -; if --chain_length != 0, go to exitloop - dec chain_length - jnz normalbeg0 - jmp exitloop - -normalbeg2noroll: -; if (scan_start==*(cur_match+window)) goto normalbeg2 - cmp bp,word ptr[edx+eax] - jne rcontlabnoroll - jmp normalbeg2 - -contloop3: - mov edi,windowlen - -; cur_match = prev[cur_match & wmask] - and eax,7fffh - mov ax,word ptr[esi+eax*2] -; if cur_match > limit, go to exitloop - cmp ecx,eax -jnbexitloopshort1: - jnb exitloop -; if --chain_length != 0, go to exitloop - - -; begin the main loop -beginloop2: - sub chain_length,16+1 -; if chain_length <=16, don't use the unrolled loop - jna normalbeg0add16 - -do16: - cmp word ptr[edi+eax],bx - je normalbeg2dc0 - -maccn MACRO lab - and eax,7fffh - mov ax,word ptr[esi+eax*2] - cmp ecx,eax - jnb exitloop - cmp word ptr[edi+eax],bx - je lab - ENDM - -rcontloop0: - maccn normalbeg2dc1 - -rcontloop1: - maccn normalbeg2dc2 - -rcontloop2: - maccn normalbeg2dc3 - -rcontloop3: - maccn normalbeg2dc4 - -rcontloop4: - maccn normalbeg2dc5 - 
-rcontloop5: - maccn normalbeg2dc6 - -rcontloop6: - maccn normalbeg2dc7 - -rcontloop7: - maccn normalbeg2dc8 - -rcontloop8: - maccn normalbeg2dc9 - -rcontloop9: - maccn normalbeg2dc10 - -rcontloop10: - maccn short normalbeg2dc11 - -rcontloop11: - maccn short normalbeg2dc12 - -rcontloop12: - maccn short normalbeg2dc13 - -rcontloop13: - maccn short normalbeg2dc14 - -rcontloop14: - maccn short normalbeg2dc15 - -rcontloop15: - and eax,7fffh - mov ax,word ptr[esi+eax*2] - cmp ecx,eax - jnb exitloop - - sub chain_length,16 - ja do16 - jmp normalbeg0add16 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -normbeg MACRO rcontlab,valsub -; if we are here, we know that *(match+best_len-1) == scan_end - cmp bp,word ptr[edx+eax] -; if (match != scan_start) goto rcontlab - jne rcontlab -; calculate the good chain_length, and we'll compare scan and match string - add chain_length,16-valsub - jmp iseq - ENDM - - -normalbeg2dc11: - normbeg rcontloop11,11 - -normalbeg2dc12: - normbeg short rcontloop12,12 - -normalbeg2dc13: - normbeg short rcontloop13,13 - -normalbeg2dc14: - normbeg short rcontloop14,14 - -normalbeg2dc15: - normbeg short rcontloop15,15 - -normalbeg2dc10: - normbeg rcontloop10,10 - -normalbeg2dc9: - normbeg rcontloop9,9 - -normalbeg2dc8: - normbeg rcontloop8,8 - -normalbeg2dc7: - normbeg rcontloop7,7 - -normalbeg2dc6: - normbeg rcontloop6,6 - -normalbeg2dc5: - normbeg rcontloop5,5 - -normalbeg2dc4: - normbeg rcontloop4,4 - -normalbeg2dc3: - normbeg rcontloop3,3 - -normalbeg2dc2: - normbeg rcontloop2,2 - -normalbeg2dc1: - normbeg rcontloop1,1 - -normalbeg2dc0: - normbeg rcontloop0,0 - - -; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end - -normalbeg2: - mov edi,window - - cmp bp,word ptr[edi+eax] - jne contloop3 ; if *(ushf*)match != scan_start, continue - -iseq: -; if we are here, we know that *(match+best_len-1) == scan_end -; and (match == scan_start) - - mov edi,edx - mov esi,scan ; esi = scan - add edi,eax ; edi = window + cur_match = match 
- - mov edx,[esi+3] ; compare manually dword at match+3 - xor edx,[edi+3] ; and scan +3 - - jz begincompare ; if equal, go to long compare - -; we will determine the unmatch byte and calculate len (in esi) - or dl,dl - je eq1rr - mov esi,3 - jmp trfinval -eq1rr: - or dx,dx - je eq1 - - mov esi,4 - jmp trfinval -eq1: - and edx,0ffffffh - jz eq11 - mov esi,5 - jmp trfinval -eq11: - mov esi,6 - jmp trfinval - -begincompare: - ; here we now scan and match begin same - add edi,6 - add esi,6 - mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes - repe cmpsd ; loop until mismatch - - je trfin ; go to trfin if not unmatch -; we determine the unmatch byte - sub esi,4 - mov edx,[edi-4] - xor edx,[esi] - - or dl,dl - jnz trfin - inc esi - - or dx,dx - jnz trfin - inc esi - - and edx,0ffffffh - jnz trfin - inc esi - -trfin: - sub esi,scan ; esi = len -trfinval: -; here we have finised compare, and esi contain len of equal string - cmp esi,best_len ; if len > best_len, go newbestlen - ja short newbestlen -; now we restore edx, ecx and esi, for the big loop - mov esi,prev - mov ecx,limit - mov edx,window - jmp contloop3 - -newbestlen: - mov best_len,esi ; len become best_len - - mov match_start,eax ; save new position as match_start - cmp esi,nice_match ; if best_len >= nice_match, exit - jae exitloop - mov ecx,scan - mov edx,window ; restore edx=window - add ecx,esi - add esi,edx - - dec esi - mov windowlen,esi ; windowlen = window + best_len-1 - mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end - -; now we restore ecx and esi, for the big loop : - mov esi,prev - mov ecx,limit - jmp contloop3 - -exitloop: -; exit : s->match_start=match_start - mov ebx,match_start - mov ebp,str_s - mov ecx,best_len - mov dword ptr [ebp+dep_match_start],ebx - mov eax,dword ptr [ebp+dep_lookahead] - cmp ecx,eax - ja minexlo - mov eax,ecx -minexlo: -; return min(best_len,s->lookahead) - -; restore stack and register ebx,esi,edi,ebp - add esp,NbStackAdd - - pop ebx - pop esi - pop edi - 
pop ebp - ret -InfoAuthor: -; please don't remove this string ! -; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary! - db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah - - - -IFDEF NOUNDERLINE -longest_match_7fff endp -ELSE -_longest_match_7fff endp -ENDIF - - -IFDEF NOUNDERLINE -cpudetect32 proc near -ELSE -_cpudetect32 proc near -ENDIF - - push ebx - - pushfd ; push original EFLAGS - pop eax ; get original EFLAGS - mov ecx, eax ; save original EFLAGS - xor eax, 40000h ; flip AC bit in EFLAGS - push eax ; save new EFLAGS value on stack - popfd ; replace current EFLAGS value - pushfd ; get new EFLAGS - pop eax ; store new EFLAGS in EAX - xor eax, ecx ; can’t toggle AC bit, processor=80386 - jz end_cpu_is_386 ; jump if 80386 processor - push ecx - popfd ; restore AC bit in EFLAGS first - - pushfd - pushfd - pop ecx - - mov eax, ecx ; get original EFLAGS - xor eax, 200000h ; flip ID bit in EFLAGS - push eax ; save new EFLAGS value on stack - popfd ; replace current EFLAGS value - pushfd ; get new EFLAGS - pop eax ; store new EFLAGS in EAX - popfd ; restore original EFLAGS - xor eax, ecx ; can’t toggle ID bit, - je is_old_486 ; processor=old - - mov eax,1 - db 0fh,0a2h ;CPUID - -exitcpudetect: - pop ebx - ret - -end_cpu_is_386: - mov eax,0300h - jmp exitcpudetect - -is_old_486: - mov eax,0400h - jmp exitcpudetect - -IFDEF NOUNDERLINE -cpudetect32 endp -ELSE -_cpudetect32 endp -ENDIF - - - - -MAX_MATCH equ 258 -MIN_MATCH equ 3 -MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) -MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) - - -;;; stack frame offsets - -chainlenwmask equ esp + 0 ; high word: current chain len - ; low word: s->wmask -window equ esp + 4 ; local copy of s->window -windowbestlen equ esp + 8 ; s->window + bestlen -scanstart equ esp + 16 ; first two bytes of string -scanend equ esp + 12 ; last two bytes of string -scanalign equ esp + 20 ; dword-misalignment of string 
-nicematch equ esp + 24 ; a good enough match size -bestlen equ esp + 28 ; size of best match so far -scan equ esp + 32 ; ptr to string wanting match - -LocalVarsSize equ 36 -; saved ebx byte esp + 36 -; saved edi byte esp + 40 -; saved esi byte esp + 44 -; saved ebp byte esp + 48 -; return address byte esp + 52 -deflatestate equ esp + 56 ; the function arguments -curmatch equ esp + 60 - -;;; Offsets for fields in the deflate_state structure. These numbers -;;; are calculated from the definition of deflate_state, with the -;;; assumption that the compiler will dword-align the fields. (Thus, -;;; changing the definition of deflate_state could easily cause this -;;; program to crash horribly, without so much as a warning at -;;; compile time. Sigh.) - -dsWSize equ 36 -dsWMask equ 44 -dsWindow equ 48 -dsPrev equ 56 -dsMatchLen equ 88 -dsPrevMatch equ 92 -dsStrStart equ 100 -dsMatchStart equ 104 -dsLookahead equ 108 -dsPrevLen equ 112 -dsMaxChainLen equ 116 -dsGoodMatch equ 132 -dsNiceMatch equ 136 - - -;;; match.asm -- Pentium-Pro-optimized version of longest_match() -;;; Written for zlib 1.1.2 -;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> -;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html -;;; -;;; This is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License. - -;GLOBAL _longest_match, _match_init - - -;SECTION .text - -;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) - -;_longest_match: -IFDEF NOUNDERLINE -longest_match_686 proc near -ELSE -_longest_match_686 proc near -ENDIF - - -;;; Save registers that the compiler may be using, and adjust esp to -;;; make room for our stack frame. - - push ebp - push edi - push esi - push ebx - sub esp, LocalVarsSize - -;;; Retrieve the function arguments. ecx will hold cur_match -;;; throughout the entire function. 
edx will hold the pointer to the -;;; deflate_state structure during the function's setup (before -;;; entering the main loop. - - mov edx, [deflatestate] - mov ecx, [curmatch] - -;;; uInt wmask = s->w_mask; -;;; unsigned chain_length = s->max_chain_length; -;;; if (s->prev_length >= s->good_match) { -;;; chain_length >>= 2; -;;; } - - mov eax, [edx + dsPrevLen] - mov ebx, [edx + dsGoodMatch] - cmp eax, ebx - mov eax, [edx + dsWMask] - mov ebx, [edx + dsMaxChainLen] - jl LastMatchGood - shr ebx, 2 -LastMatchGood: - -;;; chainlen is decremented once beforehand so that the function can -;;; use the sign flag instead of the zero flag for the exit test. -;;; It is then shifted into the high word, to make room for the wmask -;;; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - mov [chainlenwmask], ebx - -;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - mov eax, [edx + dsNiceMatch] - mov ebx, [edx + dsLookahead] - cmp ebx, eax - jl LookaheadLess - mov ebx, eax -LookaheadLess: mov [nicematch], ebx - -;;; register Bytef *scan = s->window + s->strstart; - - mov esi, [edx + dsWindow] - mov [window], esi - mov ebp, [edx + dsStrStart] - lea edi, [esi + ebp] - mov [scan], edi - -;;; Determine how many bytes the scan ptr is off from being -;;; dword-aligned. - - mov eax, edi - neg eax - and eax, 3 - mov [scanalign], eax - -;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -;;; s->strstart - (IPos)MAX_DIST(s) : NIL; - - mov eax, [edx + dsWSize] - sub eax, MIN_LOOKAHEAD - sub ebp, eax - jg LimitPositive - xor ebp, ebp -LimitPositive: - -;;; int best_len = s->prev_length; - - mov eax, [edx + dsPrevLen] - mov [bestlen], eax - -;;; Store the sum of s->window + best_len in esi locally, and in esi. 
- - add esi, eax - mov [windowbestlen], esi - -;;; register ush scan_start = *(ushf*)scan; -;;; register ush scan_end = *(ushf*)(scan+best_len-1); -;;; Posf *prev = s->prev; - - movzx ebx, word ptr [edi] - mov [scanstart], ebx - movzx ebx, word ptr [edi + eax - 1] - mov [scanend], ebx - mov edi, [edx + dsPrev] - -;;; Jump into the main loop. - - mov edx, [chainlenwmask] - jmp short LoopEntry - -align 4 - -;;; do { -;;; match = s->window + cur_match; -;;; if (*(ushf*)(match+best_len-1) != scan_end || -;;; *(ushf*)match != scan_start) continue; -;;; [...] -;;; } while ((cur_match = prev[cur_match & wmask]) > limit -;;; && --chain_length != 0); -;;; -;;; Here is the inner loop of the function. The function will spend the -;;; majority of its time in this loop, and majority of that time will -;;; be spent in the first ten instructions. -;;; -;;; Within this loop: -;;; ebx = scanend -;;; ecx = curmatch -;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -;;; esi = windowbestlen - i.e., (window + bestlen) -;;; edi = prev -;;; ebp = limit - -LookupLoop: - and ecx, edx - movzx ecx, word ptr [edi + ecx*2] - cmp ecx, ebp - jbe LeaveNow - sub edx, 00010000h - js LeaveNow -LoopEntry: movzx eax, word ptr [esi + ecx - 1] - cmp eax, ebx - jnz LookupLoop - mov eax, [window] - movzx eax, word ptr [eax + ecx] - cmp eax, [scanstart] - jnz LookupLoop - -;;; Store the current value of chainlen. - - mov [chainlenwmask], edx - -;;; Point edi to the string under scrutiny, and esi to the string we -;;; are hoping to match it up with. In actuality, esi and edi are -;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is -;;; initialized to -(MAX_MATCH_8 - scanalign). - - mov esi, [window] - mov edi, [scan] - add esi, ecx - mov eax, [scanalign] - mov edx, 0fffffef8h; -(MAX_MATCH_8) - lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] - lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] - -;;; Test the strings for equality, 8 bytes at a time. 
At the end, -;;; adjust edx so that it is offset to the exact byte that mismatched. -;;; -;;; We already know at this point that the first three bytes of the -;;; strings match each other, and they can be safely passed over before -;;; starting the compare loop. So what this code does is skip over 0-3 -;;; bytes, as much as necessary in order to dword-align the edi -;;; pointer. (esi will still be misaligned three times out of four.) -;;; -;;; It should be confessed that this loop usually does not represent -;;; much of the total running time. Replacing it with a more -;;; straightforward "rep cmpsb" would not drastically degrade -;;; performance. - -LoopCmps: - mov eax, [esi + edx] - xor eax, [edi + edx] - jnz LeaveLoopCmps - mov eax, [esi + edx + 4] - xor eax, [edi + edx + 4] - jnz LeaveLoopCmps4 - add edx, 8 - jnz LoopCmps - jmp short LenMaximum -LeaveLoopCmps4: add edx, 4 -LeaveLoopCmps: test eax, 0000FFFFh - jnz LenLower - add edx, 2 - shr eax, 16 -LenLower: sub al, 1 - adc edx, 0 - -;;; Calculate the length of the match. If it is longer than MAX_MATCH, -;;; then automatically accept it as the best possible match and leave. - - lea eax, [edi + edx] - mov edi, [scan] - sub eax, edi - cmp eax, MAX_MATCH - jge LenMaximum - -;;; If the length of the match is not longer than the best match we -;;; have so far, then forget it and return to the lookup loop. 
- - mov edx, [deflatestate] - mov ebx, [bestlen] - cmp eax, ebx - jg LongerMatch - mov esi, [windowbestlen] - mov edi, [edx + dsPrev] - mov ebx, [scanend] - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; s->match_start = cur_match; -;;; best_len = len; -;;; if (len >= nice_match) break; -;;; scan_end = *(ushf*)(scan+best_len-1); - -LongerMatch: mov ebx, [nicematch] - mov [bestlen], eax - mov [edx + dsMatchStart], ecx - cmp eax, ebx - jge LeaveNow - mov esi, [window] - add esi, eax - mov [windowbestlen], esi - movzx ebx, word ptr [edi + eax - 1] - mov edi, [edx + dsPrev] - mov [scanend], ebx - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; Accept the current string, with the maximum possible length. - -LenMaximum: mov edx, [deflatestate] - mov dword ptr [bestlen], MAX_MATCH - mov [edx + dsMatchStart], ecx - -;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; -;;; return s->lookahead; - -LeaveNow: - mov edx, [deflatestate] - mov ebx, [bestlen] - mov eax, [edx + dsLookahead] - cmp ebx, eax - jg LookaheadRet - mov eax, ebx -LookaheadRet: - -;;; Restore the stack and return from whence we came. - - add esp, LocalVarsSize - pop ebx - pop esi - pop edi - pop ebp - - ret -; please don't remove this string ! -; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary! - db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah - -IFDEF NOUNDERLINE -longest_match_686 endp -ELSE -_longest_match_686 endp -ENDIF - -_TEXT ends -end +; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
+; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
+; File written by Gilles Vollant, by modifying the longest_match
+; from Jean-loup Gailly in deflate.c
+;
+; http://www.zlib.net
+; http://www.winimage.com/zLibDll
+; http://www.muppetlabs.com/~breadbox/software/assembly.html
+;
+; For Visual C++ 4.x and higher and ML 6.x and higher
+; ml.exe is in directory \MASM611C of Win95 DDK
+; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
+; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
+;
+; This file contains two implementations of longest_match:
+;
+; longest_match_7fff : written 1996 by Gilles Vollant, optimized for
+; the first Pentium. Assumes s->w_mask == 0x7fff
+; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro
+;
+; To use an assembly version of longest_match, you need to define ASMV in your project.
+; There are two ways of using gvmat32.asm:
+;
+; A) Suggested method
+; if you want include both longest_match_7fff and longest_match_686
+; compile the asm file running
+; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm
+; and include gvmat32c.c in your project
+; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff,
+; longest_match_7fff will be used
+; if you have a more modern CPU (Pentium Pro, II and higher)
+; longest_match_686 will be used
+; on an old CPU with s->w_mask!=0x7fff, longest_match_686 will be used,
+; but this is not a situation you'll find often
+;
+; B) Alternative
+; if you are not interested in old CPU performance and want the smallest
+; binaries possible
+;
+; compile the asm file running
+; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm
+; and do not include gvmat32c.c in your project (or also define
+; NOOLDPENTIUMCODE)
+;
+; note : as far as I know, longest_match_686 is much faster than longest_match_7fff
+; on Pentium Pro/II/III, faster (but by less) on P4, but it seems
+; longest_match_7fff can be faster (very very little) on AMD Athlon64/K8
+;
+; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
+
+;uInt longest_match_7fff(s, cur_match)
+; deflate_state *s;
+; IPos cur_match; /* current match */
+
+ NbStack equ 76
+ cur_match equ dword ptr[esp+NbStack-0]
+ str_s equ dword ptr[esp+NbStack-4]
+; 5 dwords on top (ret,ebp,edi,esi,ebx)
+ adrret equ dword ptr[esp+NbStack-8]
+ pushebp equ dword ptr[esp+NbStack-12]
+ pushedi equ dword ptr[esp+NbStack-16]
+ pushesi equ dword ptr[esp+NbStack-20]
+ pushebx equ dword ptr[esp+NbStack-24]
+
+ chain_length equ dword ptr [esp+NbStack-28]
+ limit equ dword ptr [esp+NbStack-32]
+ best_len equ dword ptr [esp+NbStack-36]
+ window equ dword ptr [esp+NbStack-40]
+ prev equ dword ptr [esp+NbStack-44]
+ scan_start equ word ptr [esp+NbStack-48]
+ wmask equ dword ptr [esp+NbStack-52]
+ match_start_ptr equ dword ptr [esp+NbStack-56]
+ nice_match equ dword ptr [esp+NbStack-60]
+ scan equ dword ptr [esp+NbStack-64]
+
+ windowlen equ dword ptr [esp+NbStack-68]
+ match_start equ dword ptr [esp+NbStack-72]
+ strend equ dword ptr [esp+NbStack-76]
+ NbStackAdd equ (NbStack-24)
+
+ .386p
+
+ name gvmatch
+ .MODEL FLAT
+
+
+
+; all the +zlib1222add offsets are due to the addition of fields
+; in zlib in the deflate_state structure since the asm code was first written
+; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
+; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
+; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
+
+ zlib1222add equ 8
+
+; Note : these values are valid for a structure packed on an 8-byte boundary
+ dep_chain_length equ 74h+zlib1222add
+ dep_window equ 30h+zlib1222add
+ dep_strstart equ 64h+zlib1222add
+ dep_prev_length equ 70h+zlib1222add
+ dep_nice_match equ 88h+zlib1222add
+ dep_w_size equ 24h+zlib1222add
+ dep_prev equ 38h+zlib1222add
+ dep_w_mask equ 2ch+zlib1222add
+ dep_good_match equ 84h+zlib1222add
+ dep_match_start equ 68h+zlib1222add
+ dep_lookahead equ 6ch+zlib1222add
+
+
+_TEXT segment
+
+IFDEF NOUNDERLINE
+ IFDEF NOOLDPENTIUMCODE
+ public longest_match
+ public match_init
+ ELSE
+ public longest_match_7fff
+ public cpudetect32
+ public longest_match_686
+ ENDIF
+ELSE
+ IFDEF NOOLDPENTIUMCODE
+ public _longest_match
+ public _match_init
+ ELSE
+ public _longest_match_7fff
+ public _cpudetect32
+ public _longest_match_686
+ ENDIF
+ENDIF
+
+ MAX_MATCH equ 258
+ MIN_MATCH equ 3
+ MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
+
+
+
+IFNDEF NOOLDPENTIUMCODE
+IFDEF NOUNDERLINE
+longest_match_7fff proc near
+ELSE
+_longest_match_7fff proc near
+ENDIF
+; Pentium-tuned longest_match: the prev[] index mask is hard-coded to 7fffh; returns min(best_len, s->lookahead) in eax.
+ mov edx,[esp+4] ; edx = s (first stack argument)
+
+
+
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ sub esp,NbStackAdd ; reserve the local-variable area
+
+; initialize or check the variables used in match.asm.
+ mov ebp,edx ; ebp = s during setup
+
+; chain_length = s->max_chain_length
+; if (prev_length>=good_match) chain_length >>= 2
+ mov edx,[ebp+dep_chain_length]
+ mov ebx,[ebp+dep_prev_length]
+ cmp [ebp+dep_good_match],ebx
+ ja noshr
+ shr edx,2
+noshr:
+; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
+ inc edx
+ mov edi,[ebp+dep_nice_match]
+ mov chain_length,edx
+ mov eax,[ebp+dep_lookahead]
+ cmp eax,edi
+; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+ jae nolookaheadnicematch
+ mov edi,eax
+nolookaheadnicematch:
+; best_len = s->prev_length
+ mov best_len,ebx
+
+; window = s->window
+ mov esi,[ebp+dep_window]
+ mov ecx,[ebp+dep_strstart]
+ mov window,esi
+
+ mov nice_match,edi
+; scan = window + strstart
+ add esi,ecx
+ mov scan,esi
+; dx = *(ushf*)scan
+ mov dx,word ptr [esi]
+; bx = *(ushf*)(scan+best_len-1)
+ mov bx,word ptr [esi+ebx-1]
+ add esi,MAX_MATCH-1
+; scan_start = *scan
+ mov scan_start,dx
+; strend = scan + MAX_MATCH-1
+ mov strend,esi
+; bx = scan_end = *(ushf*)(scan+best_len-1)
+
+; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+; s->strstart - (IPos)MAX_DIST(s) : NIL;
+
+ mov esi,[ebp+dep_w_size]
+ sub esi,MIN_LOOKAHEAD
+; here esi = MAX_DIST(s)
+ sub ecx,esi
+ ja nodist
+ xor ecx,ecx
+nodist:
+ mov limit,ecx
+
+; prev = s->prev
+ mov edx,[ebp+dep_prev]
+ mov prev,edx
+
+; match_start = s->match_start
+ mov edx,dword ptr [ebp+dep_match_start]
+ mov bp,scan_start ; bp = scan_start; ebp no longer holds s (reloaded from str_s at exitloop)
+ mov eax,cur_match
+ mov match_start,edx
+
+ mov edx,window
+ mov edi,edx
+ add edi,best_len
+ mov esi,prev
+ dec edi
+; windowlen = window + best_len -1
+ mov windowlen,edi
+
+ jmp beginloop2
+ align 4
+
+; here, in the loop
+; eax = ax = cur_match
+; ecx = limit
+; bx = scan_end
+; bp = scan_start
+; edi = windowlen (window + best_len -1)
+; esi = prev
+; edx = window
+
+;// here; chain_length <=16
+normalbeg0add16:
+ add chain_length,16
+ jz exitloop
+normalbeg0:
+ cmp word ptr[edi+eax],bx
+ je normalbeg2noroll
+rcontlabnoroll:
+; cur_match = prev[cur_match & wmask]
+ and eax,7fffh
+ mov ax,word ptr[esi+eax*2]
+; if cur_match <= limit, go to exitloop
+ cmp ecx,eax
+ jnb exitloop
+; if --chain_length != 0, loop again at normalbeg0; otherwise go to exitloop
+ dec chain_length
+ jnz normalbeg0
+ jmp exitloop
+
+normalbeg2noroll:
+; if (scan_start==*(cur_match+window)) goto normalbeg2
+ cmp bp,word ptr[edx+eax]
+ jne rcontlabnoroll
+ jmp normalbeg2
+
+contloop3:
+ mov edi,windowlen
+
+; cur_match = prev[cur_match & wmask]
+ and eax,7fffh
+ mov ax,word ptr[esi+eax*2]
+; if cur_match <= limit, go to exitloop
+ cmp ecx,eax
+jnbexitloopshort1:
+ jnb exitloop
+; fall through: the chain_length decrement is folded into the "sub chain_length,16+1" below
+
+
+; begin the main loop
+beginloop2:
+ sub chain_length,16+1
+; if chain_length <=16, don't use the unrolled loop
+ jna normalbeg0add16
+
+do16:
+ cmp word ptr[edi+eax],bx
+ je normalbeg2dc0
+
+maccn MACRO lab
+ and eax,7fffh
+ mov ax,word ptr[esi+eax*2]
+ cmp ecx,eax
+ jnb exitloop
+ cmp word ptr[edi+eax],bx
+ je lab
+ ENDM
+
+rcontloop0:
+ maccn normalbeg2dc1
+
+rcontloop1:
+ maccn normalbeg2dc2
+
+rcontloop2:
+ maccn normalbeg2dc3
+
+rcontloop3:
+ maccn normalbeg2dc4
+
+rcontloop4:
+ maccn normalbeg2dc5
+
+rcontloop5:
+ maccn normalbeg2dc6
+
+rcontloop6:
+ maccn normalbeg2dc7
+
+rcontloop7:
+ maccn normalbeg2dc8
+
+rcontloop8:
+ maccn normalbeg2dc9
+
+rcontloop9:
+ maccn normalbeg2dc10
+
+rcontloop10:
+ maccn short normalbeg2dc11
+
+rcontloop11:
+ maccn short normalbeg2dc12
+
+rcontloop12:
+ maccn short normalbeg2dc13
+
+rcontloop13:
+ maccn short normalbeg2dc14
+
+rcontloop14:
+ maccn short normalbeg2dc15
+
+rcontloop15:
+ and eax,7fffh
+ mov ax,word ptr[esi+eax*2]
+ cmp ecx,eax
+ jnb exitloop
+
+ sub chain_length,16
+ ja do16
+ jmp normalbeg0add16
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+normbeg MACRO rcontlab,valsub
+; if we are here, we know that *(match+best_len-1) == scan_end
+ cmp bp,word ptr[edx+eax]
+; if (*(ushf*)match != scan_start) goto rcontlab
+ jne rcontlab
+; restore the true chain_length (undo the unused part of the 16-step group), then compare scan and match strings
+ add chain_length,16-valsub
+ jmp iseq
+ ENDM
+
+
+normalbeg2dc11:
+ normbeg rcontloop11,11
+
+normalbeg2dc12:
+ normbeg short rcontloop12,12
+
+normalbeg2dc13:
+ normbeg short rcontloop13,13
+
+normalbeg2dc14:
+ normbeg short rcontloop14,14
+
+normalbeg2dc15:
+ normbeg short rcontloop15,15
+
+normalbeg2dc10:
+ normbeg rcontloop10,10
+
+normalbeg2dc9:
+ normbeg rcontloop9,9
+
+normalbeg2dc8:
+ normbeg rcontloop8,8
+
+normalbeg2dc7:
+ normbeg rcontloop7,7
+
+normalbeg2dc6:
+ normbeg rcontloop6,6
+
+normalbeg2dc5:
+ normbeg rcontloop5,5
+
+normalbeg2dc4:
+ normbeg rcontloop4,4
+
+normalbeg2dc3:
+ normbeg rcontloop3,3
+
+normalbeg2dc2:
+ normbeg rcontloop2,2
+
+normalbeg2dc1:
+ normbeg rcontloop1,1
+
+normalbeg2dc0:
+ normbeg rcontloop0,0
+
+
+; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
+
+normalbeg2:
+ mov edi,window
+
+ cmp bp,word ptr[edi+eax]
+ jne contloop3 ; if *(ushf*)match != scan_start, continue
+
+iseq:
+; if we are here, we know that *(match+best_len-1) == scan_end
+; and (*(ushf*)match == scan_start)
+
+ mov edi,edx
+ mov esi,scan ; esi = scan
+ add edi,eax ; edi = window + cur_match = match
+
+ mov edx,[esi+3] ; compare manually dword at match+3
+ xor edx,[edi+3] ; and scan +3
+
+ jz begincompare ; if equal, go to long compare
+
+; we will determine the first mismatching byte and calculate len (in esi)
+ or dl,dl
+ je eq1rr
+ mov esi,3
+ jmp trfinval
+eq1rr:
+ or dx,dx
+ je eq1
+
+ mov esi,4
+ jmp trfinval
+eq1:
+ and edx,0ffffffh
+ jz eq11
+ mov esi,5
+ jmp trfinval
+eq11:
+ mov esi,6
+ jmp trfinval
+
+begincompare:
+ ; here we know that scan and match begin with the same bytes
+ add edi,6
+ add esi,6
+ mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes
+ repe cmpsd ; loop until mismatch
+
+ je trfin ; go to trfin if no mismatch was found
+; we determine the first mismatching byte
+ sub esi,4
+ mov edx,[edi-4]
+ xor edx,[esi]
+
+ or dl,dl
+ jnz trfin
+ inc esi
+
+ or dx,dx
+ jnz trfin
+ inc esi
+
+ and edx,0ffffffh
+ jnz trfin
+ inc esi
+
+trfin:
+ sub esi,scan ; esi = len
+trfinval:
+; here we have finished the compare, and esi contains the length of the equal string
+ cmp esi,best_len ; if len > best_len, go newbestlen
+ ja short newbestlen
+; now we restore edx, ecx and esi, for the big loop
+ mov esi,prev
+ mov ecx,limit
+ mov edx,window
+ jmp contloop3
+
+newbestlen:
+ mov best_len,esi ; len become best_len
+
+ mov match_start,eax ; save new position as match_start
+ cmp esi,nice_match ; if best_len >= nice_match, exit
+ jae exitloop
+ mov ecx,scan
+ mov edx,window ; restore edx=window
+ add ecx,esi
+ add esi,edx
+
+ dec esi
+ mov windowlen,esi ; windowlen = window + best_len-1
+ mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
+
+; now we restore ecx and esi, for the big loop :
+ mov esi,prev
+ mov ecx,limit
+ jmp contloop3
+
+exitloop:
+; exit : s->match_start=match_start
+ mov ebx,match_start
+ mov ebp,str_s
+ mov ecx,best_len
+ mov dword ptr [ebp+dep_match_start],ebx
+ mov eax,dword ptr [ebp+dep_lookahead]
+ cmp ecx,eax
+ ja minexlo
+ mov eax,ecx
+minexlo:
+; return min(best_len,s->lookahead)
+
+; restore stack and register ebx,esi,edi,ebp
+ add esp,NbStackAdd
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+InfoAuthor:
+; please don't remove this string !
+; You are free to use gvmat32 in any free or commercial app if you don't remove the string in the binary!
+ db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
+
+
+
+IFDEF NOUNDERLINE
+longest_match_7fff endp
+ELSE
+_longest_match_7fff endp
+ENDIF
+
+
+IFDEF NOUNDERLINE
+cpudetect32 proc near
+ELSE
+_cpudetect32 proc near
+ENDIF
+; Returns in eax: 0300h if the AC flag cannot be toggled, 0400h if the ID flag cannot be toggled, else eax as left by CPUID with eax=1.
+ push ebx
+
+ pushfd ; push original EFLAGS
+ pop eax ; get original EFLAGS
+ mov ecx, eax ; save original EFLAGS
+ xor eax, 40000h ; flip AC bit in EFLAGS
+ push eax ; save new EFLAGS value on stack
+ popfd ; replace current EFLAGS value
+ pushfd ; get new EFLAGS
+ pop eax ; store new EFLAGS in EAX
+ xor eax, ecx ; can't toggle AC bit, processor=80386
+ jz end_cpu_is_386 ; jump if 80386 processor
+ push ecx
+ popfd ; restore AC bit in EFLAGS first
+
+ pushfd ; this copy stays on the stack for the "restore original EFLAGS" popfd below
+ pushfd ; second copy, popped into ecx on the next line
+ pop ecx
+
+ mov eax, ecx ; get original EFLAGS
+ xor eax, 200000h ; flip ID bit in EFLAGS
+ push eax ; save new EFLAGS value on stack
+ popfd ; replace current EFLAGS value
+ pushfd ; get new EFLAGS
+ pop eax ; store new EFLAGS in EAX
+ popfd ; restore original EFLAGS
+ xor eax, ecx ; can't toggle ID bit,
+ je is_old_486 ; processor=old
+
+ mov eax,1
+ db 0fh,0a2h ;CPUID (emitted as raw opcode bytes), called with eax=1
+
+exitcpudetect:
+ pop ebx
+ ret
+
+end_cpu_is_386:
+ mov eax,0300h
+ jmp exitcpudetect
+
+is_old_486:
+ mov eax,0400h
+ jmp exitcpudetect
+
+IFDEF NOUNDERLINE
+cpudetect32 endp
+ELSE
+_cpudetect32 endp
+ENDIF
+ENDIF
+
+MAX_MATCH equ 258
+MIN_MATCH equ 3
+MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
+; MAX_MATCH rounded up to a multiple of 8 (= 264 = 0108h), the value the
+; compare loop hard-codes as 0108h / 0fffffef8h. The mask must clear only the
+; low three bits: 0FFF0h would yield 256, which disagrees with those literals.
+MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
+
+
+;;; stack frame offsets
+
+chainlenwmask equ esp + 0 ; high word: current chain len
+ ; low word: s->wmask
+window equ esp + 4 ; local copy of s->window
+windowbestlen equ esp + 8 ; s->window + bestlen
+scanstart equ esp + 16 ; first two bytes of string
+scanend equ esp + 12 ; last two bytes of string
+scanalign equ esp + 20 ; dword-misalignment of string
+nicematch equ esp + 24 ; a good enough match size
+bestlen equ esp + 28 ; size of best match so far
+scan equ esp + 32 ; ptr to string wanting match
+
+LocalVarsSize equ 36
+; saved ebx byte esp + 36
+; saved edi byte esp + 40
+; saved esi byte esp + 44
+; saved ebp byte esp + 48
+; return address byte esp + 52
+deflatestate equ esp + 56 ; the function arguments
+curmatch equ esp + 60
+
+;;; Offsets for fields in the deflate_state structure. These numbers
+;;; are calculated from the definition of deflate_state, with the
+;;; assumption that the compiler will dword-align the fields. (Thus,
+;;; changing the definition of deflate_state could easily cause this
+;;; program to crash horribly, without so much as a warning at
+;;; compile time. Sigh.)
+
+dsWSize equ 36+zlib1222add
+dsWMask equ 44+zlib1222add
+dsWindow equ 48+zlib1222add
+dsPrev equ 56+zlib1222add
+dsMatchLen equ 88+zlib1222add
+dsPrevMatch equ 92+zlib1222add
+dsStrStart equ 100+zlib1222add
+dsMatchStart equ 104+zlib1222add
+dsLookahead equ 108+zlib1222add
+dsPrevLen equ 112+zlib1222add
+dsMaxChainLen equ 116+zlib1222add
+dsGoodMatch equ 132+zlib1222add
+dsNiceMatch equ 136+zlib1222add
+
+
+;;; match.asm -- Pentium-Pro-optimized version of longest_match()
+;;; Written for zlib 1.1.2
+;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
+;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
+;;;
+;;; This is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License.
+
+;GLOBAL _longest_match, _match_init
+
+
+;SECTION .text
+
+;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
+
+;_longest_match:
+IFDEF NOOLDPENTIUMCODE
+ IFDEF NOUNDERLINE
+ longest_match proc near
+ ELSE
+ _longest_match proc near
+ ENDIF
+ELSE
+ IFDEF NOUNDERLINE
+ longest_match_686 proc near
+ ELSE
+ _longest_match_686 proc near
+ ENDIF
+ENDIF
+
+;;; Save registers that the compiler may be using, and adjust esp to
+;;; make room for our stack frame.
+
+ push ebp
+ push edi
+ push esi
+ push ebx
+ sub esp, LocalVarsSize
+
+;;; Retrieve the function arguments. ecx will hold cur_match
+;;; throughout the entire function. edx will hold the pointer to the
+;;; deflate_state structure during the function's setup (before
+;;; entering the main loop).
+
+ mov edx, [deflatestate]
+ mov ecx, [curmatch]
+
+;;; uInt wmask = s->w_mask;
+;;; unsigned chain_length = s->max_chain_length;
+;;; if (s->prev_length >= s->good_match) {
+;;; chain_length >>= 2;
+;;; }
+
+ mov eax, [edx + dsPrevLen]
+ mov ebx, [edx + dsGoodMatch]
+ cmp eax, ebx
+ mov eax, [edx + dsWMask]
+ mov ebx, [edx + dsMaxChainLen]
+ jl LastMatchGood ; prev_length < good_match: skip the shift (mov does not alter the flags from cmp)
+ shr ebx, 2
+LastMatchGood:
+
+;;; chainlen is decremented once beforehand so that the function can
+;;; use the sign flag instead of the zero flag for the exit test.
+;;; It is then shifted into the high word, to make room for the wmask
+;;; value, which it will always accompany.
+
+ dec ebx
+ shl ebx, 16
+ or ebx, eax
+ mov [chainlenwmask], ebx
+
+;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ mov eax, [edx + dsNiceMatch]
+ mov ebx, [edx + dsLookahead]
+ cmp ebx, eax
+ jl LookaheadLess
+ mov ebx, eax
+LookaheadLess: mov [nicematch], ebx
+
+;;; register Bytef *scan = s->window + s->strstart;
+
+ mov esi, [edx + dsWindow]
+ mov [window], esi
+ mov ebp, [edx + dsStrStart]
+ lea edi, [esi + ebp]
+ mov [scan], edi
+
+;;; Determine how many bytes the scan ptr is off from being
+;;; dword-aligned.
+
+ mov eax, edi
+ neg eax
+ and eax, 3
+ mov [scanalign], eax
+
+;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
+
+ mov eax, [edx + dsWSize]
+ sub eax, MIN_LOOKAHEAD
+ sub ebp, eax
+ jg LimitPositive
+ xor ebp, ebp
+LimitPositive:
+
+;;; int best_len = s->prev_length;
+
+ mov eax, [edx + dsPrevLen]
+ mov [bestlen], eax
+
+;;; Store the sum of s->window + best_len in windowbestlen locally, and in esi.
+
+ add esi, eax
+ mov [windowbestlen], esi
+
+;;; register ush scan_start = *(ushf*)scan;
+;;; register ush scan_end = *(ushf*)(scan+best_len-1);
+;;; Posf *prev = s->prev;
+
+ movzx ebx, word ptr [edi]
+ mov [scanstart], ebx
+ movzx ebx, word ptr [edi + eax - 1]
+ mov [scanend], ebx
+ mov edi, [edx + dsPrev]
+
+;;; Jump into the main loop.
+
+ mov edx, [chainlenwmask]
+ jmp short LoopEntry
+
+align 4
+
+;;; do {
+;;; match = s->window + cur_match;
+;;; if (*(ushf*)(match+best_len-1) != scan_end ||
+;;; *(ushf*)match != scan_start) continue;
+;;; [...]
+;;; } while ((cur_match = prev[cur_match & wmask]) > limit
+;;; && --chain_length != 0);
+;;;
+;;; Here is the inner loop of the function. The function will spend the
+;;; majority of its time in this loop, and majority of that time will
+;;; be spent in the first ten instructions.
+;;;
+;;; Within this loop:
+;;; ebx = scanend
+;;; ecx = curmatch
+;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
+;;; esi = windowbestlen - i.e., (window + bestlen)
+;;; edi = prev
+;;; ebp = limit
+
+LookupLoop:
+ and ecx, edx
+ movzx ecx, word ptr [edi + ecx*2]
+ cmp ecx, ebp
+ jbe LeaveNow
+ sub edx, 00010000h
+ js LeaveNow
+LoopEntry: movzx eax, word ptr [esi + ecx - 1]
+ cmp eax, ebx
+ jnz LookupLoop
+ mov eax, [window]
+ movzx eax, word ptr [eax + ecx]
+ cmp eax, [scanstart]
+ jnz LookupLoop
+
+;;; Store the current value of chainlen.
+
+ mov [chainlenwmask], edx
+
+;;; Point edi to the string under scrutiny, and esi to the string we
+;;; are hoping to match it up with. In actuality, esi and edi are
+;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
+;;; initialized to -MAX_MATCH_8, so [edi + edx] starts at scan + scanalign.
+
+ mov esi, [window]
+ mov edi, [scan]
+ add esi, ecx
+ mov eax, [scanalign]
+ mov edx, 0fffffef8h; -(MAX_MATCH_8)
+ lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
+ lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
+
+;;; Test the strings for equality, 8 bytes at a time. At the end,
+;;; adjust edx so that it is offset to the exact byte that mismatched.
+;;;
+;;; We already know at this point that the first three bytes of the
+;;; strings match each other, and they can be safely passed over before
+;;; starting the compare loop. So what this code does is skip over 0-3
+;;; bytes, as much as necessary in order to dword-align the edi
+;;; pointer. (esi will still be misaligned three times out of four.)
+;;;
+;;; It should be confessed that this loop usually does not represent
+;;; much of the total running time. Replacing it with a more
+;;; straightforward "rep cmpsb" would not drastically degrade
+;;; performance.
+
+LoopCmps:
+ mov eax, [esi + edx]
+ xor eax, [edi + edx]
+ jnz LeaveLoopCmps
+ mov eax, [esi + edx + 4]
+ xor eax, [edi + edx + 4]
+ jnz LeaveLoopCmps4
+ add edx, 8
+ jnz LoopCmps
+ jmp short LenMaximum
+LeaveLoopCmps4: add edx, 4
+LeaveLoopCmps: test eax, 0000FFFFh
+ jnz LenLower
+ add edx, 2
+ shr eax, 16
+LenLower: sub al, 1
+ adc edx, 0
+
+;;; Calculate the length of the match. If it is MAX_MATCH or longer,
+;;; then automatically accept it as the best possible match and leave.
+
+ lea eax, [edi + edx]
+ mov edi, [scan]
+ sub eax, edi
+ cmp eax, MAX_MATCH
+ jge LenMaximum
+
+;;; If the length of the match is not longer than the best match we
+;;; have so far, then forget it and return to the lookup loop.
+
+ mov edx, [deflatestate]
+ mov ebx, [bestlen]
+ cmp eax, ebx
+ jg LongerMatch
+ mov esi, [windowbestlen]
+ mov edi, [edx + dsPrev]
+ mov ebx, [scanend]
+ mov edx, [chainlenwmask]
+ jmp LookupLoop
+
+;;; s->match_start = cur_match;
+;;; best_len = len;
+;;; if (len >= nice_match) break;
+;;; scan_end = *(ushf*)(scan+best_len-1);
+
+LongerMatch: mov ebx, [nicematch]
+ mov [bestlen], eax
+ mov [edx + dsMatchStart], ecx
+ cmp eax, ebx
+ jge LeaveNow
+ mov esi, [window]
+ add esi, eax
+ mov [windowbestlen], esi
+ movzx ebx, word ptr [edi + eax - 1]
+ mov edi, [edx + dsPrev]
+ mov [scanend], ebx
+ mov edx, [chainlenwmask]
+ jmp LookupLoop
+
+;;; Accept the current string, with the maximum possible length.
+
+LenMaximum: mov edx, [deflatestate]
+ mov dword ptr [bestlen], MAX_MATCH
+ mov [edx + dsMatchStart], ecx
+
+;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+;;; return s->lookahead;
+
+LeaveNow:
+ mov edx, [deflatestate]
+ mov ebx, [bestlen]
+ mov eax, [edx + dsLookahead]
+ cmp ebx, eax
+ jg LookaheadRet
+ mov eax, ebx
+LookaheadRet:
+
+;;; Restore the stack and return from whence we came.
+
+ add esp, LocalVarsSize
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+; please don't remove this string !
+; You can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
+ db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
+
+; With NOOLDPENTIUMCODE this file also provides an empty match_init (it only executes ret).
+IFDEF NOOLDPENTIUMCODE
+ IFDEF NOUNDERLINE
+ longest_match endp
+ ELSE
+ _longest_match endp
+ ENDIF
+
+ IFDEF NOUNDERLINE
+ match_init proc near
+ ret
+ match_init endp
+ ELSE
+ _match_init proc near
+ ret
+ _match_init endp
+ ENDIF
+ELSE
+ IFDEF NOUNDERLINE
+ longest_match_686 endp
+ ELSE
+ _longest_match_686 endp
+ ENDIF
+ENDIF
+
+_TEXT ends
+end
diff --git a/zlib/contrib/masmx86/gvmat32c.c b/zlib/contrib/masmx86/gvmat32c.c index 9ed25f3432f..7ad2b27943b 100644 --- a/zlib/contrib/masmx86/gvmat32c.c +++ b/zlib/contrib/masmx86/gvmat32c.c @@ -1,206 +1,62 @@ -/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86 - * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. - * File written by Gilles Vollant, by modifiying the longest_match - * from Jean-loup Gailly in deflate.c - * it prepare all parameters and call the assembly longest_match_gvasm - * longest_match execute standard C code is wmask != 0x7fff - * (assembly code is faster with a fixed wmask) - * - */ - -#include "deflate.h" - -#ifdef ASMV -#define NIL 0 - -#define UNALIGNED_OK - - -/* if your C compiler don't add underline before function name, - define ADD_UNDERLINE_ASMFUNC */ -#ifdef ADD_UNDERLINE_ASMFUNC -#define longest_match_7fff _longest_match_7fff -#define longest_match_686 _longest_match_686 -#define cpudetect32 _cpudetect32 -#endif - - - -void match_init() -{ -} - -unsigned long cpudetect32(); - -uInt longest_match_c( - deflate_state *s, - IPos cur_match); /* current match */ - - -uInt longest_match_7fff( - deflate_state *s, - IPos cur_match); /* current match */ - -uInt longest_match_686( - deflate_state *s, - IPos cur_match); /* current match */ - -uInt longest_match( - deflate_state *s, - IPos cur_match) /* current match */ -{ - static uInt iIsPPro=2; - - if ((s->w_mask == 0x7fff) && (iIsPPro==0)) - return longest_match_7fff(s,cur_match); - - if (iIsPPro==1) - return longest_match_686(s,cur_match); - - if (iIsPPro==2) - iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 
1 : 0; - - return longest_match_c(s,cur_match); -} - - - -uInt longest_match_c(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - int best_len = s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0. - */ - Posf *prev = s->prev; - uInt wmask = s->w_mask; - -#ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - register ush scan_start = *(ushf*)scan; - register ush scan_end = *(ushf*)(scan+best_len-1); -#else - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; - register Byte scan_end = scan[best_len]; -#endif - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. 
- */ - if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2: - */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. - */ - if (*(ushf*)(match+best_len-1) != scan_end || - *(ushf*)match != scan_start) continue; - - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - Assert(scan[2] == match[2], "scan[2]?"); - scan++, match++; - do { - } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - if (*scan == *match) scan++; - - len = (MAX_MATCH - 1) - (int)(strend-scan); - scan = strend - (MAX_MATCH-1); - -#else /* UNALIGNED_OK */ - - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; - - /* The check at best_len-1 can be removed because it will be made - * again later. 
(This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; - -#endif /* UNALIGNED_OK */ - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(ushf*)(scan+best_len-1); -#else - scan_end1 = scan[best_len-1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit - && --chain_length != 0); - - if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - return s->lookahead; -} - -#endif /* ASMV */ +/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86
+ * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
+ * File written by Gilles Vollant, by modifying the longest_match
+ * from Jean-loup Gailly in deflate.c
+ * It selects which assembly routine longest_match calls:
+ * longest_match_7fff only on a pre-Pentium-Pro CPU when wmask == 0x7fff,
+ * longest_match_686 otherwise (assembly code is faster with a fixed wmask)
+ *
+ * Read comment at beginning of gvmat32.asm for more information
+ */
+
+#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE))
+#include "deflate.h"
+
+/* if your C compiler don't add underline before function name,
+ define ADD_UNDERLINE_ASMFUNC */
+#ifdef ADD_UNDERLINE_ASMFUNC
+#define longest_match_7fff _longest_match_7fff
+#define longest_match_686 _longest_match_686
+#define cpudetect32 _cpudetect32
+#endif
+
+
+unsigned long cpudetect32();
+
+uInt longest_match_c(
+ deflate_state *s,
+ IPos cur_match); /* current match */
+
+
+uInt longest_match_7fff(
+ deflate_state *s,
+ IPos cur_match); /* current match */
+
+uInt longest_match_686(
+ deflate_state *s,
+ IPos cur_match); /* current match */
+
+
+static uInt iIsPPro=2;
+
+void match_init ()
+{
+ /* Bits 8..11 of the cpudetect32() result are compared against 6;
+    6 or above selects the 686 routine in longest_match. */
+ unsigned long family = (cpudetect32() >> 8) & 0xf;
+
+ iIsPPro = (family >= 6) ? 1 : 0;
+}
+
+uInt longest_match(
+ deflate_state *s,
+ IPos cur_match) /* current match */
+{
+ /* longest_match_7fff is only eligible when the CPU was not flagged as
+    PPro-class (iIsPPro==0) and the window mask is exactly 0x7fff;
+    every other combination is served by longest_match_686. */
+ if ((iIsPPro==0) && (s->w_mask == 0x7fff))
+ return longest_match_7fff(s,cur_match);
+
+ return longest_match_686(s,cur_match);
+}
+
+
+#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */
diff --git a/zlib/contrib/masmx86/inffas32.asm b/zlib/contrib/masmx86/inffas32.asm index 7d76e1c6600..4a205125ec5 100644 --- a/zlib/contrib/masmx86/inffas32.asm +++ b/zlib/contrib/masmx86/inffas32.asm @@ -1,1033 +1,1083 @@ -; 75 "inffast.S" -;FILE "inffast.S" - -;;;GLOBAL _inflate_fast - -;;;SECTION .text - - - - .586p - .mmx - - name inflate_fast_x86 - .MODEL FLAT - -_DATA segment -inflate_fast_use_mmx: - dd 1 - - -_TEXT segment -PUBLIC _inflate_fast - -ALIGN 4 -_inflate_fast: - jmp inflate_fast_entry - - - -ALIGN 4 - db 'Fast decoding Code from Chris Anderson' - db 0 - -ALIGN 4 -invalid_literal_length_code_msg: - db 'invalid literal/length code' - db 0 - -ALIGN 4 -invalid_distance_code_msg: - db 'invalid distance code' - db 0 - -ALIGN 4 -invalid_distance_too_far_msg: - db 'invalid distance too far back' - db 0 - - -ALIGN 4 -inflate_fast_mask: -dd 0 -dd 1 -dd 3 -dd 7 -dd 15 -dd 31 -dd 63 -dd 127 -dd 255 -dd 511 -dd 1023 -dd 2047 -dd 4095 -dd 8191 -dd 16383 -dd 32767 -dd 65535 -dd 131071 -dd 262143 -dd 524287 -dd 1048575 -dd 2097151 -dd 4194303 -dd 8388607 -dd 16777215 -dd 33554431 -dd 67108863 -dd 134217727 -dd 268435455 -dd 536870911 -dd 1073741823 -dd 2147483647 -dd 4294967295 - - - -mode_state equ 0 ;/* state->mode */ -wsize_state equ 32 ;/* state->wsize */ -write_state equ (36+4) ;/* state->write */ -window_state equ (40+4) ;/* state->window */ -hold_state equ (44+4) ;/* state->hold */ -bits_state equ (48+4) ;/* state->bits */ -lencode_state equ (64+4) ;/* state->lencode */ -distcode_state equ (68+4) ;/* state->distcode */ -lenbits_state equ (72+4) ;/* state->lenbits */ -distbits_state equ (76+4) ;/* state->distbits */ - - -;;SECTION .text -; 205 "inffast.S" -;GLOBAL inflate_fast_use_mmx - -;SECTION .data - - -; GLOBAL inflate_fast_use_mmx:object -;.size inflate_fast_use_mmx, 4 -; 226 "inffast.S" -;SECTION .text - -ALIGN 4 -inflate_fast_entry: - push edi - push esi - push ebp - push ebx - pushfd - sub esp,64 - cld - - - - - mov esi, [esp+88] - mov edi, 
[esi+28] - - - - - - - - mov edx, [esi+4] - mov eax, [esi+0] - - add edx,eax - sub edx,11 - - mov [esp+44],eax - mov [esp+20],edx - - mov ebp, [esp+92] - mov ecx, [esi+16] - mov ebx, [esi+12] - - sub ebp,ecx - neg ebp - add ebp,ebx - - sub ecx,257 - add ecx,ebx - - mov [esp+60],ebx - mov [esp+40],ebp - mov [esp+16],ecx -; 285 "inffast.S" - mov eax, [edi+lencode_state] - mov ecx, [edi+distcode_state] - - mov [esp+8],eax - mov [esp+12],ecx - - mov eax,1 - mov ecx, [edi+lenbits_state] - shl eax,cl - dec eax - mov [esp+0],eax - - mov eax,1 - mov ecx, [edi+distbits_state] - shl eax,cl - dec eax - mov [esp+4],eax - - mov eax, [edi+wsize_state] - mov ecx, [edi+write_state] - mov edx, [edi+window_state] - - mov [esp+52],eax - mov [esp+48],ecx - mov [esp+56],edx - - mov ebp, [edi+hold_state] - mov ebx, [edi+bits_state] -; 321 "inffast.S" - mov esi, [esp+44] - mov ecx, [esp+20] - cmp ecx,esi - ja L_align_long - - add ecx,11 - sub ecx,esi - mov eax,12 - sub eax,ecx - lea edi, [esp+28] - rep movsb - mov ecx,eax - xor eax,eax - rep stosb - lea esi, [esp+28] - mov [esp+20],esi - jmp L_is_aligned - - -L_align_long: - test esi,3 - jz L_is_aligned - xor eax,eax - mov al, [esi] - inc esi - mov ecx,ebx - add ebx,8 - shl eax,cl - or ebp,eax - jmp L_align_long - -L_is_aligned: - mov edi, [esp+60] -; 366 "inffast.S" -L_check_mmx: - cmp dword ptr [inflate_fast_use_mmx],2 - je L_init_mmx - ja L_do_loop - - push eax - push ebx - push ecx - push edx - pushfd - mov eax, [esp] - xor dword ptr [esp],0200000h - - - - - popfd - pushfd - pop edx - xor edx,eax - jz L_dont_use_mmx - xor eax,eax - cpuid - cmp ebx,0756e6547h - jne L_dont_use_mmx - cmp ecx,06c65746eh - jne L_dont_use_mmx - cmp edx,049656e69h - jne L_dont_use_mmx - mov eax,1 - cpuid - shr eax,8 - and eax,15 - cmp eax,6 - jne L_dont_use_mmx - test edx,0800000h - jnz L_use_mmx - jmp L_dont_use_mmx -L_use_mmx: - mov dword ptr [inflate_fast_use_mmx],2 - jmp L_check_mmx_pop -L_dont_use_mmx: - mov dword ptr [inflate_fast_use_mmx],3 
-L_check_mmx_pop: - pop edx - pop ecx - pop ebx - pop eax - jmp L_check_mmx -; 426 "inffast.S" -ALIGN 4 -L_do_loop: -; 437 "inffast.S" - cmp bl,15 - ja L_get_length_code - - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - -L_get_length_code: - mov edx, [esp+0] - mov ecx, [esp+8] - and edx,ebp - mov eax, [ecx+edx*4] - -L_dolen: - - - - - - - mov cl,ah - sub bl,ah - shr ebp,cl - - - - - - - test al,al - jnz L_test_for_length_base - - shr eax,16 - stosb - -L_while_test: - - - cmp [esp+16],edi - jbe L_break_loop - - cmp [esp+20],esi - ja L_do_loop - jmp L_break_loop - -L_test_for_length_base: -; 502 "inffast.S" - mov edx,eax - shr edx,16 - mov cl,al - - test al,16 - jz L_test_for_second_level_length - and cl,15 - jz L_save_len - cmp bl,cl - jae L_add_bits_to_len - - mov ch,cl - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - mov cl,ch - -L_add_bits_to_len: - mov eax,1 - shl eax,cl - dec eax - sub bl,cl - and eax,ebp - shr ebp,cl - add edx,eax - -L_save_len: - mov [esp+24],edx - - -L_decode_distance: -; 549 "inffast.S" - cmp bl,15 - ja L_get_distance_code - - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - -L_get_distance_code: - mov edx, [esp+4] - mov ecx, [esp+12] - and edx,ebp - mov eax, [ecx+edx*4] - - -L_dodist: - mov edx,eax - shr edx,16 - mov cl,ah - sub bl,ah - shr ebp,cl -; 584 "inffast.S" - mov cl,al - - test al,16 - jz L_test_for_second_level_dist - and cl,15 - jz L_check_dist_one - cmp bl,cl - jae L_add_bits_to_dist - - mov ch,cl - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - mov cl,ch - -L_add_bits_to_dist: - mov eax,1 - shl eax,cl - dec eax - sub bl,cl - and eax,ebp - shr ebp,cl - add edx,eax - jmp L_check_window - -L_check_window: -; 625 "inffast.S" - mov [esp+44],esi - mov eax,edi - sub eax, [esp+40] - - cmp eax,edx - jb L_clip_window - - mov ecx, [esp+24] - mov esi,edi - sub esi,edx - - sub ecx,3 - mov al, [esi] - mov [edi],al - mov al, [esi+1] - mov dl, 
[esi+2] - add esi,3 - mov [edi+1],al - mov [edi+2],dl - add edi,3 - rep movsb - - mov esi, [esp+44] - jmp L_while_test - -ALIGN 4 -L_check_dist_one: - cmp edx,1 - jne L_check_window - cmp [esp+40],edi - je L_check_window - - dec edi - mov ecx, [esp+24] - mov al, [edi] - sub ecx,3 - - mov [edi+1],al - mov [edi+2],al - mov [edi+3],al - add edi,4 - rep stosb - - jmp L_while_test - -ALIGN 4 -L_test_for_second_level_length: - - - - - test al,64 - jnz L_test_for_end_of_block - - mov eax,1 - shl eax,cl - dec eax - and eax,ebp - add eax,edx - mov edx, [esp+8] - mov eax, [edx+eax*4] - jmp L_dolen - -ALIGN 4 -L_test_for_second_level_dist: - - - - - test al,64 - jnz L_invalid_distance_code - - mov eax,1 - shl eax,cl - dec eax - and eax,ebp - add eax,edx - mov edx, [esp+12] - mov eax, [edx+eax*4] - jmp L_dodist - -ALIGN 4 -L_clip_window: -; 721 "inffast.S" - mov ecx,eax - mov eax, [esp+52] - neg ecx - mov esi, [esp+56] - - cmp eax,edx - jb L_invalid_distance_too_far - - add ecx,edx - cmp dword ptr [esp+48],0 - jne L_wrap_around_window - - sub eax,ecx - add esi,eax -; 749 "inffast.S" - mov eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - -L_wrap_around_window: -; 793 "inffast.S" - mov eax, [esp+48] - cmp ecx,eax - jbe L_contiguous_in_window - - add esi, [esp+52] - add esi,eax - sub esi,ecx - sub ecx,eax - - - mov eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi, [esp+56] - mov ecx, [esp+48] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - -L_contiguous_in_window: -; 836 "inffast.S" - add esi,eax - sub esi,ecx - - - mov eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - -L_do_copy1: -; 862 "inffast.S" - mov ecx,eax - rep movsb - - mov esi, [esp+44] - jmp 
L_while_test -; 878 "inffast.S" -ALIGN 4 -L_init_mmx: - emms - - - - - - movd mm0,ebp - mov ebp,ebx -; 896 "inffast.S" - movd mm4,[esp+0] - movq mm3,mm4 - movd mm5,[esp+4] - movq mm2,mm5 - pxor mm1,mm1 - mov ebx, [esp+8] - jmp L_do_loop_mmx - -ALIGN 4 -L_do_loop_mmx: - psrlq mm0,mm1 - - cmp ebp,32 - ja L_get_length_code_mmx - - movd mm6,ebp - movd mm7,[esi] - add esi,4 - psllq mm7,mm6 - add ebp,32 - por mm0,mm7 - -L_get_length_code_mmx: - pand mm4,mm0 - movd eax,mm4 - movq mm4,mm3 - mov eax, [ebx+eax*4] - -L_dolen_mmx: - movzx ecx,ah - movd mm1,ecx - sub ebp,ecx - - test al,al - jnz L_test_for_length_base_mmx - - shr eax,16 - stosb - -L_while_test_mmx: - - - cmp [esp+16],edi - jbe L_break_loop - - cmp [esp+20],esi - ja L_do_loop_mmx - jmp L_break_loop - -L_test_for_length_base_mmx: - - mov edx,eax - shr edx,16 - - test al,16 - jz L_test_for_second_level_length_mmx - and eax,15 - jz L_decode_distance_mmx - - psrlq mm0,mm1 - movd mm1,eax - movd ecx,mm0 - sub ebp,eax - and ecx, [inflate_fast_mask+eax*4] - add edx,ecx - -L_decode_distance_mmx: - psrlq mm0,mm1 - - cmp ebp,32 - ja L_get_dist_code_mmx - - movd mm6,ebp - movd mm7,[esi] - add esi,4 - psllq mm7,mm6 - add ebp,32 - por mm0,mm7 - -L_get_dist_code_mmx: - mov ebx, [esp+12] - pand mm5,mm0 - movd eax,mm5 - movq mm5,mm2 - mov eax, [ebx+eax*4] - -L_dodist_mmx: - - movzx ecx,ah - mov ebx,eax - shr ebx,16 - sub ebp,ecx - movd mm1,ecx - - test al,16 - jz L_test_for_second_level_dist_mmx - and eax,15 - jz L_check_dist_one_mmx - -L_add_bits_to_dist_mmx: - psrlq mm0,mm1 - movd mm1,eax - movd ecx,mm0 - sub ebp,eax - and ecx, [inflate_fast_mask+eax*4] - add ebx,ecx - -L_check_window_mmx: - mov [esp+44],esi - mov eax,edi - sub eax, [esp+40] - - cmp eax,ebx - jb L_clip_window_mmx - - mov ecx,edx - mov esi,edi - sub esi,ebx - - sub ecx,3 - mov al, [esi] - mov [edi],al - mov al, [esi+1] - mov dl, [esi+2] - add esi,3 - mov [edi+1],al - mov [edi+2],dl - add edi,3 - rep movsb - - mov esi, [esp+44] - mov ebx, [esp+8] - jmp 
L_while_test_mmx - -ALIGN 4 -L_check_dist_one_mmx: - cmp ebx,1 - jne L_check_window_mmx - cmp [esp+40],edi - je L_check_window_mmx - - dec edi - mov ecx,edx - mov al, [edi] - sub ecx,3 - - mov [edi+1],al - mov [edi+2],al - mov [edi+3],al - add edi,4 - rep stosb - - mov ebx, [esp+8] - jmp L_while_test_mmx - -ALIGN 4 -L_test_for_second_level_length_mmx: - test al,64 - jnz L_test_for_end_of_block - - and eax,15 - psrlq mm0,mm1 - movd ecx,mm0 - and ecx, [inflate_fast_mask+eax*4] - add ecx,edx - mov eax, [ebx+ecx*4] - jmp L_dolen_mmx - -ALIGN 4 -L_test_for_second_level_dist_mmx: - test al,64 - jnz L_invalid_distance_code - - and eax,15 - psrlq mm0,mm1 - movd ecx,mm0 - and ecx, [inflate_fast_mask+eax*4] - mov eax, [esp+12] - add ecx,ebx - mov eax, [eax+ecx*4] - jmp L_dodist_mmx - -ALIGN 4 -L_clip_window_mmx: - - mov ecx,eax - mov eax, [esp+52] - neg ecx - mov esi, [esp+56] - - cmp eax,ebx - jb L_invalid_distance_too_far - - add ecx,ebx - cmp dword ptr [esp+48],0 - jne L_wrap_around_window_mmx - - sub eax,ecx - add esi,eax - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - -L_wrap_around_window_mmx: - - mov eax, [esp+48] - cmp ecx,eax - jbe L_contiguous_in_window_mmx - - add esi, [esp+52] - add esi,eax - sub esi,ecx - sub ecx,eax - - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi, [esp+56] - mov ecx, [esp+48] - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - -L_contiguous_in_window_mmx: - - add esi,eax - sub esi,ecx - - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - -L_do_copy1_mmx: - - - mov ecx,edx - rep movsb - - mov esi, [esp+44] - mov ebx, [esp+8] - jmp L_while_test_mmx -; 1174 "inffast.S" -L_invalid_distance_code: - - - - - - mov ecx, 
invalid_distance_code_msg - mov edx,26 - jmp L_update_stream_state - -L_test_for_end_of_block: - - - - - - test al,32 - jz L_invalid_literal_length_code - - mov ecx,0 - mov edx,11 - jmp L_update_stream_state - -L_invalid_literal_length_code: - - - - - - mov ecx, invalid_literal_length_code_msg - mov edx,26 - jmp L_update_stream_state - -L_invalid_distance_too_far: - - - - mov esi, [esp+44] - mov ecx, invalid_distance_too_far_msg - mov edx,26 - jmp L_update_stream_state - -L_update_stream_state: - - mov eax, [esp+88] - test ecx,ecx - jz L_skip_msg - mov [eax+24],ecx -L_skip_msg: - mov eax, [eax+28] - mov [eax+mode_state],edx - jmp L_break_loop - -ALIGN 4 -L_break_loop: -; 1243 "inffast.S" - cmp dword ptr [inflate_fast_use_mmx],2 - jne L_update_next_in - - - - mov ebx,ebp - -L_update_next_in: -; 1266 "inffast.S" - mov eax, [esp+88] - mov ecx,ebx - mov edx, [eax+28] - shr ecx,3 - sub esi,ecx - shl ecx,3 - sub ebx,ecx - mov [eax+12],edi - mov [edx+bits_state],ebx - mov ecx,ebx - - lea ebx, [esp+28] - cmp [esp+20],ebx - jne L_buf_not_used - - sub esi,ebx - mov ebx, [eax+0] - mov [esp+20],ebx - add esi,ebx - mov ebx, [eax+4] - sub ebx,11 - add [esp+20],ebx - -L_buf_not_used: - mov [eax+0],esi - - mov ebx,1 - shl ebx,cl - dec ebx - - - - - - cmp dword ptr [inflate_fast_use_mmx],2 - jne L_update_hold - - - - psrlq mm0,mm1 - movd ebp,mm0 - - emms - -L_update_hold: - - - - and ebp,ebx - mov [edx+hold_state],ebp - - - - - mov ebx, [esp+20] - cmp ebx,esi - jbe L_last_is_smaller - - sub ebx,esi - add ebx,11 - mov [eax+4],ebx - jmp L_fixup_out -L_last_is_smaller: - sub esi,ebx - neg esi - add esi,11 - mov [eax+4],esi - - - - -L_fixup_out: - - mov ebx, [esp+16] - cmp ebx,edi - jbe L_end_is_smaller - - sub ebx,edi - add ebx,257 - mov [eax+16],ebx - jmp L_done -L_end_is_smaller: - sub edi,ebx - neg edi - add edi,257 - mov [eax+16],edi - - - - - -L_done: - add esp,64 - popfd - pop ebx - pop ebp - pop esi - pop edi - ret - - - - -_TEXT ends -end +;/* inffas32.asm is a hand tuned 
assembler version of inffast.c -- fast decoding
+; *
+; * inffas32.asm is derived from inffas86.c, with translation of assembly code
+; *
+; * Copyright (C) 1995-2003 Mark Adler
+; * For conditions of distribution and use, see copyright notice in zlib.h
+; *
+; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
+; * Please use the copyright conditions above.
+; *
+; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
+; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
+; * the moment. I have successfully compiled and tested this code with gcc2.96,
+; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
+; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
+; * enabled. I will attempt to merge the MMX code into this version. Newer
+; * versions of this and inffast.S can be found at
+; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
+; *
+; * 2005 : modification by Gilles Vollant
+; */
+; For Visual C++ 4.x and higher and ML 6.x and higher
+; ml.exe is in directory \MASM611C of Win95 DDK
+; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
+; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
+;
+;
+; compile with command line option
+; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
+
+; if you define NO_GZIP (see inflate.h), compile with
+; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
+
+
+; zlib1222sup is 0 for zlib 1.2.2.1 and lower
+; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with the addition of dmax and head
+; in inflate_state in inflate.h)
+zlib1222sup equ 8
+
+
+IFDEF GUNZIP ; values stored into state->mode by the exit paths; presumably must match inflate.h's mode enum - confirm
+ INFLATE_MODE_TYPE equ 11 ; written to state->mode when an end-of-block code is decoded
+ INFLATE_MODE_BAD equ 26 ; written to state->mode when an invalid code or distance is found
+ELSE
+ IFNDEF NO_GUNZIP ; neither symbol defined: same values as the GUNZIP case
+ INFLATE_MODE_TYPE equ 11
+ INFLATE_MODE_BAD equ 26
+ ELSE ; NO_GUNZIP defined (assembled with /DNO_GUNZIP): alternate numbering
+ INFLATE_MODE_TYPE equ 3
+ INFLATE_MODE_BAD equ 17
+ ENDIF
+ENDIF
+
+
+; 75 "inffast.S"
+;FILE "inffast.S"
+
+;;;GLOBAL _inflate_fast
+
+;;;SECTION .text
+
+
+
+ .586p
+ .mmx
+
+ name inflate_fast_x86
+ .MODEL FLAT
+
+_DATA segment
+inflate_fast_use_mmx: ; MMX selector: 1 = not probed yet, 2 = use the MMX loop, 3 = use the integer loop
+ dd 1
+
+
+_TEXT segment
+PUBLIC _inflate_fast
+
+ALIGN 4
+_inflate_fast: ; exported entry point: jumps over the strings and mask table that follow
+ jmp inflate_fast_entry
+
+
+
+ALIGN 4
+ db 'Fast decoding Code from Chris Anderson'
+ db 0
+
+ALIGN 4
+invalid_literal_length_code_msg:
+ db 'invalid literal/length code'
+ db 0
+
+ALIGN 4
+invalid_distance_code_msg:
+ db 'invalid distance code'
+ db 0
+
+ALIGN 4
+invalid_distance_too_far_msg:
+ db 'invalid distance too far back'
+ db 0
+
+
+ALIGN 4
+inflate_fast_mask:
+dd 0
+dd 1
+dd 3
+dd 7
+dd 15
+dd 31
+dd 63
+dd 127
+dd 255
+dd 511
+dd 1023
+dd 2047
+dd 4095
+dd 8191
+dd 16383
+dd 32767
+dd 65535
+dd 131071
+dd 262143
+dd 524287
+dd 1048575
+dd 2097151
+dd 4194303
+dd 8388607
+dd 16777215
+dd 33554431
+dd 67108863
+dd 134217727
+dd 268435455
+dd 536870911
+dd 1073741823
+dd 2147483647
+dd 4294967295
+
+
+mode_state equ 0 ;/* state->mode */
+wsize_state equ (32+zlib1222sup) ;/* state->wsize */
+write_state equ (36+4+zlib1222sup) ;/* state->write */
+window_state equ (40+4+zlib1222sup) ;/* state->window */
+hold_state equ (44+4+zlib1222sup) ;/* state->hold */
+bits_state equ (48+4+zlib1222sup) ;/* state->bits */
+lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
+distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
+lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
+distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
+
+
+;;SECTION .text
+; 205 "inffast.S"
+;GLOBAL inflate_fast_use_mmx
+
+;SECTION .data
+
+
+; GLOBAL inflate_fast_use_mmx:object
+;.size inflate_fast_use_mmx, 4
+; 226 "inffast.S"
+;SECTION .text
+
+ALIGN 4
+inflate_fast_entry: ; prologue: save registers, build the 64-byte local frame, cache stream/state fields
+ push edi
+ push esi
+ push ebp
+ push ebx
+ pushfd
+ sub esp,64
+ cld ; string instructions below must move forward
+; Frame: 5 pushes (20 bytes) + 64 bytes of locals, so the return address is at
+; [esp+84], the first argument at [esp+88] and the second at [esp+92].
+; Locals: +0 length mask, +4 distance mask, +8 lencode, +12 distcode, +16 output limit,
+; +20 input limit, +24 match length, +28 12-byte scratch input, +40 beg, +44 in, +48 write, +52 wsize, +56 window, +60 out
+ mov esi, [esp+88] ; esi = first argument (stream pointer)
+ mov edi, [esi+28] ; edi = state pointer (indexed with the *_state offsets below)
+; NOTE(review): the stream offsets used here look like the 32-bit z_stream layout
+; (+0 next_in, +4 avail_in, +12 next_out, +16 avail_out, +24 msg, +28 state);
+; confirm against zlib.h for the target compiler.
+
+
+
+
+ mov edx, [esi+4]
+ mov eax, [esi+0]
+
+ add edx,eax
+ sub edx,11 ; edx = [strm+0] + [strm+4] - 11 : input limit ("last")
+
+ mov [esp+44],eax ; in
+ mov [esp+20],edx ; last
+
+ mov ebp, [esp+92] ; ebp = second argument
+ mov ecx, [esi+16]
+ mov ebx, [esi+12]
+
+ sub ebp,ecx
+ neg ebp
+ add ebp,ebx ; ebp = [strm+12] - (arg2 - [strm+16]) : "beg", start of output produced so far
+
+ sub ecx,257
+ add ecx,ebx ; ecx = [strm+12] + [strm+16] - 257 : output limit ("end")
+
+ mov [esp+60],ebx ; out
+ mov [esp+40],ebp ; beg
+ mov [esp+16],ecx ; end
+; 285 "inffast.S"
+ mov eax, [edi+lencode_state]
+ mov ecx, [edi+distcode_state]
+
+ mov [esp+8],eax ; length/literal table
+ mov [esp+12],ecx ; distance table
+
+ mov eax,1
+ mov ecx, [edi+lenbits_state]
+ shl eax,cl
+ dec eax
+ mov [esp+0],eax ; (1 << lenbits) - 1 : index mask for the length table
+
+ mov eax,1
+ mov ecx, [edi+distbits_state]
+ shl eax,cl
+ dec eax
+ mov [esp+4],eax ; (1 << distbits) - 1 : index mask for the distance table
+
+ mov eax, [edi+wsize_state]
+ mov ecx, [edi+write_state]
+ mov edx, [edi+window_state]
+
+ mov [esp+52],eax ; wsize
+ mov [esp+48],ecx ; write
+ mov [esp+56],edx ; window
+
+ mov ebp, [edi+hold_state] ; ebp = bit accumulator
+ mov ebx, [edi+bits_state] ; ebx = number of valid bits in the accumulator
+; 321 "inffast.S"
+ mov esi, [esp+44] ; esi = input pointer
+ mov ecx, [esp+20] ; ecx = input limit
+ cmp ecx,esi
+ ja L_align_long ; limit above the input pointer: read directly from the caller's buffer
+; Otherwise copy the remaining input into the 12-byte scratch buffer, zero-padded.
+ add ecx,11
+ sub ecx,esi ; ecx = last + 11 - in : bytes of input remaining
+ mov eax,12
+ sub eax,ecx ; eax = padding needed to fill 12 bytes
+ lea edi, [esp+28]
+ rep movsb ; copy the remaining input bytes
+ mov ecx,eax
+ xor eax,eax
+ rep stosb ; zero-fill the rest of the scratch buffer
+ lea esi, [esp+28] ; decode from the scratch buffer
+ mov [esp+20],esi ; limit = scratch start; the exit path uses this to detect scratch use
+ jmp L_is_aligned
+
+
+L_align_long: ; feed single bytes into the accumulator until esi is 4-byte aligned
+ test esi,3
+ jz L_is_aligned
+ xor eax,eax
+ mov al, [esi]
+ inc esi
+ mov ecx,ebx
+ add ebx,8 ; eight more valid bits
+ shl eax,cl ; position the byte above the bits already held
+ or ebp,eax
+ jmp L_align_long
+
+L_is_aligned:
+ mov edi, [esp+60] ; edi = output pointer
+; 366 "inffast.S"
+L_check_mmx: ; dispatch on inflate_fast_use_mmx; probe the CPU while it is still below 2
+ cmp dword ptr [inflate_fast_use_mmx],2
+ je L_init_mmx ; 2: MMX loop
+ ja L_do_loop ; above 2: integer loop
+; flag below 2: not probed yet
+ push eax
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ mov eax, [esp] ; eax = original EFLAGS
+ xor dword ptr [esp],0200000h ; toggle bit 21 (the EFLAGS ID bit) in the saved copy
+
+
+
+
+ popfd ; try to load the modified flags
+ pushfd
+ pop edx ; edx = flags as actually stored by the CPU
+ xor edx,eax
+ jz L_dont_use_mmx ; bit did not change: CPUID not available
+ xor eax,eax
+ cpuid ; leaf 0: vendor string in ebx:edx:ecx
+ cmp ebx,0756e6547h ; "Genu"
+ jne L_dont_use_mmx
+ cmp ecx,06c65746eh ; "ntel"
+ jne L_dont_use_mmx
+ cmp edx,049656e69h ; "ineI"
+ jne L_dont_use_mmx
+ mov eax,1
+ cpuid ; leaf 1: version in eax, feature bits in edx
+ shr eax,8
+ and eax,15 ; bits 8..11 of eax (family field)
+ cmp eax,6
+ jne L_dont_use_mmx ; MMX loop is selected only when this field equals 6
+ test edx,0800000h ; feature bit 23 (MMX)
+ jnz L_use_mmx
+ jmp L_dont_use_mmx
+L_use_mmx:
+ mov dword ptr [inflate_fast_use_mmx],2
+ jmp L_check_mmx_pop
+L_dont_use_mmx:
+ mov dword ptr [inflate_fast_use_mmx],3
+L_check_mmx_pop:
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ jmp L_check_mmx ; re-dispatch now that the flag is 2 or 3
+; 426 "inffast.S"
+ALIGN 4
+L_do_loop: ; integer loop: esi = in, edi = out, ebp = bit accumulator, bl = valid bit count
+; 437 "inffast.S"
+ cmp bl,15
+ ja L_get_length_code ; more than 15 bits available: no refill
+
+ xor eax,eax
+ lodsw ; fetch 16 more input bits
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax ; append them above the bits already held
+
+L_get_length_code:
+ mov edx, [esp+0] ; length index mask
+ mov ecx, [esp+8] ; length table
+ and edx,ebp
+ mov eax, [ecx+edx*4] ; 4-byte entry: al = op, ah = bit count, upper 16 bits = value
+
+L_dolen:
+
+
+
+
+
+
+ mov cl,ah
+ sub bl,ah
+ shr ebp,cl ; consume the bits of this code
+
+
+
+
+
+
+ test al,al
+ jnz L_test_for_length_base ; op != 0: not a literal
+
+ shr eax,16
+ stosb ; op == 0: store the low byte of the value as a literal
+
+L_while_test: ; continue while out is below the output limit and in is below the input limit
+
+
+ cmp [esp+16],edi
+ jbe L_break_loop
+
+ cmp [esp+20],esi
+ ja L_do_loop
+ jmp L_break_loop
+
+L_test_for_length_base: ; eax = table entry with op != 0
+; 502 "inffast.S"
+ mov edx,eax
+ shr edx,16 ; edx = value field (length base)
+ mov cl,al ; cl = op
+
+ test al,16
+ jz L_test_for_second_level_length ; bit 4 clear: not a length base
+ and cl,15 ; cl = number of extra bits
+ jz L_save_len
+ cmp bl,cl
+ jae L_add_bits_to_len ; enough bits already held
+
+ mov ch,cl ; keep the extra-bit count across the refill
+ xor eax,eax
+ lodsw
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+ mov cl,ch
+
+L_add_bits_to_len:
+ mov eax,1
+ shl eax,cl
+ dec eax ; mask of cl low bits
+ sub bl,cl
+ and eax,ebp ; extract the extra bits
+ shr ebp,cl ; and consume them
+ add edx,eax ; length = base + extra
+
+L_save_len:
+ mov [esp+24],edx ; remember the match length
+
+
+L_decode_distance: ; same scheme as the length decode, using the distance mask and table
+; 549 "inffast.S"
+ cmp bl,15
+ ja L_get_distance_code
+
+ xor eax,eax
+ lodsw ; refill 16 bits
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+
+L_get_distance_code:
+ mov edx, [esp+4] ; distance index mask
+ mov ecx, [esp+12] ; distance table
+ and edx,ebp
+ mov eax, [ecx+edx*4]
+
+
+L_dodist:
+ mov edx,eax
+ shr edx,16 ; edx = value field (distance base)
+ mov cl,ah
+ sub bl,ah
+ shr ebp,cl ; consume the bits of this code
+; 584 "inffast.S"
+ mov cl,al ; cl = op
+
+ test al,16
+ jz L_test_for_second_level_dist ; bit 4 clear: not a distance base
+ and cl,15 ; cl = number of extra bits
+ jz L_check_dist_one ; no extra bits: try the distance-1 fast path
+ cmp bl,cl
+ jae L_add_bits_to_dist
+
+ mov ch,cl ; keep the extra-bit count across the refill
+ xor eax,eax
+ lodsw
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+ mov cl,ch
+
+L_add_bits_to_dist:
+ mov eax,1
+ shl eax,cl
+ dec eax
+ sub bl,cl
+ and eax,ebp
+ shr ebp,cl
+ add edx,eax ; distance = base + extra
+ jmp L_check_window ; NOTE(review): jumps to the label immediately below
+
+L_check_window: ; edx = distance, [esp+24] = length
+; 625 "inffast.S"
+ mov [esp+44],esi ; save the input pointer; esi becomes the copy source
+ mov eax,edi
+ sub eax, [esp+40] ; eax = out - beg
+
+ cmp eax,edx
+ jb L_clip_window ; distance exceeds out - beg: take bytes from the window
+
+ mov ecx, [esp+24]
+ mov esi,edi
+ sub esi,edx ; source = out - distance
+; first three bytes are copied unconditionally; assumes length >= 3 - TODO confirm
+ sub ecx,3
+ mov al, [esi]
+ mov [edi],al
+ mov al, [esi+1]
+ mov dl, [esi+2]
+ add esi,3
+ mov [edi+1],al
+ mov [edi+2],dl
+ add edi,3
+ rep movsb ; remaining length - 3 bytes
+
+ mov esi, [esp+44] ; restore the input pointer
+ jmp L_while_test
+
+ALIGN 4
+L_check_dist_one: ; fast path: distance 1 repeats the previous output byte
+ cmp edx,1
+ jne L_check_window
+ cmp [esp+40],edi
+ je L_check_window ; out == beg: take the general path instead
+
+ dec edi
+ mov ecx, [esp+24]
+ mov al, [edi] ; al = previous output byte
+ sub ecx,3
+
+ mov [edi+1],al
+ mov [edi+2],al
+ mov [edi+3],al
+ add edi,4 ; net effect: out advanced by 3
+ rep stosb ; fill the remaining length - 3 bytes
+
+ jmp L_while_test
+
+ALIGN 4
+L_test_for_second_level_length: ; op bit 4 clear; cl = op, edx = value field
+
+
+
+
+ test al,64
+ jnz L_test_for_end_of_block ; bit 6 set: end of block or invalid code
+
+ mov eax,1
+ shl eax,cl
+ dec eax ; mask of op low bits
+ and eax,ebp
+ add eax,edx ; index = value + masked accumulator bits
+ mov edx, [esp+8]
+ mov eax, [edx+eax*4] ; fetch the next entry from the length table
+ jmp L_dolen
+
+ALIGN 4
+L_test_for_second_level_dist: ; op bit 4 clear; cl = op, edx = value field
+
+
+
+
+ test al,64
+ jnz L_invalid_distance_code ; bit 6 set: invalid distance code
+
+ mov eax,1
+ shl eax,cl
+ dec eax ; mask of op low bits
+ and eax,ebp
+ add eax,edx ; index = value + masked accumulator bits
+ mov edx, [esp+12]
+ mov eax, [edx+eax*4] ; fetch the next entry from the distance table
+ jmp L_dodist
+
+ALIGN 4
+L_clip_window: ; on entry eax = out - beg, edx = distance (distance > out - beg)
+; 721 "inffast.S"
+ mov ecx,eax
+ mov eax, [esp+52] ; eax = wsize
+ neg ecx
+ mov esi, [esp+56] ; esi = window
+
+ cmp eax,edx
+ jb L_invalid_distance_too_far ; wsize < distance
+
+ add ecx,edx ; ecx = distance - (out - beg) : bytes to take from the window
+ cmp dword ptr [esp+48],0
+ jne L_wrap_around_window ; write != 0
+
+ sub eax,ecx
+ add esi,eax ; source = window + wsize - ecx
+; 749 "inffast.S"
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1 ; whole match comes from the window
+
+ sub eax,ecx ; eax = bytes left after the window part
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,edx ; continue from out - distance
+ jmp L_do_copy1
+; NOTE(review): the eight lines below repeat the sequence above and follow an unconditional jmp with no label, so they look unreachable.
+ cmp eax,ecx
+ jbe L_do_copy1
+
+ sub eax,ecx
+ rep movsb
+ mov esi,edi
+ sub esi,edx
+ jmp L_do_copy1
+
+L_wrap_around_window: ; write != 0; esi = window, ecx = bytes needed from the window
+; 793 "inffast.S"
+ mov eax, [esp+48] ; eax = write
+ cmp ecx,eax
+ jbe L_contiguous_in_window ; needed bytes all lie below the write position
+
+ add esi, [esp+52]
+ add esi,eax
+ sub esi,ecx ; source = window + wsize + write - ecx
+ sub ecx,eax ; ecx = bytes available up to the end of the window
+
+
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1
+
+ sub eax,ecx
+ rep movsb ; copy the part at the end of the window
+ mov esi, [esp+56] ; wrap to the start of the window
+ mov ecx, [esp+48] ; up to write bytes are available there
+ cmp eax,ecx
+ jbe L_do_copy1
+
+ sub eax,ecx
+ rep movsb ; copy the part at the start of the window
+ mov esi,edi
+ sub esi,edx ; remainder comes from out - distance
+ jmp L_do_copy1
+
+L_contiguous_in_window: ; eax = write, ecx = bytes needed from the window (ecx <= write)
+; 836 "inffast.S"
+ add esi,eax
+ sub esi,ecx ; source = window + write - ecx
+
+
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1
+
+ sub eax,ecx
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,edx ; remainder comes from out - distance
+
+L_do_copy1: ; copy the final eax bytes from esi to the output
+; 862 "inffast.S"
+ mov ecx,eax
+ rep movsb
+
+ mov esi, [esp+44] ; restore the input pointer
+ jmp L_while_test
+; 878 "inffast.S"
+ALIGN 4
+L_init_mmx: ; set up the MMX loop: mm0 = accumulator, ebp = bit count, ebx = length table
+ emms
+
+
+
+
+
+ movd mm0,ebp ; move the bit accumulator into mm0
+ mov ebp,ebx ; the bit count now lives in ebp
+; 896 "inffast.S"
+ movd mm4,[esp+0] ; mm4 = length index mask (working copy)
+ movq mm3,mm4 ; mm3 = preserved length mask
+ movd mm5,[esp+4] ; mm5 = distance index mask (working copy)
+ movq mm2,mm5 ; mm2 = preserved distance mask
+ pxor mm1,mm1 ; mm1 = pending shift count, initially 0
+ mov ebx, [esp+8]
+ jmp L_do_loop_mmx
+
+ALIGN 4
+L_do_loop_mmx: ; MMX loop: mm0 = accumulator, mm1 = bits still to drop, ebp = bit count
+ psrlq mm0,mm1 ; drop the bits consumed by the previous code
+
+ cmp ebp,32
+ ja L_get_length_code_mmx ; more than 32 bits held: no refill
+
+ movd mm6,ebp
+ movd mm7,[esi] ; fetch 32 more input bits
+ add esi,4
+ psllq mm7,mm6 ; position them above the bits already held
+ add ebp,32
+ por mm0,mm7
+
+L_get_length_code_mmx:
+ pand mm4,mm0
+ movd eax,mm4 ; eax = table index
+ movq mm4,mm3 ; restore the working mask
+ mov eax, [ebx+eax*4] ; entry: al = op, ah = bit count, upper 16 bits = value
+
+L_dolen_mmx:
+ movzx ecx,ah
+ movd mm1,ecx ; defer the shift to the next psrlq
+ sub ebp,ecx
+
+ test al,al
+ jnz L_test_for_length_base_mmx
+
+ shr eax,16
+ stosb ; op == 0: store the low byte of the value as a literal
+
+L_while_test_mmx: ; continue while out is below the output limit and in is below the input limit
+
+
+ cmp [esp+16],edi
+ jbe L_break_loop
+
+ cmp [esp+20],esi
+ ja L_do_loop_mmx
+ jmp L_break_loop
+
+L_test_for_length_base_mmx: ; eax = table entry with op != 0
+
+ mov edx,eax
+ shr edx,16 ; edx = value field (length base)
+
+ test al,16
+ jz L_test_for_second_level_length_mmx
+ and eax,15 ; eax = number of extra bits
+ jz L_decode_distance_mmx
+
+ psrlq mm0,mm1 ; drop the code bits now
+ movd mm1,eax ; the extra bits become the next pending shift
+ movd ecx,mm0
+ sub ebp,eax
+ and ecx, [inflate_fast_mask+eax*4] ; keep only the extra bits
+ add edx,ecx ; edx = length
+
+L_decode_distance_mmx:
+ psrlq mm0,mm1 ; drop the pending bits
+
+ cmp ebp,32
+ ja L_get_dist_code_mmx
+
+ movd mm6,ebp
+ movd mm7,[esi] ; refill 32 bits
+ add esi,4
+ psllq mm7,mm6
+ add ebp,32
+ por mm0,mm7
+
+L_get_dist_code_mmx:
+ mov ebx, [esp+12] ; ebx = distance table
+ pand mm5,mm0
+ movd eax,mm5 ; eax = table index
+ movq mm5,mm2 ; restore the working mask
+ mov eax, [ebx+eax*4]
+
+L_dodist_mmx: ; eax = distance table entry, edx = length
+
+ movzx ecx,ah
+ mov ebx,eax
+ shr ebx,16 ; ebx = value field (distance base)
+ sub ebp,ecx
+ movd mm1,ecx ; defer dropping the code bits
+
+ test al,16
+ jz L_test_for_second_level_dist_mmx
+ and eax,15 ; eax = number of extra bits
+ jz L_check_dist_one_mmx
+
+L_add_bits_to_dist_mmx:
+ psrlq mm0,mm1
+ movd mm1,eax ; the extra bits become the next pending shift
+ movd ecx,mm0
+ sub ebp,eax
+ and ecx, [inflate_fast_mask+eax*4]
+ add ebx,ecx ; ebx = distance
+
+L_check_window_mmx:
+ mov [esp+44],esi ; save the input pointer
+ mov eax,edi
+ sub eax, [esp+40] ; eax = out - beg
+
+ cmp eax,ebx
+ jb L_clip_window_mmx ; distance exceeds out - beg
+
+ mov ecx,edx
+ mov esi,edi
+ sub esi,ebx ; source = out - distance
+; first three bytes are copied unconditionally; assumes length >= 3 - TODO confirm
+ sub ecx,3
+ mov al, [esi]
+ mov [edi],al
+ mov al, [esi+1]
+ mov dl, [esi+2]
+ add esi,3
+ mov [edi+1],al
+ mov [edi+2],dl
+ add edi,3
+ rep movsb
+
+ mov esi, [esp+44] ; restore the input pointer
+ mov ebx, [esp+8] ; restore ebx = length table
+ jmp L_while_test_mmx
+
+ALIGN 4
+L_check_dist_one_mmx: ; fast path: distance 1 repeats the previous output byte
+ cmp ebx,1
+ jne L_check_window_mmx
+ cmp [esp+40],edi
+ je L_check_window_mmx ; out == beg: take the general path instead
+
+ dec edi
+ mov ecx,edx ; ecx = length
+ mov al, [edi] ; al = previous output byte
+ sub ecx,3
+
+ mov [edi+1],al
+ mov [edi+2],al
+ mov [edi+3],al
+ add edi,4
+ rep stosb ; fill the remaining length - 3 bytes
+
+ mov ebx, [esp+8] ; restore ebx = length table
+ jmp L_while_test_mmx
+
+ALIGN 4
+L_test_for_second_level_length_mmx: ; op bit 4 clear; edx = value field, ebx = length table
+ test al,64
+ jnz L_test_for_end_of_block
+
+ and eax,15 ; eax = op & 15 : number of index bits
+ psrlq mm0,mm1 ; drop the pending bits
+ movd ecx,mm0
+ and ecx, [inflate_fast_mask+eax*4]
+ add ecx,edx ; index = value + masked accumulator bits
+ mov eax, [ebx+ecx*4]
+ jmp L_dolen_mmx
+
+ALIGN 4
+L_test_for_second_level_dist_mmx: ; op bit 4 clear; ebx = value field
+ test al,64
+ jnz L_invalid_distance_code
+
+ and eax,15 ; eax = op & 15 : number of index bits
+ psrlq mm0,mm1 ; drop the pending bits
+ movd ecx,mm0
+ and ecx, [inflate_fast_mask+eax*4]
+ mov eax, [esp+12] ; eax = distance table
+ add ecx,ebx ; index = value + masked accumulator bits
+ mov eax, [eax+ecx*4]
+ jmp L_dodist_mmx
+
+ALIGN 4
+L_clip_window_mmx: ; on entry eax = out - beg, ebx = distance, edx = length
+
+ mov ecx,eax
+ mov eax, [esp+52] ; eax = wsize
+ neg ecx
+ mov esi, [esp+56] ; esi = window
+
+ cmp eax,ebx
+ jb L_invalid_distance_too_far ; wsize < distance
+
+ add ecx,ebx ; ecx = distance - (out - beg) : bytes to take from the window
+ cmp dword ptr [esp+48],0
+ jne L_wrap_around_window_mmx ; write != 0
+
+ sub eax,ecx
+ add esi,eax ; source = window + wsize - ecx
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx ; whole match comes from the window
+
+ sub edx,ecx
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,ebx ; continue from out - distance
+ jmp L_do_copy1_mmx
+; NOTE(review): the eight lines below repeat the sequence above and follow an unconditional jmp with no label, so they look unreachable.
+ cmp edx,ecx
+ jbe L_do_copy1_mmx
+
+ sub edx,ecx
+ rep movsb
+ mov esi,edi
+ sub esi,ebx
+ jmp L_do_copy1_mmx
+
+L_wrap_around_window_mmx: ; write != 0; esi = window, ecx = bytes needed from the window
+
+ mov eax, [esp+48] ; eax = write
+ cmp ecx,eax
+ jbe L_contiguous_in_window_mmx
+
+ add esi, [esp+52]
+ add esi,eax
+ sub esi,ecx ; source = window + wsize + write - ecx
+ sub ecx,eax ; ecx = bytes available up to the end of the window
+
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx
+
+ sub edx,ecx
+ rep movsb ; copy the part at the end of the window
+ mov esi, [esp+56] ; wrap to the start of the window
+ mov ecx, [esp+48]
+ cmp edx,ecx
+ jbe L_do_copy1_mmx
+
+ sub edx,ecx
+ rep movsb ; copy the part at the start of the window
+ mov esi,edi
+ sub esi,ebx ; remainder comes from out - distance
+ jmp L_do_copy1_mmx
+
+L_contiguous_in_window_mmx: ; eax = write, ecx = bytes needed (ecx <= write)
+
+ add esi,eax
+ sub esi,ecx ; source = window + write - ecx
+
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx
+
+ sub edx,ecx
+ rep movsb
+ mov esi,edi
+ sub esi,ebx
+
+L_do_copy1_mmx: ; copy the final edx bytes from esi to the output
+
+
+ mov ecx,edx
+ rep movsb
+
+ mov esi, [esp+44] ; restore the input pointer
+ mov ebx, [esp+8] ; restore ebx = length table
+ jmp L_while_test_mmx
+; 1174 "inffast.S"
+L_invalid_distance_code: ; exit paths: ecx = message (or 0), edx = new state->mode
+
+
+
+
+
+ mov ecx, invalid_distance_code_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_test_for_end_of_block: ; reached when op bit 6 is set in a length entry
+
+
+
+
+
+ test al,32
+ jz L_invalid_literal_length_code ; bit 5 clear: invalid code
+
+ mov ecx,0 ; no message for end of block
+ mov edx,INFLATE_MODE_TYPE
+ jmp L_update_stream_state
+
+L_invalid_literal_length_code:
+
+
+
+
+
+ mov ecx, invalid_literal_length_code_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_invalid_distance_too_far: ; reached from the clip-window code, where esi held the window pointer
+
+
+
+ mov esi, [esp+44] ; restore the input pointer
+ mov ecx, invalid_distance_too_far_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_update_stream_state:
+
+ mov eax, [esp+88] ; eax = stream pointer (first argument)
+ test ecx,ecx
+ jz L_skip_msg
+ mov [eax+24],ecx ; store the message pointer at stream+24 (presumably strm->msg - confirm)
+L_skip_msg:
+ mov eax, [eax+28] ; eax = state pointer
+ mov [eax+mode_state],edx ; state->mode = edx
+ jmp L_break_loop
+
+ALIGN 4
+L_break_loop: ; common exit: give back unused input bytes and write results to stream/state
+; 1243 "inffast.S"
+ cmp dword ptr [inflate_fast_use_mmx],2
+ jne L_update_next_in
+
+
+
+ mov ebx,ebp ; MMX loop keeps the bit count in ebp; move it to ebx
+
+L_update_next_in:
+; 1266 "inffast.S"
+ mov eax, [esp+88] ; eax = stream pointer
+ mov ecx,ebx
+ mov edx, [eax+28] ; edx = state pointer
+ shr ecx,3 ; whole bytes still held in the accumulator
+ sub esi,ecx ; give those bytes back to the input
+ shl ecx,3
+ sub ebx,ecx ; bits = bits mod 8
+ mov [eax+12],edi ; store the output pointer at stream+12
+ mov [edx+bits_state],ebx
+ mov ecx,ebx
+
+ lea ebx, [esp+28]
+ cmp [esp+20],ebx
+ jne L_buf_not_used ; input limit is not the scratch buffer: pointers are already real
+; scratch buffer was used: translate esi and the limit back to the caller's buffer
+ sub esi,ebx ; esi = offset consumed within the scratch buffer
+ mov ebx, [eax+0]
+ mov [esp+20],ebx
+ add esi,ebx ; esi = [strm+0] + offset
+ mov ebx, [eax+4]
+ sub ebx,11
+ add [esp+20],ebx ; limit = [strm+0] + [strm+4] - 11
+
+L_buf_not_used:
+ mov [eax+0],esi ; store the input pointer at stream+0
+
+ mov ebx,1
+ shl ebx,cl
+ dec ebx ; ebx = (1 << bits) - 1
+
+
+
+
+
+ cmp dword ptr [inflate_fast_use_mmx],2
+ jne L_update_hold
+
+
+
+ psrlq mm0,mm1 ; apply the pending shift
+ movd ebp,mm0 ; bring the accumulator back into ebp
+
+ emms
+
+L_update_hold:
+
+
+
+ and ebp,ebx ; keep only the valid low bits
+ mov [edx+hold_state],ebp
+
+
+
+
+ mov ebx, [esp+20]
+ cmp ebx,esi
+ jbe L_last_is_smaller
+
+ sub ebx,esi
+ add ebx,11
+ mov [eax+4],ebx ; stream+4 = limit - in + 11
+ jmp L_fixup_out
+L_last_is_smaller:
+ sub esi,ebx
+ neg esi
+ add esi,11 ; same value, computed as 11 - (in - limit)
+ mov [eax+4],esi
+
+
+
+
+L_fixup_out: ; eax = stream pointer, edi = output pointer
+
+ mov ebx, [esp+16] ; ebx = output limit
+ cmp ebx,edi
+ jbe L_end_is_smaller
+
+ sub ebx,edi
+ add ebx,257
+ mov [eax+16],ebx ; stream+16 = limit - out + 257
+ jmp L_done
+L_end_is_smaller:
+ sub edi,ebx
+ neg edi
+ add edi,257 ; same value, computed as 257 - (out - limit)
+ mov [eax+16],edi
+
+
+
+
+
+L_done: ; epilogue: release the frame and restore registers in reverse push order
+ add esp,64
+ popfd
+ pop ebx
+ pop ebp
+ pop esi
+ pop edi
+ ret
+
+_TEXT ends
+end
diff --git a/zlib/contrib/masmx86/mkasm.bat b/zlib/contrib/masmx86/mkasm.bat index f3fa0a00f39..70a51f83777 100755 --- a/zlib/contrib/masmx86/mkasm.bat +++ b/zlib/contrib/masmx86/mkasm.bat @@ -1,3 +1,3 @@ -cl /I..\.. /O2 /c gvmat32c.c
+cl /DASMV /I..\.. /O2 /c gvmat32c.c
ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm
ml /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/zlib/contrib/minizip/crypt.h b/zlib/contrib/minizip/crypt.h index 9c7a89cbe82..622f4bc2ec4 100644 --- a/zlib/contrib/minizip/crypt.h +++ b/zlib/contrib/minizip/crypt.h @@ -1,9 +1,9 @@ /* crypt.h -- base code for crypt/uncrypt ZIPfile - Version 1.00, September 10th, 2003 + Version 1.01e, February 12th, 2005 - Copyright (C) 1998-2003 Gilles Vollant + Copyright (C) 1998-2005 Gilles Vollant This code is a modified version of crypting code in Infozip distribution diff --git a/zlib/contrib/minizip/ioapi.c b/zlib/contrib/minizip/ioapi.c index 80443b761f3..f1bee23e64b 100644 --- a/zlib/contrib/minizip/ioapi.c +++ b/zlib/contrib/minizip/ioapi.c @@ -1,9 +1,9 @@ /* ioapi.c -- IO base function header for compress/uncompress .zip files using zlib + zip or unzip API - Version 1.00, September 10th, 2003 + Version 1.01e, February 12th, 2005 - Copyright (C) 1998-2003 Gilles Vollant + Copyright (C) 1998-2005 Gilles Vollant */ #include <stdio.h> @@ -94,7 +94,7 @@ uLong ZCALLBACK fread_file_func (opaque, stream, buf, size) uLong size; { uLong ret; - ret = fread(buf, 1, (size_t)size, (FILE *)stream); + ret = (uLong)fread(buf, 1, (size_t)size, (FILE *)stream); return ret; } @@ -106,7 +106,7 @@ uLong ZCALLBACK fwrite_file_func (opaque, stream, buf, size) uLong size; { uLong ret; - ret = fwrite(buf, 1, (size_t)size, (FILE *)stream); + ret = (uLong)fwrite(buf, 1, (size_t)size, (FILE *)stream); return ret; } diff --git a/zlib/contrib/minizip/ioapi.h b/zlib/contrib/minizip/ioapi.h index 6bc2a2cc872..7d457baab34 100644 --- a/zlib/contrib/minizip/ioapi.h +++ b/zlib/contrib/minizip/ioapi.h @@ -1,9 +1,9 @@ /* ioapi.h -- IO base function header for compress/uncompress .zip files using zlib + zip or unzip API - Version 1.00, September 10th, 2003 + Version 1.01e, February 12th, 2005 - Copyright (C) 1998-2003 Gilles Vollant + Copyright (C) 1998-2005 Gilles Vollant */ #ifndef _ZLIBIOAPI_H diff --git a/zlib/contrib/minizip/iowin32.c b/zlib/contrib/minizip/iowin32.c index 
02b27cb7614..a9b5f783996 100644 --- a/zlib/contrib/minizip/iowin32.c +++ b/zlib/contrib/minizip/iowin32.c @@ -2,9 +2,9 @@ files using zlib + zip or unzip API This IO API version uses the Win32 API (for Microsoft Windows) - Version 1.00, September 10th, 2003 + Version 1.01e, February 12th, 2005 - Copyright (C) 1998-2003 Gilles Vollant + Copyright (C) 1998-2005 Gilles Vollant */ #include <stdlib.h> diff --git a/zlib/contrib/minizip/iowin32.h b/zlib/contrib/minizip/iowin32.h index c0ebd50738e..a3a437adf84 100644 --- a/zlib/contrib/minizip/iowin32.h +++ b/zlib/contrib/minizip/iowin32.h @@ -2,9 +2,9 @@ files using zlib + zip or unzip API This IO API version uses the Win32 API (for Microsoft Windows) - Version 1.00, September 10th, 2003 + Version 1.01e, February 12th, 2005 - Copyright (C) 1998-2003 Gilles Vollant + Copyright (C) 1998-2005 Gilles Vollant */ #include <windows.h> diff --git a/zlib/contrib/pascal/zlibpas.pas b/zlib/contrib/pascal/zlibpas.pas index 6d5ebe0f30e..836848c2b7b 100644 --- a/zlib/contrib/pascal/zlibpas.pas +++ b/zlib/contrib/pascal/zlibpas.pas @@ -10,7 +10,7 @@ unit zlibpas; interface const - ZLIB_VERSION = '1.2.1'; + ZLIB_VERSION = '1.2.3'; type alloc_func = function(opaque: Pointer; items, size: Integer): Pointer; diff --git a/zlib/contrib/puff/puff.c b/zlib/contrib/puff/puff.c index 7a8116b4480..ce0cc405e38 100644 --- a/zlib/contrib/puff/puff.c +++ b/zlib/contrib/puff/puff.c @@ -1,8 +1,8 @@ /* * puff.c - * Copyright (C) 2002, 2003 Mark Adler + * Copyright (C) 2002-2004 Mark Adler * For conditions of distribution and use, see copyright notice in puff.h - * version 1.7, 3 Mar 2003 + * version 1.8, 9 Jan 2004 * * puff.c is a simple inflate written to be an unambiguous way to specify the * deflate format. It is not written for speed but rather simplicity. 
As a @@ -60,6 +60,7 @@ * 1.6 7 Aug 2002 - Minor format changes * 1.7 3 Mar 2003 - Added test code for distribution * - Added zlib-like license + * 1.8 9 Jan 2004 - Added some comments on no distance codes case */ #include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */ @@ -577,6 +578,9 @@ local int fixed(struct state *s) * block is fewer bits), but it is allowed by the format. So incomplete * literal/length codes of one symbol should also be permitted. * + * - If there are only literal codes and no lengths, then there are no distance + * codes. This is represented by one distance code with zero bits. + * * - The list of up to 286 length/literal lengths and up to 30 distance lengths * are themselves compressed using Huffman codes and run-length encoding. In * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means diff --git a/zlib/contrib/testzlib/testzlib.c b/zlib/contrib/testzlib/testzlib.c index fdabc5c192f..e5574f45e7f 100644 --- a/zlib/contrib/testzlib/testzlib.c +++ b/zlib/contrib/testzlib/testzlib.c @@ -1,149 +1,275 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <windows.h> -#include "zlib.h" - -int ReadFileMemory(const char* filename,long* plFileSize,void** pFilePtr) -{ - FILE* stream; - void* ptr; - int retVal=1; - stream=fopen(filename, "rb"); - if (stream==NULL) - return 0; - - fseek(stream,0,SEEK_END); - - *plFileSize=ftell(stream); - fseek(stream,0,SEEK_SET); - ptr=malloc((*plFileSize)+1); - if (ptr==NULL) - retVal=0; - else - { - if (fread(ptr, 1, *plFileSize,stream) != (*plFileSize)) - retVal=0; - } - fclose(stream); - *pFilePtr=ptr; - return retVal; -} - -int main(int argc, char *argv[]) -{ - int BlockSizeCompress=0x8000; - int BlockSizeUncompress=0x8000; - int cprLevel=Z_DEFAULT_COMPRESSION ; - long lFileSize; - unsigned char* FilePtr; - long lBufferSizeCpr; - long lBufferSizeUncpr; - long lCompressedSize=0; - unsigned char* CprPtr; - unsigned char* UncprPtr; - long lSizeCpr,lSizeUncpr; - DWORD dwGetTick; - - if 
(argc<=1) - { - printf("run TestZlib <File> [BlockSizeCompress] [BlockSizeUncompress] [compres. level]\n"); - return 0; - } - - if (ReadFileMemory(argv[1],&lFileSize,&FilePtr)==0) - { - printf("error reading %s\n",argv[1]); - return 1; - } - else printf("file %s read, %u bytes\n",argv[1],lFileSize); - - if (argc>=3) - BlockSizeCompress=atol(argv[2]); - - if (argc>=4) - BlockSizeUncompress=atol(argv[3]); - - if (argc>=5) - cprLevel=(int)atol(argv[4]); - - lBufferSizeCpr = lFileSize + (lFileSize/0x10) + 0x200; - lBufferSizeUncpr = lBufferSizeCpr; - - CprPtr=(unsigned char*)malloc(lBufferSizeCpr + BlockSizeCompress); - UncprPtr=(unsigned char*)malloc(lBufferSizeUncpr + BlockSizeUncompress); - - dwGetTick=GetTickCount(); - { - z_stream zcpr; - int ret=Z_OK; - long lOrigToDo = lFileSize; - long lOrigDone = 0; - int step=0; - memset(&zcpr,0,sizeof(z_stream)); - deflateInit(&zcpr,cprLevel); - - zcpr.next_in = FilePtr; - zcpr.next_out = CprPtr; - - - do - { - long all_read_before = zcpr.total_in; - zcpr.avail_in = min(lOrigToDo,BlockSizeCompress); - zcpr.avail_out = BlockSizeCompress; - ret=deflate(&zcpr,(zcpr.avail_in==lOrigToDo) ? 
Z_FINISH : Z_SYNC_FLUSH); - lOrigDone += (zcpr.total_in-all_read_before); - lOrigToDo -= (zcpr.total_in-all_read_before); - step++; - } while (ret==Z_OK); - - lSizeCpr=zcpr.total_out; - deflateEnd(&zcpr); - dwGetTick=GetTickCount()-dwGetTick; - printf("total compress size = %u, in %u step\n",lSizeCpr,step); - printf("time = %u msec = %f sec\n\n",dwGetTick,dwGetTick/(double)1000.); - } - - dwGetTick=GetTickCount(); - { - z_stream zcpr; - int ret=Z_OK; - long lOrigToDo = lSizeCpr; - long lOrigDone = 0; - int step=0; - memset(&zcpr,0,sizeof(z_stream)); - inflateInit(&zcpr); - - zcpr.next_in = CprPtr; - zcpr.next_out = UncprPtr; - - - do - { - long all_read_before = zcpr.total_in; - zcpr.avail_in = min(lOrigToDo,BlockSizeUncompress); - zcpr.avail_out = BlockSizeUncompress; - ret=inflate(&zcpr,Z_SYNC_FLUSH); - lOrigDone += (zcpr.total_in-all_read_before); - lOrigToDo -= (zcpr.total_in-all_read_before); - step++; - } while (ret==Z_OK); - - lSizeUncpr=zcpr.total_out; - inflateEnd(&zcpr); - dwGetTick=GetTickCount()-dwGetTick; - printf("total uncompress size = %u, in %u step\n",lSizeUncpr,step); - printf("time = %u msec = %f sec\n\n",dwGetTick,dwGetTick/(double)1000.); - } - - if (lSizeUncpr==lFileSize) - { - if (memcmp(FilePtr,UncprPtr,lFileSize)==0) - printf("compare ok\n"); - - } - - return 0; - -} +#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+
+#include "zlib.h"
+
+
+/*
+ * Stores A - B in *R, working on the LowPart/HighPart halves separately.
+ * The low-part difference is stored as computed; when A.LowPart is smaller
+ * than B.LowPart one is borrowed from the high part.
+ */
+void MyDoMinus64(LARGE_INTEGER *R,LARGE_INTEGER A,LARGE_INTEGER B)
+{
+    R->LowPart = A.LowPart - B.LowPart;
+    R->HighPart = A.HighPart - B.HighPart;
+
+    /* Borrow from the high half when the low half went below zero. */
+    if (A.LowPart < B.LowPart)
+        R->HighPart --;
+}
+
+#ifdef _M_X64
+// see http://msdn2.microsoft.com/library/twchhe95(en-us,vs.80).aspx for __rdtsc
+unsigned __int64 __rdtsc(void);
+/* x64 build: stores the current __rdtsc() value in pbeginTime64->QuadPart. */
+void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64)
+{
+    unsigned __int64 startTsc=__rdtsc();
+    pbeginTime64->QuadPart=startTsc;
+}
+
+/*
+ * x64 build: returns the difference between the current __rdtsc() value and
+ * beginTime64.QuadPart.  fComputeTimeQueryPerf is not used here.
+ */
+LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf)
+{
+    LARGE_INTEGER elapsed;
+    unsigned _int64 startTsc=(unsigned _int64)(beginTime64.QuadPart);
+    unsigned _int64 delta=__rdtsc()-startTsc;
+
+    elapsed.QuadPart=delta;
+    return elapsed;
+}
+#else
+#ifdef _M_IX86
+/*
+ * x86 build: executes the rdtsc instruction and stores EAX in
+ * pbeginTime64->LowPart and EDX in pbeginTime64->HighPart.
+ */
+void myGetRDTSC32(LARGE_INTEGER * pbeginTime64)
+{
+    DWORD tscLow,tscHigh;
+    _asm
+    {
+        rdtsc
+        mov tscLow,eax
+        mov tscHigh,edx
+    }
+    pbeginTime64->HighPart=tscHigh;
+    pbeginTime64->LowPart=tscLow;
+}
+
+/* x86 build: captures the start counter value through myGetRDTSC32(). */
+void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64)
+{
+    myGetRDTSC32(pbeginTime64);
+}
+
+/*
+ * x86 build: reads the counter again with myGetRDTSC32() and returns
+ * (now - beginTime64), computed by MyDoMinus64().
+ * fComputeTimeQueryPerf is not used here.
+ */
+LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf)
+{
+    LARGE_INTEGER now;
+    LARGE_INTEGER elapsed;
+
+    myGetRDTSC32(&now);
+
+    elapsed.HighPart=0;
+    elapsed.LowPart=0;
+    MyDoMinus64(&elapsed,now,beginTime64);
+    return elapsed;
+}
+#else
+/* Build that is neither _M_X64 nor _M_IX86: nothing is read or written. */
+void myGetRDTSC32(LARGE_INTEGER * pbeginTime64)
+{
+    (void)pbeginTime64;
+}
+
+/* Build that is neither _M_X64 nor _M_IX86: *pbeginTime64 is left untouched. */
+void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64)
+{
+    (void)pbeginTime64;
+}
+
+/*
+ * Build that is neither _M_X64 nor _M_IX86: always returns a LARGE_INTEGER
+ * whose QuadPart is 0; both arguments are ignored.
+ */
+LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf)
+{
+    LARGE_INTEGER noResult;
+
+    (void)beginTime64;
+    (void)fComputeTimeQueryPerf;
+    noResult.QuadPart=0;
+    return noResult;
+}
+#endif
+#endif
+
+/*
+ * Records a start timestamp in *pbeginTime64.
+ *
+ * When fComputeTimeQueryPerf is set and QueryPerformanceCounter() succeeds,
+ * the performance-counter value is kept.  Otherwise LowPart receives
+ * GetTickCount() and HighPart is cleared.
+ */
+void BeginCountPerfCounter(LARGE_INTEGER * pbeginTime64,BOOL fComputeTimeQueryPerf)
+{
+    if (fComputeTimeQueryPerf && QueryPerformanceCounter(pbeginTime64))
+        return;
+
+    /* Tick-count fallback. */
+    pbeginTime64->HighPart = 0;
+    pbeginTime64->LowPart = GetTickCount();
+}
+
+/*
+ * Returns the number of milliseconds elapsed since beginTime64 was recorded
+ * by BeginCountPerfCounter().
+ *
+ *   beginTime64            start timestamp
+ *   fComputeTimeQueryPerf  non-zero to measure with the performance counter
+ *
+ * When fComputeTimeQueryPerf is zero, or QueryPerformanceCounter() fails,
+ * beginTime64.LowPart is taken to be a GetTickCount() value.  Otherwise the
+ * elapsed counter ticks are converted with QueryPerformanceFrequency().
+ * Returns 0 when the counter frequency cannot be obtained.
+ */
+DWORD GetMsecSincePerfCounter(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf)
+{
+    LARGE_INTEGER endTime64,ticksPerSecond;
+    LONGLONG elapsedTicks;
+
+    if ((!fComputeTimeQueryPerf) || (!QueryPerformanceCounter(&endTime64)))
+        return GetTickCount() - beginTime64.LowPart;
+
+    /* A failed query or a non-positive frequency would make the division
+       below meaningless (or a division by zero). */
+    if ((!QueryPerformanceFrequency(&ticksPerSecond)) || (ticksPerSecond.QuadPart <= 0))
+        return 0;
+
+    /* Do the conversion in 64 bits: scaling through 32-bit values overflows
+       on long intervals or fast counters and loses precision. */
+    elapsedTicks = endTime64.QuadPart - beginTime64.QuadPart;
+    return (DWORD)((elapsedTicks * 1000) / ticksPerSecond.QuadPart);
+}
+
+/*
+ * Reads the whole file `filename` (opened in binary mode) into a buffer
+ * obtained from malloc().
+ *
+ *   filename    path of the file to read
+ *   plFileSize  receives the file size in bytes (0 on failure)
+ *   pFilePtr    receives the buffer, to be released with free()
+ *               (NULL on failure)
+ *
+ * Returns 1 on success, 0 when the file cannot be opened, measured,
+ * allocated or completely read.
+ */
+int ReadFileMemory(const char* filename,long* plFileSize,void** pFilePtr)
+{
+    FILE* stream;
+    void* ptr=NULL;
+    long lSize=-1;
+    int retVal=0;
+
+    *plFileSize=0;
+    *pFilePtr=NULL;
+
+    stream=fopen(filename, "rb");
+    if (stream==NULL)
+        return 0;
+
+    /* Measure the file by seeking to its end; ftell() reports -1 on error. */
+    if (fseek(stream,0,SEEK_END)==0)
+        lSize=ftell(stream);
+
+    if ((lSize>=0) && (fseek(stream,0,SEEK_SET)==0))
+    {
+        /* One extra byte, so an empty file still requests a non-zero size. */
+        ptr=malloc((size_t)lSize+1);
+        if ((ptr!=NULL) && (fread(ptr, 1, (size_t)lSize,stream)==(size_t)lSize))
+            retVal=1;
+    }
+    fclose(stream);
+
+    if (retVal)
+    {
+        *plFileSize=lSize;
+        *pFilePtr=ptr;
+    }
+    else
+        free(ptr);    /* never hand a partially filled buffer to the caller */
+
+    return retVal;
+}
+
+/*
+ * Compresses the file named by argv[1] with deflate(), inflates the result
+ * again and compares it with the original, printing the size, step count and
+ * elapsed time (GetTickCount, performance counter, rdtsc) of each phase.
+ *
+ * Usage: TestZlib <File> [BlockSizeCompress] [BlockSizeUncompress] [level]
+ *
+ * Returns 0 when the round trip matches (or when only the usage text is
+ * printed) and 1 on any error, including a mismatch after decompression.
+ */
+int main(int argc, char *argv[])
+{
+    int BlockSizeCompress=0x8000;
+    int BlockSizeUncompress=0x8000;
+    int cprLevel=Z_DEFAULT_COMPRESSION ;
+    int exitCode=1;
+    long lFileSize;
+    void* pFileData=NULL;
+    unsigned char* FilePtr=NULL;
+    long lBufferSizeCpr;
+    long lBufferSizeUncpr;
+    unsigned char* CprPtr=NULL;
+    unsigned char* UncprPtr=NULL;
+    unsigned char* pShrunk;
+    long lSizeCpr=0,lSizeUncpr=0;
+    DWORD dwGetTick,dwMsecQP;
+    LARGE_INTEGER li_qp,li_rdtsc,dwResRdtsc;
+
+    if (argc<=1)
+    {
+        printf("run TestZlib <File> [BlockSizeCompress] [BlockSizeUncompress] [compres. level]\n");
+        return 0;
+    }
+
+    /* ReadFileMemory() takes a void**, so read through a void* temporary
+       instead of passing the address of an unsigned char*. */
+    if (ReadFileMemory(argv[1],&lFileSize,&pFileData)==0)
+    {
+        printf("error reading %s\n",argv[1]);
+        free(pFileData);
+        return 1;
+    }
+    FilePtr=(unsigned char*)pFileData;
+    printf("file %s read, %ld bytes\n",argv[1],lFileSize);
+
+    if (argc>=3)
+        BlockSizeCompress=(int)atol(argv[2]);
+
+    if (argc>=4)
+        BlockSizeUncompress=(int)atol(argv[3]);
+
+    if (argc>=5)
+        cprLevel=(int)atol(argv[4]);
+
+    /* A zero or negative block size would be turned into a bogus
+       avail_in/avail_out value below. */
+    if ((BlockSizeCompress<=0) || (BlockSizeUncompress<=0))
+    {
+        printf("error: block sizes must be greater than zero\n");
+        goto cleanup;
+    }
+
+    lBufferSizeCpr = lFileSize + (lFileSize/0x10) + 0x200;
+    lBufferSizeUncpr = lBufferSizeCpr;
+
+    CprPtr=(unsigned char*)malloc(lBufferSizeCpr + BlockSizeCompress);
+    if (CprPtr==NULL)
+    {
+        printf("error: out of memory\n");
+        goto cleanup;
+    }
+
+    BeginCountPerfCounter(&li_qp,TRUE);
+    dwGetTick=GetTickCount();
+    BeginCountRdtsc(&li_rdtsc);
+    {
+        z_stream zcpr;
+        int ret=Z_OK;
+        long lOrigToDo = lFileSize;
+        long lOutTotal = lBufferSizeCpr + BlockSizeCompress;
+        int step=0;
+        memset(&zcpr,0,sizeof(z_stream));
+        ret=deflateInit(&zcpr,cprLevel);
+        if (ret!=Z_OK)
+        {
+            printf("error: deflateInit failed (%d)\n",ret);
+            goto cleanup;
+        }
+
+        zcpr.next_in = FilePtr;
+        zcpr.next_out = CprPtr;
+
+        do
+        {
+            long all_read_before = zcpr.total_in;
+            /* Never offer more output room than is left in CprPtr: many
+               small flushed blocks can grow the output beyond the estimate. */
+            long lOutLeft = lOutTotal - (long)zcpr.total_out;
+            zcpr.avail_in = min(lOrigToDo,BlockSizeCompress);
+            zcpr.avail_out = min(lOutLeft,BlockSizeCompress);
+            ret=deflate(&zcpr,(zcpr.avail_in==lOrigToDo) ? Z_FINISH : Z_SYNC_FLUSH);
+            lOrigToDo -= (zcpr.total_in-all_read_before);
+            step++;
+        } while (ret==Z_OK);
+
+        lSizeCpr=zcpr.total_out;
+        deflateEnd(&zcpr);
+        dwGetTick=GetTickCount()-dwGetTick;
+        dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE);
+        dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE);
+        if (ret!=Z_STREAM_END)
+        {
+            printf("error: deflate failed (%d)\n",ret);
+            goto cleanup;
+        }
+        printf("total compress size = %ld, in %d step\n",lSizeCpr,step);
+        printf("time = %lu msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.);
+        printf("defcpr time QP = %lu msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.);
+        printf("defcpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart);
+    }
+
+    /* Shrink the compressed buffer to its final size; keep the old block if
+       realloc() cannot provide a new one. */
+    if (lSizeCpr>0)
+    {
+        pShrunk=(unsigned char*)realloc(CprPtr,lSizeCpr);
+        if (pShrunk!=NULL)
+            CprPtr=pShrunk;
+    }
+
+    UncprPtr=(unsigned char*)malloc(lBufferSizeUncpr + BlockSizeUncompress);
+    if (UncprPtr==NULL)
+    {
+        printf("error: out of memory\n");
+        goto cleanup;
+    }
+
+    BeginCountPerfCounter(&li_qp,TRUE);
+    dwGetTick=GetTickCount();
+    BeginCountRdtsc(&li_rdtsc);
+    {
+        z_stream zcpr;
+        int ret=Z_OK;
+        long lOrigToDo = lSizeCpr;
+        long lOutTotal = lBufferSizeUncpr + BlockSizeUncompress;
+        int step=0;
+        memset(&zcpr,0,sizeof(z_stream));
+        ret=inflateInit(&zcpr);
+        if (ret!=Z_OK)
+        {
+            printf("error: inflateInit failed (%d)\n",ret);
+            goto cleanup;
+        }
+
+        zcpr.next_in = CprPtr;
+        zcpr.next_out = UncprPtr;
+
+        do
+        {
+            long all_read_before = zcpr.total_in;
+            /* Same clamping as in the compression loop. */
+            long lOutLeft = lOutTotal - (long)zcpr.total_out;
+            zcpr.avail_in = min(lOrigToDo,BlockSizeUncompress);
+            zcpr.avail_out = min(lOutLeft,BlockSizeUncompress);
+            ret=inflate(&zcpr,Z_SYNC_FLUSH);
+            lOrigToDo -= (zcpr.total_in-all_read_before);
+            step++;
+        } while (ret==Z_OK);
+
+        lSizeUncpr=zcpr.total_out;
+        inflateEnd(&zcpr);
+        dwGetTick=GetTickCount()-dwGetTick;
+        dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE);
+        dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE);
+        if (ret!=Z_STREAM_END)
+        {
+            printf("error: inflate failed (%d)\n",ret);
+            goto cleanup;
+        }
+        printf("total uncompress size = %ld, in %d step\n",lSizeUncpr,step);
+        printf("time = %lu msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.);
+        printf("uncpr time QP = %lu msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.);
+        printf("uncpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart);
+    }
+
+    if ((lSizeUncpr==lFileSize) && (memcmp(FilePtr,UncprPtr,lFileSize)==0))
+    {
+        printf("compare ok\n");
+        exitCode=0;
+    }
+    else
+        printf("compare failed\n");
+
+cleanup:
+    /* free(NULL) is a no-op, so buffers never allocated are harmless here. */
+    free(UncprPtr);
+    free(CprPtr);
+    free(pFileData);
+    return exitCode;
+}
diff --git a/zlib/contrib/vstudio/readme.txt b/zlib/contrib/vstudio/readme.txt index 3a4b85c8ef4..16159f9cd0d 100644 --- a/zlib/contrib/vstudio/readme.txt +++ b/zlib/contrib/vstudio/readme.txt @@ -1,8 +1,8 @@ -Building instructions for the DLL versions of Zlib 1.21
-=======================================================
+Building instructions for the DLL versions of Zlib 1.2.3
+========================================================
This directory contains projects that build zlib and minizip using
-Microsoft Visual C++ 7.0/7.1.
+Microsoft Visual C++ 7.0/7.1, and Visual C++ 8.0 (Visual Studio 2005).
You don't need to build these projects yourself. You can download the
binaries from:
@@ -11,18 +11,36 @@ binaries from: More information can be found at this site.
-Build instructions
-------------------
-- Unzip zlib*.zip and copy the files from contrib\vstudio\vc7,
- from contrib\vstudio\masmx86 and from contrib\minizip into the same
- directory.
+Build instructions for Visual Studio 7.x (32 bits)
+--------------------------------------------------
+- Uncompress current zlib, including all contrib/* files
- Download the crtdll library from
http://www.winimage.com/zLibDll/crtdll.zip
- Unzip crtdll.zip to extract crtdll.lib.
-- If you are using x86, use the Release target.
-- Open zlibvc.sln with Microsoft Visual C++ 7.0 or 7.1
+ Unzip crtdll.zip to extract crtdll.lib into contrib\vstudio\vc7.
+- Open contrib\vstudio\vc7\zlibvc.sln with Microsoft Visual C++ 7.x
(Visual Studio .Net 2002 or 2003).
+Build instructions for Visual Studio 2005 (32 bits or 64 bits)
+--------------------------------------------------------------
+- Uncompress current zlib, including all contrib/* files
+- For 32 bits only: download the crtdll library from
+ http://www.winimage.com/zLibDll/crtdll.zip
+ Unzip crtdll.zip to extract crtdll.lib into contrib\vstudio\vc8.
+- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 8.0
+
+Build instructions for Visual Studio 2005 64 bits, PSDK compiler
+----------------------------------------------------------------
+At the time of writing this text file, Visual Studio 2005 (and
+ Microsoft Visual C++ 8.0) is at the beta 2 stage.
+However, you can get the free 64-bit compiler from the Platform SDK,
+ which is NOT a beta, and compile with it from the Visual Studio 2005 IDE.
+See http://www.winimage.com/misc/sdk64onvs2005/ for instructions.
+
+- Uncompress current zlib, including all contrib/* files
+- start Visual Studio 2005 from a platform SDK command prompt, using
+ the /useenv switch
+- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 8.0
+
Important
---------
diff --git a/zlib/contrib/vstudio/vc7/miniunz.vcproj b/zlib/contrib/vstudio/vc7/miniunz.vcproj index 935c250e5f9..ad5117c8451 100644 --- a/zlib/contrib/vstudio/vc7/miniunz.vcproj +++ b/zlib/contrib/vstudio/vc7/miniunz.vcproj @@ -19,6 +19,7 @@ <Tool
Name="VCCLCompilerTool"
Optimization="0"
+ AdditionalIncludeDirectories="..\..\..;..\..\minizip"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
@@ -63,6 +64,7 @@ Optimization="2"
InlineFunctionExpansion="1"
OmitFramePointers="TRUE"
+ AdditionalIncludeDirectories="..\..\..;..\..\minizip"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI;NDEBUG;_CONSOLE"
StringPooling="TRUE"
RuntimeLibrary="4"
@@ -104,7 +106,7 @@ Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm">
<File
- RelativePath="miniunz.c">
+ RelativePath="..\..\minizip\miniunz.c">
</File>
</Filter>
<Filter
@@ -116,7 +118,7 @@ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
</Filter>
<File
- RelativePath="zlibwapi.lib">
+ RelativePath="ReleaseDll\zlibwapi.lib">
</File>
</Files>
<Globals>
diff --git a/zlib/contrib/vstudio/vc7/minizip.vcproj b/zlib/contrib/vstudio/vc7/minizip.vcproj index e6f910742c1..fb5b6320d50 100644 --- a/zlib/contrib/vstudio/vc7/minizip.vcproj +++ b/zlib/contrib/vstudio/vc7/minizip.vcproj @@ -19,6 +19,7 @@ <Tool
Name="VCCLCompilerTool"
Optimization="0"
+ AdditionalIncludeDirectories="..\..\..;..\..\minizip"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI;_DEBUG;_CONSOLE"
MinimalRebuild="TRUE"
BasicRuntimeChecks="3"
@@ -63,6 +64,7 @@ Optimization="2"
InlineFunctionExpansion="1"
OmitFramePointers="TRUE"
+ AdditionalIncludeDirectories="..\..\..;..\..\minizip"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI;NDEBUG;_CONSOLE"
StringPooling="TRUE"
RuntimeLibrary="4"
@@ -104,7 +106,7 @@ Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm">
<File
- RelativePath="minizip.c">
+ RelativePath="..\..\minizip\minizip.c">
</File>
</Filter>
<Filter
@@ -116,7 +118,7 @@ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
</Filter>
<File
- RelativePath="zlibwapi.lib">
+ RelativePath="ReleaseDll\zlibwapi.lib">
</File>
</Files>
<Globals>
diff --git a/zlib/contrib/vstudio/vc7/zlib.rc b/zlib/contrib/vstudio/vc7/zlib.rc index 6c51679853b..72cb8b4c31e 100644 --- a/zlib/contrib/vstudio/vc7/zlib.rc +++ b/zlib/contrib/vstudio/vc7/zlib.rc @@ -2,8 +2,8 @@ #define IDR_VERSION1 1
IDR_VERSION1 VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE
- FILEVERSION 1,2,1,0
- PRODUCTVERSION 1,2,1,0
+ FILEVERSION 1,2,3,0
+ PRODUCTVERSION 1,2,3,0
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
FILEFLAGS 0
FILEOS VOS_DOS_WINDOWS32
@@ -17,7 +17,7 @@ BEGIN BEGIN
VALUE "FileDescription", "zlib data compression library\0"
- VALUE "FileVersion", "1.2.1.0\0"
+ VALUE "FileVersion", "1.2.3.0\0"
VALUE "InternalName", "zlib\0"
VALUE "OriginalFilename", "zlib.dll\0"
VALUE "ProductName", "ZLib.DLL\0"
diff --git a/zlib/contrib/vstudio/vc7/zlibstat.vcproj b/zlib/contrib/vstudio/vc7/zlibstat.vcproj index eb182f7a884..766d7a4d6e7 100644 --- a/zlib/contrib/vstudio/vc7/zlibstat.vcproj +++ b/zlib/contrib/vstudio/vc7/zlibstat.vcproj @@ -20,6 +20,7 @@ <Tool
Name="VCCLCompilerTool"
Optimization="0"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI"
ExceptionHandling="FALSE"
RuntimeLibrary="5"
@@ -61,6 +62,7 @@ <Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -102,6 +104,7 @@ <Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI;ASMV;ASMINF"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -117,7 +120,7 @@ Name="VCCustomBuildTool"/>
<Tool
Name="VCLibrarianTool"
- AdditionalOptions="gvmat32.obj inffas32.obj /NODEFAULTLIB "
+ AdditionalOptions="..\..\masmx86\gvmat32.obj ..\..\masmx86\inffas32.obj /NODEFAULTLIB "
OutputFile=".\zlibstat\zlibstat.lib"
SuppressStartupBanner="TRUE"/>
<Tool
@@ -144,6 +147,7 @@ <Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32;ZLIB_WINAPI"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -182,49 +186,49 @@ Name="Source Files"
Filter="">
<File
- RelativePath=".\adler32.c">
+ RelativePath="..\..\..\adler32.c">
</File>
<File
- RelativePath=".\compress.c">
+ RelativePath="..\..\..\compress.c">
</File>
<File
- RelativePath=".\crc32.c">
+ RelativePath="..\..\..\crc32.c">
</File>
<File
- RelativePath=".\deflate.c">
+ RelativePath="..\..\..\deflate.c">
</File>
<File
- RelativePath=".\gvmat32c.c">
+ RelativePath="..\..\masmx86\gvmat32c.c">
</File>
<File
- RelativePath=".\gzio.c">
+ RelativePath="..\..\..\gzio.c">
</File>
<File
- RelativePath=".\infback.c">
+ RelativePath="..\..\..\infback.c">
</File>
<File
- RelativePath=".\inffast.c">
+ RelativePath="..\..\..\inffast.c">
</File>
<File
- RelativePath=".\inflate.c">
+ RelativePath="..\..\..\inflate.c">
</File>
<File
- RelativePath=".\inftrees.c">
+ RelativePath="..\..\..\inftrees.c">
</File>
<File
- RelativePath=".\ioapi.c">
+ RelativePath="..\..\minizip\ioapi.c">
</File>
<File
- RelativePath=".\trees.c">
+ RelativePath="..\..\..\trees.c">
</File>
<File
- RelativePath=".\uncompr.c">
+ RelativePath="..\..\..\uncompr.c">
</File>
<File
- RelativePath=".\unzip.c">
+ RelativePath="..\..\minizip\unzip.c">
</File>
<File
- RelativePath=".\zip.c">
+ RelativePath="..\..\minizip\zip.c">
</File>
<File
RelativePath=".\zlib.rc">
@@ -233,7 +237,7 @@ RelativePath=".\zlibvc.def">
</File>
<File
- RelativePath=".\zutil.c">
+ RelativePath="..\..\..\zutil.c">
</File>
</Filter>
</Files>
diff --git a/zlib/contrib/vstudio/vc7/zlibvc.sln b/zlib/contrib/vstudio/vc7/zlibvc.sln index 5a007ff697b..927b42b7bde 100644 --- a/zlib/contrib/vstudio/vc7/zlibvc.sln +++ b/zlib/contrib/vstudio/vc7/zlibvc.sln @@ -7,6 +7,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "minizip", "minizip.vcproj", EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "miniunz", "miniunz.vcproj", "{C52F9E7B-498A-42BE-8DB4-85A15694382A}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testZlibDll", "testzlib.vcproj", "{AA6666AA-E09F-4135-9C0C-4FE50C3C654C}"
+EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
ConfigName.0 = Debug
@@ -58,6 +60,16 @@ Global {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm.Build.0 = Release|Win32
{C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutCrtdll.ActiveCfg = Release|Win32
{C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutCrtdll.Build.0 = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Debug.ActiveCfg = Debug|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Debug.Build.0 = Debug|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Release.ActiveCfg = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.Release.Build.0 = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseAxp.ActiveCfg = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseAxp.Build.0 = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutAsm.ActiveCfg = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutAsm.Build.0 = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutCrtdll.ActiveCfg = Release|Win32
+ {AA6666AA-E09F-4135-9C0C-4FE50C3C654C}.ReleaseWithoutCrtdll.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
diff --git a/zlib/contrib/vstudio/vc7/zlibvc.vcproj b/zlib/contrib/vstudio/vc7/zlibvc.vcproj index 4e57bcdff6f..8533b49475e 100644 --- a/zlib/contrib/vstudio/vc7/zlibvc.vcproj +++ b/zlib/contrib/vstudio/vc7/zlibvc.vcproj @@ -20,6 +20,7 @@ <Tool
Name="VCCLCompilerTool"
Optimization="0"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32,ZLIB_WINAPI,ASMV,ASMINF"
ExceptionHandling="FALSE"
RuntimeLibrary="1"
@@ -35,7 +36,7 @@ <Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
- AdditionalDependencies="gvmat32.obj inffas32.obj"
+ AdditionalDependencies="..\..\masmx86\gvmat32.obj ..\..\masmx86\inffas32.obj"
OutputFile=".\DebugDll\zlibwapi.dll"
LinkIncremental="2"
SuppressStartupBanner="TRUE"
@@ -72,10 +73,12 @@ IntermediateDirectory=".\zlibDllWithoutAsm"
ConfigurationType="2"
UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="FALSE">
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32,ZLIB_WINAPI"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -134,10 +137,12 @@ IntermediateDirectory=".\zlibDllWithoutCrtDll"
ConfigurationType="2"
UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="FALSE">
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32,ZLIB_WINAPI,ASMV,ASMINF"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -156,7 +161,7 @@ <Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
- AdditionalDependencies="gvmat32.obj inffas32.obj "
+ AdditionalDependencies="..\..\masmx86\gvmat32.obj ..\..\masmx86\inffas32.obj "
OutputFile=".\zlibDllWithoutCrtDll\zlibwapi.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
@@ -196,10 +201,12 @@ IntermediateDirectory=".\zlibvc__"
ConfigurationType="2"
UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="FALSE">
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32,ZLIB_WINAPI"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -256,10 +263,12 @@ IntermediateDirectory=".\ReleaseDll"
ConfigurationType="2"
UseOfMFC="0"
- ATLMinimizesCRunTimeLibraryUsage="FALSE">
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ WholeProgramOptimization="TRUE">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="..\..\..;..\..\masmx86"
PreprocessorDefinitions="WIN32,ZLIB_WINAPI,ASMV,ASMINF"
StringPooling="TRUE"
ExceptionHandling="FALSE"
@@ -278,7 +287,7 @@ <Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
- AdditionalDependencies="gvmat32.obj inffas32.obj crtdll.lib"
+ AdditionalDependencies="..\..\masmx86\gvmat32.obj ..\..\masmx86\inffas32.obj crtdll.lib"
OutputFile=".\ReleaseDll\zlibwapi.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
@@ -318,19 +327,19 @@ Name="Source Files"
Filter="cpp;c;cxx;rc;def;r;odl;hpj;bat;for;f90">
<File
- RelativePath=".\adler32.c">
+ RelativePath="..\..\..\adler32.c">
</File>
<File
- RelativePath=".\compress.c">
+ RelativePath="..\..\..\compress.c">
</File>
<File
- RelativePath=".\crc32.c">
+ RelativePath="..\..\..\crc32.c">
</File>
<File
- RelativePath=".\deflate.c">
+ RelativePath="..\..\..\deflate.c">
</File>
<File
- RelativePath=".\gvmat32c.c">
+ RelativePath="..\..\masmx86\gvmat32c.c">
<FileConfiguration
Name="ReleaseWithoutAsm|Win32"
ExcludedFromBuild="TRUE">
@@ -339,34 +348,34 @@ </FileConfiguration>
</File>
<File
- RelativePath=".\gzio.c">
+ RelativePath="..\..\..\gzio.c">
</File>
<File
- RelativePath=".\infback.c">
+ RelativePath="..\..\..\infback.c">
</File>
<File
- RelativePath=".\inffast.c">
+ RelativePath="..\..\..\inffast.c">
</File>
<File
- RelativePath=".\inflate.c">
+ RelativePath="..\..\..\inflate.c">
</File>
<File
- RelativePath=".\inftrees.c">
+ RelativePath="..\..\..\inftrees.c">
</File>
<File
- RelativePath=".\ioapi.c">
+ RelativePath="..\..\minizip\ioapi.c">
</File>
<File
- RelativePath=".\iowin32.c">
+ RelativePath="..\..\minizip\iowin32.c">
</File>
<File
- RelativePath=".\trees.c">
+ RelativePath="..\..\..\trees.c">
</File>
<File
- RelativePath=".\uncompr.c">
+ RelativePath="..\..\..\uncompr.c">
</File>
<File
- RelativePath=".\unzip.c">
+ RelativePath="..\..\minizip\unzip.c">
<FileConfiguration
Name="Release|Win32">
<Tool
@@ -376,7 +385,7 @@ </FileConfiguration>
</File>
<File
- RelativePath=".\zip.c">
+ RelativePath="..\..\minizip\zip.c">
<FileConfiguration
Name="Release|Win32">
<Tool
@@ -392,38 +401,38 @@ RelativePath=".\zlibvc.def">
</File>
<File
- RelativePath=".\zutil.c">
+ RelativePath="..\..\..\zutil.c">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;fi;fd">
<File
- RelativePath=".\deflate.h">
+ RelativePath="..\..\..\deflate.h">
</File>
<File
- RelativePath=".\infblock.h">
+ RelativePath="..\..\..\infblock.h">
</File>
<File
- RelativePath=".\infcodes.h">
+ RelativePath="..\..\..\infcodes.h">
</File>
<File
- RelativePath=".\inffast.h">
+ RelativePath="..\..\..\inffast.h">
</File>
<File
- RelativePath=".\inftrees.h">
+ RelativePath="..\..\..\inftrees.h">
</File>
<File
- RelativePath=".\infutil.h">
+ RelativePath="..\..\..\infutil.h">
</File>
<File
- RelativePath=".\zconf.h">
+ RelativePath="..\..\..\zconf.h">
</File>
<File
- RelativePath=".\zlib.h">
+ RelativePath="..\..\..\zlib.h">
</File>
<File
- RelativePath=".\zutil.h">
+ RelativePath="..\..\..\zutil.h">
</File>
</Filter>
<Filter
diff --git a/zlib/old/Makefile.riscos b/zlib/old/Makefile.riscos index d97f4492370..57e29d3fba9 100644 --- a/zlib/old/Makefile.riscos +++ b/zlib/old/Makefile.riscos @@ -3,12 +3,12 @@ # test works out-of-the-box, installs `somewhere' on demand # Toolflags: -CCflags = -c -depend !Depend -IC: -g -throwback -DRISCOS -fah +CCflags = -c -depend !Depend -IC: -g -throwback -DRISCOS -fah C++flags = -c -depend !Depend -IC: -throwback -Linkflags = -aif -c++ -o $@ +Linkflags = -aif -c++ -o $@ ObjAsmflags = -throwback -NoCache -depend !Depend -CMHGflags = -LibFileflags = -c -l -o $@ +CMHGflags = +LibFileflags = -c -l -o $@ Squeezeflags = -o $@ # change the line below to where _you_ want the library installed. @@ -17,10 +17,10 @@ libdest = lib:zlib # Final targets: @.lib: @.o.adler32 @.o.compress @.o.crc32 @.o.deflate @.o.gzio \ @.o.infblock @.o.infcodes @.o.inffast @.o.inflate @.o.inftrees @.o.infutil @.o.trees \ - @.o.uncompr @.o.zutil + @.o.uncompr @.o.zutil LibFile $(LibFileflags) @.o.adler32 @.o.compress @.o.crc32 @.o.deflate \ @.o.gzio @.o.infblock @.o.infcodes @.o.inffast @.o.inflate @.o.inftrees @.o.infutil \ - @.o.trees @.o.uncompr @.o.zutil + @.o.trees @.o.uncompr @.o.zutil test: @.minigzip @.example @.lib @copy @.lib @.libc A~C~DF~L~N~P~Q~RS~TV @echo running tests: hang on. @@ -41,9 +41,9 @@ test: @.minigzip @.example @.lib @/@.example @.fred @.fred @echo that will have given lots of hello!'s. 
-@.minigzip: @.o.minigzip @.lib C:o.Stubs - Link $(Linkflags) @.o.minigzip @.lib C:o.Stubs -@.example: @.o.example @.lib C:o.Stubs +@.minigzip: @.o.minigzip @.lib C:o.Stubs + Link $(Linkflags) @.o.minigzip @.lib C:o.Stubs +@.example: @.o.example @.lib C:o.Stubs Link $(Linkflags) @.o.example @.lib C:o.Stubs install: @.lib diff --git a/zlib/old/README b/zlib/old/README index 06ebe3d13f8..800bf079827 100644 --- a/zlib/old/README +++ b/zlib/old/README @@ -1,3 +1,3 @@ -This directory contains files that have not been updated for zlib 1.2.1 +This directory contains files that have not been updated for zlib 1.2.x (Volunteers are encouraged to help clean this up. Thanks.) diff --git a/zlib/qnx/package.qpg b/zlib/qnx/package.qpg index 2b7d95198e7..8a4a47c723a 100644 --- a/zlib/qnx/package.qpg +++ b/zlib/qnx/package.qpg @@ -25,10 +25,10 @@ <QPG:Files> <QPG:Add file="../zconf.h" install="/opt/include/" user="root:sys" permission="644"/> <QPG:Add file="../zlib.h" install="/opt/include/" user="root:sys" permission="644"/> - <QPG:Add file="../libz.so.1.2.1" install="/opt/lib/" user="root:bin" permission="644"/> - <QPG:Add file="libz.so" install="/opt/lib/" component="dev" filetype="symlink" linkto="libz.so.1.2.1"/> - <QPG:Add file="libz.so.1" install="/opt/lib/" filetype="symlink" linkto="libz.so.1.2.1"/> - <QPG:Add file="../libz.so.1.2.1" install="/opt/lib/" component="slib"/> + <QPG:Add file="../libz.so.1.2.3" install="/opt/lib/" user="root:bin" permission="644"/> + <QPG:Add file="libz.so" install="/opt/lib/" component="dev" filetype="symlink" linkto="libz.so.1.2.3"/> + <QPG:Add file="libz.so.1" install="/opt/lib/" filetype="symlink" linkto="libz.so.1.2.3"/> + <QPG:Add file="../libz.so.1.2.3" install="/opt/lib/" component="slib"/> </QPG:Files> <QPG:PackageFilter> @@ -63,7 +63,7 @@ </QPM:ProductDescription> <QPM:ReleaseDescription> - <QPM:ReleaseVersion>1.2.1</QPM:ReleaseVersion> + <QPM:ReleaseVersion>1.2.3</QPM:ReleaseVersion> <QPM:ReleaseUrgency>Medium</QPM:ReleaseUrgency> 
<QPM:ReleaseStability>Stable</QPM:ReleaseStability> <QPM:ReleaseNoteMinor></QPM:ReleaseNoteMinor> @@ -105,7 +105,7 @@ </QPM:Script> </QPM:ProductInstallationProcedure> </QPM:PackageManifest> - + <QPM:Launch> </QPM:Launch> </QPG:PackageFilter> @@ -119,7 +119,7 @@ </QPM:OrderDependency> </QPM:ProductInstallationProcedure> </QPM:PackageManifest> - + <QPM:Launch> </QPM:Launch> </QPG:PackageFilter> @@ -133,7 +133,7 @@ </QPM:OrderDependency> </QPM:ProductInstallationProcedure> </QPM:PackageManifest> - + <QPM:Launch> </QPM:Launch> </QPG:PackageFilter> diff --git a/zlib/win32/DLL_FAQ.txt b/zlib/win32/DLL_FAQ.txt index 80b6b42568b..fb18e071189 100644 --- a/zlib/win32/DLL_FAQ.txt +++ b/zlib/win32/DLL_FAQ.txt @@ -12,7 +12,7 @@ in the zlib distribution, or at the following location: 1. What is ZLIB1.DLL, and how can I get it? - ZLIB1.DLL is the official build of zlib as a DLL. - (Please remark the symbol '1' in the name.) + (Please remark the character '1' in the name.) Pointers to a precompiled ZLIB1.DLL can be found in the zlib web site at: @@ -37,20 +37,19 @@ in the zlib distribution, or at the following location: and build settings. If you do build the DLL yourself, please make sure that it complies with all the above requirements, and it runs with the precompiled test programs, bundled with - the original ZLIB1.DLL distribution and available at the zlib - web site. + the original ZLIB1.DLL distribution. If, for any reason, you need to build an incompatible DLL, - please use a different name. + please use a different file name. 2. Why did you change the name of the DLL to ZLIB1.DLL? What happened to the old ZLIB.DLL? - - The old ZLIB.DLL, built from zlib-1.1.x and earlier, required - compilation settings that were incompatible to those used by a - static build. The DLL settings were supposed to be enabled by - defining the macro ZLIB_DLL, before including "zlib.h". 
+ - The old ZLIB.DLL, built from zlib-1.1.4 or earlier, required + compilation settings that were incompatible to those used by + a static build. The DLL settings were supposed to be enabled + by defining the macro ZLIB_DLL, before including "zlib.h". Incorrect handling of this macro was silently accepted at build time, resulting in two major problems: @@ -65,8 +64,8 @@ in the zlib distribution, or at the following location: functions. Failure to do so resulted in creating binaries that were unable to run with the official ZLIB.DLL build. - The only possible solution that we could foresee was to make a - binary-incompatible change in the DLL interfacing, in order to + The only possible solution that we could foresee was to make + a binary-incompatible change in the DLL interface, in order to remove the dependency on the ZLIB_DLL macro, and to release the new DLL under a different name. @@ -85,17 +84,13 @@ in the zlib distribution, or at the following location: - In principle, you can do it by assigning calling convention keywords to the macros ZEXPORT and ZEXPORTVA. In practice, - it depends on what you mean by "an old ZLIB.DLL", because - the old DLL exists in several mutually-incompatible versions. - - If you have a compiled application that works with a certain - ZLIB.DLL without any known security issues, there is hardly - a need to rebuild the DLL from new sources only to link it to - the old app binary. But if you really want to do it, you have - to find out first what kind of calling convention uses your - particular ZLIB.DLL build, and to use the same one in the new - build. If you don't know what this is all about, you might be - better off if you would just forget it. + it depends on what you mean by "an old ZLIB.DLL", because the + old DLL exists in several mutually-incompatible versions. + You have to find out first what kind of calling convention is + being used in your particular ZLIB.DLL build, and to use the + same one in the new build. 
If you don't know what this is all + about, you might be better off if you would just leave the old + DLL intact. 4. Can I compile my application using the new zlib interface, and @@ -170,19 +165,19 @@ in the zlib distribution, or at the following location: the K&R-style function prototypes, where the argument types are not specified; but that is another story for another day. - The fact that remains is that CDECL is the default convention. - Even if an explicit convention (such as STDCALL or FASTCALL) - is hard-coded into the function prototypes inside C headers, - problems may appear. One problem, for example, deals with the - necessity to expose the convention in users' callbacks. + The remaining fact is that CDECL is the default convention. + Even if an explicit convention is hard-coded into the function + prototypes inside C headers, problems may appear. The + necessity to expose the convention in users' callbacks is one + of these problems. The calling convention issues are also important when using zlib in other programming languages. Some of them, like Ada (GNAT) and Fortran (GNU G77), have C bindings implemented initially on Unix, and relying on the C calling convention. On the other hand, the pre- .NET versions of Microsoft Visual - Basic require STDCALL, while Borland Delphi prefers (although - it does not require) FASTCALL. + Basic require STDCALL, while Borland Delphi prefers, although + it does not require, FASTCALL. In fairness to all possible uses of zlib outside the C programming language, we choose the default "C" convention. @@ -208,7 +203,14 @@ in the zlib distribution, or at the following location: zlib distribution. - 8. If my application uses ZLIB1.DLL, should I link it to + 8. I need to use zlib in my Microsoft .NET project. What can I + do? + + - Henrik Ravn has contributed a .NET wrapper around zlib. Look + into contrib/dotzlib/, inside the zlib distribution. + + + 9. If my application uses ZLIB1.DLL, should I link it to MSVCRT.DLL? Why? 
- It is not required, but it is recommended to link your @@ -223,8 +225,8 @@ in the zlib distribution, or at the following location: depend on it should also be linked to MSVCRT.DLL. - 9. Why are you saying that ZLIB1.DLL and my application must be - linked to the same C run-time (CRT) library? I linked my +10. Why are you saying that ZLIB1.DLL and my application should + be linked to the same C run-time (CRT) library? I linked my application and my DLLs to different C libraries (e.g. my application to a static library, and my DLLs to MSVCRT.DLL), and everything works fine. @@ -255,11 +257,11 @@ in the zlib distribution, or at the following location: and DLLs are avoiding the corruption of each of the CRTs' internal states, maybe by careful design, or maybe by fortune. - Also note that linking ZLIB1.DLL to non-Microsoft CRTs (such - as those provided by Borland) raises similar problems. + Also note that linking ZLIB1.DLL to non-Microsoft CRTs, such + as those provided by Borland, raises similar problems. -10. Why are you linking ZLIB1.DLL to MSVCRT.DLL? +11. Why are you linking ZLIB1.DLL to MSVCRT.DLL? - MSVCRT.DLL exists on every Windows 95 with a new service pack installed, or with Microsoft Internet Explorer 4 or later, and @@ -269,21 +271,14 @@ in the zlib distribution, or at the following location: software provider for free. The fact that MSVCRT.DLL does not exist on a virgin Windows 95 - is not so problematic. The number of Windows 95 installations - is rapidly decreasing, Microsoft stopped supporting it a long - time ago, and many recent applications from various vendors, - including Microsoft, do not even run on it. Furthermore, no - serious user should run Windows 95 without a proper update - installed. - - There is also the fact that the mainstream C compilers for - Windows are Microsoft Visual C++ 6.0, and gcc/MinGW. 
Both - are producing executables that link to MSVCRT.DLL by default, - without offering other dynamic CRTs as alternatives easy to - select by users. + is not so problematic. Windows 95 is scarcely found nowadays, + Microsoft ended its support a long time ago, and many recent + applications from various vendors, including Microsoft, do not + even run on it. Furthermore, no serious user should run + Windows 95 without a proper update installed. -11. Why are you not linking ZLIB1.DLL to +12. Why are you not linking ZLIB1.DLL to <<my favorite C run-time library>> ? - We considered and abandoned the following alternatives: @@ -294,27 +289,60 @@ in the zlib distribution, or at the following location: to a static C library, you may as well consider linking zlib in statically, too. - * Linking ZLIB1.DLL to CRTDLL.DLL looks very appealing, - because CRTDLL.DLL is present on every Win32 installation. - Unfortunately, it has a series of problems: it raises - difficulties when using it with C++ code, it does not work - with 64-bit file offsets, (and so on...), and Microsoft - discontinued its support a long time ago. - - * Linking ZLIB1.DLL to MSVCR70.DLL, supplied with the - Microsoft .NET platform and Visual C++ 7.0 or newer, is not - a good option. Although it is available for free download - and distribution, its presence is scarce on today's Win32 - installations. If it will ever become more popular than - MSVCRT.DLL and will be pre-installed on the future Win32 - systems, we will probably think again about it. - - * Linking ZLIB1.DLL to NTDLL.DLL is not possible. - NTDLL.DLL exports only a part of the C library, and only on - Windows NT systems. - - -12. I need to link my own DLL build to a CRT different than + * Linking ZLIB1.DLL to CRTDLL.DLL looks appealing, because + CRTDLL.DLL is present on every Win32 installation. 
+ Unfortunately, it has a series of problems: it does not + work properly with Microsoft's C++ libraries, it does not + provide support for 64-bit file offsets, (and so on...), + and Microsoft discontinued its support a long time ago. + + * Linking ZLIB1.DLL to MSVCR70.DLL or MSVCR71.DLL, supplied + with the Microsoft .NET platform, and Visual C++ 7.0/7.1, + raises problems related to the status of ZLIB1.DLL as a + system component. According to the Microsoft Knowledge Base + article KB326922 "INFO: Redistribution of the Shared C + Runtime Component in Visual C++ .NET", MSVCR70.DLL and + MSVCR71.DLL are not supposed to function as system DLLs, + because they may clash with MSVCRT.DLL. Instead, the + application's installer is supposed to put these DLLs + (if needed) in the application's private directory. + If ZLIB1.DLL depends on a non-system runtime, it cannot + function as a redistributable system component. + + * Linking ZLIB1.DLL to non-Microsoft runtimes, such as + Borland's, or Cygwin's, raises problems related to the + reliable presence of these runtimes on Win32 systems. + It's easier to let the DLL build of zlib up to the people + who distribute these runtimes, and who may proceed as + explained in the answer to Question 14. + + +13. If ZLIB1.DLL cannot be linked to MSVCR70.DLL or MSVCR71.DLL, + how can I build/use ZLIB1.DLL in Microsoft Visual C++ 7.0 + (Visual Studio .NET) or newer? + + - Due to the problems explained in the Microsoft Knowledge Base + article KB326922 (see the previous answer), the C runtime that + comes with the VC7 environment is no longer considered a + system component. That is, it should not be assumed that this + runtime exists, or may be installed in a system directory. + Since ZLIB1.DLL is supposed to be a system component, it may + not depend on a non-system component. + + In order to link ZLIB1.DLL and your application to MSVCRT.DLL + in VC7, you need the library of Visual C++ 6.0 or older. 
If + you don't have this library at hand, it's probably best not to + use ZLIB1.DLL. + + We are hoping that, in the future, Microsoft will provide a + way to build applications linked to a proper system runtime, + from the Visual C++ environment. Until then, you have a + couple of alternatives, such as linking zlib in statically. + If your application requires dynamic linking, you may proceed + as explained in the answer to Question 14. + + +14. I need to link my own DLL build to a CRT different than MSVCRT.DLL. What can I do? - Feel free to rebuild the DLL from the zlib sources, and link @@ -330,7 +358,7 @@ in the zlib distribution, or at the following location: CYGWIN1.DLL, and it is distributed under the name CYGZ.DLL. -13. May I include additional pieces of code that I find useful, +15. May I include additional pieces of code that I find useful, link them in ZLIB1.DLL, and export them? - No. A legitimate build of ZLIB1.DLL must not include code @@ -338,14 +366,12 @@ in the zlib distribution, or at the following location: But you can make your own private DLL build, under a different file name, as suggested in the previous answer. - For example, in Borland Delphi and C++ Builder, zlib is a part - of the standard VCL library. If an application links to VCL - dynamically, the name of the distributable binary (VCLxx.DLL) - does not posess any danger of clashing with a legitimate but - incompatible ZLIB1.DLL. + For example, zlib is a part of the VCL library, distributed + with Borland Delphi and C++ Builder. The DLL build of VCL + is a redistributable file, named VCLxx.DLL. -14. May I remove some functionality out of ZLIB1.DLL, by enabling +16. May I remove some functionality out of ZLIB1.DLL, by enabling macros like NO_GZCOMPRESS or NO_GZIP at compile time? - No. A legitimate build of ZLIB1.DLL must provide the complete @@ -354,7 +380,7 @@ in the zlib distribution, or at the following location: different file name, as suggested in the previous answer. -15. 
I made my own ZLIB1.DLL build. Can I test it for compliance? +17. I made my own ZLIB1.DLL build. Can I test it for compliance? - We prefer that you download the official DLL from the zlib web site. If you need something peculiar from this DLL, you diff --git a/zlib/win32/zlib1.rc b/zlib/win32/zlib1.rc index 326375d85c7..99025c97422 100644 --- a/zlib/win32/zlib1.rc +++ b/zlib/win32/zlib1.rc @@ -5,8 +5,8 @@ VS_VERSION_INFO VERSIONINFO #else VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE #endif - FILEVERSION 1,2,1,0 - PRODUCTVERSION 1,2,1,0 + FILEVERSION 1,2,2,0 + PRODUCTVERSION 1,2,2,0 FILEFLAGSMASK VS_FFI_FILEFLAGSMASK #ifdef _DEBUG FILEFLAGS 1 @@ -23,12 +23,12 @@ BEGIN //language ID = U.S. English, char set = Windows, Multilingual BEGIN VALUE "FileDescription", "zlib data compression library\0" - VALUE "FileVersion", "1.2.1\0" + VALUE "FileVersion", "1.2.3\0" VALUE "InternalName", "zlib1.dll\0" - VALUE "LegalCopyright", "(C) 1995-2003 Jean-loup Gailly & Mark Adler\0" + VALUE "LegalCopyright", "(C) 1995-2004 Jean-loup Gailly & Mark Adler\0" VALUE "OriginalFilename", "zlib1.dll\0" VALUE "ProductName", "zlib\0" - VALUE "ProductVersion", "1.2.1\0" + VALUE "ProductVersion", "1.2.3\0" VALUE "Comments","DLL support by Alessandro Iacopetti & Gilles Vollant\0" END END |