From 0f0aea9011a5e0347a60640a7197cdc75e09e382 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 27 Aug 2008 15:16:45 +0000 Subject: o patch from Sergej Gorelkin to improvement code generation for string literals * Replaces linear search through assembler list by the hash lookup. This considerably improves performance on large projects (one example is winunits-jedi package, in which tcgstringconstnode.pass_generate_code was top #1 in calltree, consuming about 12% IRefs). * Enables reusing memory locations for widestring constants (and in general, the same approach may be used for any other type of constants). * Saves a sizeof(pointer) bytes per constant, by removing a location which points to the string. This location is necessary for the typed consts which may be modified, but redundant for string literals because the language does not allow to modify string literals in any way. git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@11657 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/aasmdata.pas | 18 ++++ compiler/cclasses.pas | 248 +++++++++++++++++++++++++++++++++++++++++++++++++- compiler/ncgcnv.pas | 7 +- compiler/ncgcon.pas | 190 ++++++++++---------------------------- 4 files changed, 314 insertions(+), 149 deletions(-) diff --git a/compiler/aasmdata.pas b/compiler/aasmdata.pas index cee6213faa..1dd68c3e32 100644 --- a/compiler/aasmdata.pas +++ b/compiler/aasmdata.pas @@ -66,6 +66,19 @@ interface al_end ); + { Type of constant 'pools'. Currently contains only string types, + but may be extended with reals, sets, etc. } + + TConstPoolType = ( + sp_invalid, + sp_conststr, + sp_shortstr, + sp_longstr, + sp_ansistr, + sp_widestr, + sp_unicodestr + ); + const AsmListTypeStr : array[TAsmListType] of string[24] =( 'al_begin', @@ -126,6 +139,8 @@ interface { Assembler lists } AsmLists : array[TAsmListType] of TAsmList; CurrAsmList : TAsmList; + { hash tables for reusing constant storage } + ConstPools : array[TConstPoolType] of THashSet; constructor create(const n:string); destructor destroy;override; { asmsymbol } @@ -293,6 +308,7 @@ implementation destructor TAsmData.destroy; var hal : TAsmListType; + hp : TConstPoolType; begin { Symbols } {$ifdef MEMDEBUG} @@ -321,6 +337,8 @@ implementation {$ifdef MEMDEBUG} memasmlists.stop; {$endif} + for hp := low(TConstPoolType) to high(TConstPoolType) do + ConstPools[hp].Free; end; diff --git a/compiler/cclasses.pas b/compiler/cclasses.pas index 66ea091c45..51d6cd299b 100644 --- a/compiler/cclasses.pas +++ b/compiler/cclasses.pas @@ -459,7 +459,51 @@ type end; +{****************************************************************** + THashSet (keys not limited to ShortString, no indexed access) +*******************************************************************} + + PPHashSetItem = ^PHashSetItem; + PHashSetItem = ^THashSetItem; + THashSetItem = record + Next: PHashSetItem; + Key: Pointer; + KeyLength: Integer; + HashValue: LongWord; + Data: TObject; + end; + + THashSet = class(TObject) + private + FCount: LongWord; + FBucketCount: LongWord; + FBucket: PPHashSetItem; + FOwnsObjects: Boolean; + FOwnsKeys: Boolean; + function Lookup(Key: Pointer; KeyLen: Integer; var Found: Boolean; + CanCreate: Boolean): PHashSetItem; + procedure Resize(NewCapacity: LongWord); + public + constructor Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean); + destructor Destroy; override; + procedure Clear; + { finds an entry by key } + function Find(Key: Pointer; KeyLen: Integer): PHashSetItem; + { finds an entry, creates one if not exists } + function FindOrAdd(Key: Pointer; KeyLen: Integer; + var Found: Boolean): PHashSetItem; + { finds an entry, creates one if not exists } + function FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem; + { returns Data by given Key } + function Get(Key: Pointer; KeyLen: Integer): TObject; + { removes an entry, returns False if entry wasn't there } + function Remove(Entry: PHashSetItem): Boolean; + property Count: LongWord read FCount; + end; + + function FPHash(const s:shortstring):LongWord; + function FPHash(P: PChar; Len: Integer): LongWord; implementation @@ -1043,7 +1087,7 @@ end; pmax:=@s[length(s)+1]; while (p FBucketCount then { arbitrary limit, probably too high } + begin + { rehash and repeat search } + Resize(FBucketCount * 2); + Result := Lookup(Key, KeyLen, Found, CanCreate); + end + else + begin + New(Result); + if FOwnsKeys then + begin + GetMem(Result^.Key, KeyLen); + Move(Key^, Result^.Key^, KeyLen); + end + else + Result^.Key := Key; + Result^.KeyLength := KeyLen; + Result^.HashValue := h; + Result^.Data := nil; + Result^.Next := nil; + Inc(FCount); + Entry^ := Result; + end; + end; + + + procedure THashSet.Resize(NewCapacity: LongWord); + var + p, chain: PPHashSetItem; + i: Integer; + e, n: PHashSetItem; + begin + p := AllocMem(NewCapacity * sizeof(PHashSetItem)); + for i := 0 to FBucketCount-1 do + begin + e := FBucket[i]; + while Assigned(e) do + begin + chain := @p[e^.HashValue mod NewCapacity]; + n := e^.Next; + e^.Next := chain^; + chain^ := e; + e := n; + end; + end; + FBucketCount := NewCapacity; + FreeMem(FBucket); + FBucket := p; + end; + + + function THashSet.Remove(Entry: PHashSetItem): Boolean; + var + chain: PPHashSetItem; + begin + chain := @FBucket[Entry^.HashValue mod FBucketCount]; + while Assigned(chain^) do + begin + if chain^ = Entry then + begin + chain^ := Entry^.Next; + if FOwnsObjects then + Entry^.Data.Free; + if FOwnsKeys then + FreeMem(Entry^.Key); + Dispose(Entry); + Dec(FCount); + Result := True; + Exit; + end; + chain := @chain^^.Next; + end; + Result := False; + end; + end. diff --git a/compiler/ncgcnv.pas b/compiler/ncgcnv.pas index c7db0d2fe2..734667c020 100644 --- a/compiler/ncgcnv.pas +++ b/compiler/ncgcnv.pas @@ -159,8 +159,7 @@ interface end else begin - location.register:=cg.getaddressregister(current_asmdata.CurrAsmList); - cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,left.location.reference,location.register); + location_copy(location,left.location); end; end; cst_longstring: @@ -179,9 +178,7 @@ interface end else begin - location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); - cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_INT,left.location.reference, - location.register); + location_copy(location,left.location); end; end; end; diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas index 2914e814a8..b1a1975027 100644 --- a/compiler/ncgcon.pas +++ b/compiler/ncgcon.pas @@ -71,7 +71,7 @@ implementation symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil, cpuinfo,cpubase, cgbase,cgobj,cgutils, - ncgutil + ncgutil, cclasses ; @@ -262,14 +262,24 @@ implementation procedure tcgstringconstnode.pass_generate_code; var - hp1,hp2 : tai; l1, lastlabel : tasmlabel; - lastlabelhp : tai; pc : pchar; - same_string : boolean; - l,j, - i,mylength : longint; + l,i : longint; + href: treference; + pooltype: TConstPoolType; + pool: THashSet; + entry: PHashSetItem; + + const + PoolMap: array[tconststringtype] of TConstPoolType = ( + sp_conststr, + sp_shortstr, + sp_longstr, + sp_ansistr, + sp_widestr, + sp_unicodestr + ); begin { for empty ansistrings we could return a constant 0 } if (cst_type in [cst_ansistring,cst_widestring]) and (len=0) then @@ -278,160 +288,49 @@ implementation location.value:=0; exit; end; - { return a constant reference in memory } - location_reset(location,LOC_CREFERENCE,def_cgsize(resultdef)); { const already used ? } - lastlabel:=nil; - lastlabelhp:=nil; if not assigned(lab_str) then begin - if is_shortstring(resultdef) then - mylength:=len+2 + pooltype := PoolMap[cst_type]; + if current_asmdata.ConstPools[pooltype] = nil then + current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False); + pool := current_asmdata.ConstPools[pooltype]; + + if cst_type in [cst_widestring, cst_unicodestring] then + entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size) else - mylength:=len+1; - { widestrings can't be reused yet } - if not(is_widestring(resultdef)) then - begin - { tries to find an old entry } - hp1:=tai(current_asmdata.asmlists[al_typedconsts].first); - while assigned(hp1) do - begin - if hp1.typ=ait_label then - begin - lastlabel:=tai_label(hp1).labsym; - lastlabelhp:=hp1; - end - else - begin - same_string:=false; - if (hp1.typ=ait_string) and - (lastlabel<>nil) and - (tai_string(hp1).len=mylength) then - begin - case cst_type of - cst_conststring: - begin - j:=0; - same_string:=true; - if len>0 then - begin - for i:=0 to len-1 do - begin - if tai_string(hp1).str[j]<>value_str[i] then - begin - same_string:=false; - break; - end; - inc(j); - end; - end; - end; - cst_shortstring: - begin - { if shortstring then check the length byte first and - set the start index to 1 } - if len=ord(tai_string(hp1).str[0]) then - begin - j:=1; - same_string:=true; - if len>0 then - begin - for i:=0 to len-1 do - begin - if tai_string(hp1).str[j]<>value_str[i] then - begin - same_string:=false; - break; - end; - inc(j); - end; - end; - end; - end; - cst_ansistring, - cst_widestring : - begin - { before the string the following sequence must be found: -