diff options
author | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2008-08-27 15:16:45 +0000 |
---|---|---|
committer | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2008-08-27 15:16:45 +0000 |
commit | 0f0aea9011a5e0347a60640a7197cdc75e09e382 (patch) | |
tree | 0ecb7c57a2a95f4e8e8d2c1b709d3b400b552600 | |
parent | 99b31d58de46c66b3f0182df7ac2bcf7d9cd5761 (diff) | |
download | fpc-0f0aea9011a5e0347a60640a7197cdc75e09e382.tar.gz |
o patch from Sergej Gorelkin to improvement code generation for string literals
* Replaces linear search through assembler list by the hash lookup.
This considerably improves performance on large projects
(one example is winunits-jedi package, in which tcgstringconstnode.pass_generate_code
was top #1 in calltree, consuming about 12% IRefs).
* Enables reusing memory locations for widestring constants
(and in general, the same approach may be used for any other type of constants).
* Saves a sizeof(pointer) bytes per constant, by removing a location
which points to the string. This location is necessary for the
typed consts which may be modified, but redundant for string literals
because the language does not allow to modify string literals in any way.
git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@11657 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r-- | compiler/aasmdata.pas | 18 | ||||
-rw-r--r-- | compiler/cclasses.pas | 248 | ||||
-rw-r--r-- | compiler/ncgcnv.pas | 7 | ||||
-rw-r--r-- | compiler/ncgcon.pas | 190 |
4 files changed, 314 insertions, 149 deletions
diff --git a/compiler/aasmdata.pas b/compiler/aasmdata.pas index cee6213faa..1dd68c3e32 100644 --- a/compiler/aasmdata.pas +++ b/compiler/aasmdata.pas @@ -66,6 +66,19 @@ interface al_end ); + { Type of constant 'pools'. Currently contains only string types, + but may be extended with reals, sets, etc. } + + TConstPoolType = ( + sp_invalid, + sp_conststr, + sp_shortstr, + sp_longstr, + sp_ansistr, + sp_widestr, + sp_unicodestr + ); + const AsmListTypeStr : array[TAsmListType] of string[24] =( 'al_begin', @@ -126,6 +139,8 @@ interface { Assembler lists } AsmLists : array[TAsmListType] of TAsmList; CurrAsmList : TAsmList; + { hash tables for reusing constant storage } + ConstPools : array[TConstPoolType] of THashSet; constructor create(const n:string); destructor destroy;override; { asmsymbol } @@ -293,6 +308,7 @@ implementation destructor TAsmData.destroy; var hal : TAsmListType; + hp : TConstPoolType; begin { Symbols } {$ifdef MEMDEBUG} @@ -321,6 +337,8 @@ implementation {$ifdef MEMDEBUG} memasmlists.stop; {$endif} + for hp := low(TConstPoolType) to high(TConstPoolType) do + ConstPools[hp].Free; end; diff --git a/compiler/cclasses.pas b/compiler/cclasses.pas index 66ea091c45..51d6cd299b 100644 --- a/compiler/cclasses.pas +++ b/compiler/cclasses.pas @@ -459,7 +459,51 @@ type end; +{****************************************************************** + THashSet (keys not limited to ShortString, no indexed access) +*******************************************************************} + + PPHashSetItem = ^PHashSetItem; + PHashSetItem = ^THashSetItem; + THashSetItem = record + Next: PHashSetItem; + Key: Pointer; + KeyLength: Integer; + HashValue: LongWord; + Data: TObject; + end; + + THashSet = class(TObject) + private + FCount: LongWord; + FBucketCount: LongWord; + FBucket: PPHashSetItem; + FOwnsObjects: Boolean; + FOwnsKeys: Boolean; + function Lookup(Key: Pointer; KeyLen: Integer; var Found: Boolean; + CanCreate: Boolean): PHashSetItem; + procedure Resize(NewCapacity: LongWord); + public + constructor Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean); + destructor Destroy; override; + procedure Clear; + { finds an entry by key } + function Find(Key: Pointer; KeyLen: Integer): PHashSetItem; + { finds an entry, creates one if not exists } + function FindOrAdd(Key: Pointer; KeyLen: Integer; + var Found: Boolean): PHashSetItem; + { finds an entry, creates one if not exists } + function FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem; + { returns Data by given Key } + function Get(Key: Pointer; KeyLen: Integer): TObject; + { removes an entry, returns False if entry wasn't there } + function Remove(Entry: PHashSetItem): Boolean; + property Count: LongWord read FCount; + end; + + function FPHash(const s:shortstring):LongWord; + function FPHash(P: PChar; Len: Integer): LongWord; implementation @@ -1043,7 +1087,7 @@ end; pmax:=@s[length(s)+1]; while (p<pmax) do begin - result:=LongWord((result shl 5) - result) xor LongWord(P^); + result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^); inc(p); end; {$ifdef overflowon} @@ -1052,6 +1096,26 @@ end; {$endif} end; + function FPHash(P: PChar; Len: Integer): LongWord; + Var + pmax : pchar; + begin +{$ifopt Q+} +{$define overflowon} +{$Q-} +{$endif} + result:=0; + pmax:=p+len; + while (p<pmax) do + begin + result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^); + inc(p); + end; +{$ifdef overflowon} +{$Q+} +{$undef overflowon} +{$endif} + end; procedure TFPHashList.RaiseIndexError(Index : Integer); begin @@ -2226,16 +2290,14 @@ end; function TCmdStrList.Find(const s:TCmdStr):TCmdStrListItem; var NewNode : TCmdStrListItem; - ups : string; begin result:=nil; if s='' then exit; - ups:=upper(s); NewNode:=TCmdStrListItem(FFirst); while assigned(NewNode) do begin - if upper(NewNode.FPStr)=ups then + if SysUtils.CompareText(s, NewNode.FPStr)=0 then begin result:=NewNode; exit; @@ -2521,4 +2583,182 @@ end; end; end; +{**************************************************************************** + thashset +****************************************************************************} + + constructor THashSet.Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean); + var + I: Integer; + begin + inherited Create; + FOwnsObjects := OwnObjects; + FOwnsKeys := OwnKeys; + I := 64; + while I < InitSize do I := I shl 1; + FBucketCount := I; + FBucket := AllocMem(I * sizeof(PHashSetItem)); + end; + + + destructor THashSet.Destroy; + begin + Clear; + FreeMem(FBucket); + inherited Destroy; + end; + + + procedure THashSet.Clear; + var + I: Integer; + item, next: PHashSetItem; + begin + for I := 0 to FBucketCount-1 do + begin + item := FBucket[I]; + while Assigned(item) do + begin + next := item^.Next; + if FOwnsObjects then + item^.Data.Free; + if FOwnsKeys then + FreeMem(item^.Key); + Dispose(item); + item := next; + end; + end; + FillChar(FBucket^, FBucketCount * sizeof(PHashSetItem), 0); + end; + + + function THashSet.Find(Key: Pointer; KeyLen: Integer): PHashSetItem; + var + Dummy: Boolean; + begin + Result := Lookup(Key, KeyLen, Dummy, False); + end; + + + function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer; + var Found: Boolean): PHashSetItem; + begin + Result := Lookup(Key, KeyLen, Found, True); + end; + + + function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem; + var + Dummy: Boolean; + begin + Result := Lookup(Key, KeyLen, Dummy, True); + end; + + + function THashSet.Get(Key: Pointer; KeyLen: Integer): TObject; + var + e: PHashSetItem; + Dummy: Boolean; + begin + e := Lookup(Key, KeyLen, Dummy, False); + if Assigned(e) then + Result := e^.Data + else + Result := nil; + end; + + + function THashSet.Lookup(Key: Pointer; KeyLen: Integer; + var Found: Boolean; CanCreate: Boolean): PHashSetItem; + var + Entry: PPHashSetItem; + h: LongWord; + begin + h := FPHash(Key, KeyLen); + Entry := @FBucket[h mod FBucketCount]; + while Assigned(Entry^) and + not ((Entry^^.HashValue = h) and (Entry^^.KeyLength = KeyLen) and + (CompareByte(Entry^^.Key^, Key^, KeyLen) = 0)) do + Entry := @Entry^^.Next; + Found := Assigned(Entry^); + if Found or (not CanCreate) then + begin + Result := Entry^; + Exit; + end; + if FCount > FBucketCount then { arbitrary limit, probably too high } + begin + { rehash and repeat search } + Resize(FBucketCount * 2); + Result := Lookup(Key, KeyLen, Found, CanCreate); + end + else + begin + New(Result); + if FOwnsKeys then + begin + GetMem(Result^.Key, KeyLen); + Move(Key^, Result^.Key^, KeyLen); + end + else + Result^.Key := Key; + Result^.KeyLength := KeyLen; + Result^.HashValue := h; + Result^.Data := nil; + Result^.Next := nil; + Inc(FCount); + Entry^ := Result; + end; + end; + + + procedure THashSet.Resize(NewCapacity: LongWord); + var + p, chain: PPHashSetItem; + i: Integer; + e, n: PHashSetItem; + begin + p := AllocMem(NewCapacity * sizeof(PHashSetItem)); + for i := 0 to FBucketCount-1 do + begin + e := FBucket[i]; + while Assigned(e) do + begin + chain := @p[e^.HashValue mod NewCapacity]; + n := e^.Next; + e^.Next := chain^; + chain^ := e; + e := n; + end; + end; + FBucketCount := NewCapacity; + FreeMem(FBucket); + FBucket := p; + end; + + + function THashSet.Remove(Entry: PHashSetItem): Boolean; + var + chain: PPHashSetItem; + begin + chain := @FBucket[Entry^.HashValue mod FBucketCount]; + while Assigned(chain^) do + begin + if chain^ = Entry then + begin + chain^ := Entry^.Next; + if FOwnsObjects then + Entry^.Data.Free; + if FOwnsKeys then + FreeMem(Entry^.Key); + Dispose(Entry); + Dec(FCount); + Result := True; + Exit; + end; + chain := @chain^^.Next; + end; + Result := False; + end; + end. diff --git a/compiler/ncgcnv.pas b/compiler/ncgcnv.pas index c7db0d2fe2..734667c020 100644 --- a/compiler/ncgcnv.pas +++ b/compiler/ncgcnv.pas @@ -159,8 +159,7 @@ interface end else begin - location.register:=cg.getaddressregister(current_asmdata.CurrAsmList); - cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,left.location.reference,location.register); + location_copy(location,left.location); end; end; cst_longstring: @@ -179,9 +178,7 @@ interface end else begin - location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); - cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_INT,left.location.reference, - location.register); + location_copy(location,left.location); end; end; end; diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas index 2914e814a8..b1a1975027 100644 --- a/compiler/ncgcon.pas +++ b/compiler/ncgcon.pas @@ -71,7 +71,7 @@ implementation symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil, cpuinfo,cpubase, cgbase,cgobj,cgutils, - ncgutil + ncgutil, cclasses ; @@ -262,14 +262,24 @@ implementation procedure tcgstringconstnode.pass_generate_code; var - hp1,hp2 : tai; l1, lastlabel : tasmlabel; - lastlabelhp : tai; pc : pchar; - same_string : boolean; - l,j, - i,mylength : longint; + l,i : longint; + href: treference; + pooltype: TConstPoolType; + pool: THashSet; + entry: PHashSetItem; + + const + PoolMap: array[tconststringtype] of TConstPoolType = ( + sp_conststr, + sp_shortstr, + sp_longstr, + sp_ansistr, + sp_widestr, + sp_unicodestr + ); begin { for empty ansistrings we could return a constant 0 } if (cst_type in [cst_ansistring,cst_widestring]) and (len=0) then @@ -278,160 +288,49 @@ implementation location.value:=0; exit; end; - { return a constant reference in memory } - location_reset(location,LOC_CREFERENCE,def_cgsize(resultdef)); { const already used ? } - lastlabel:=nil; - lastlabelhp:=nil; if not assigned(lab_str) then begin - if is_shortstring(resultdef) then - mylength:=len+2 + pooltype := PoolMap[cst_type]; + if current_asmdata.ConstPools[pooltype] = nil then + current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False); + pool := current_asmdata.ConstPools[pooltype]; + + if cst_type in [cst_widestring, cst_unicodestring] then + entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size) else - mylength:=len+1; - { widestrings can't be reused yet } - if not(is_widestring(resultdef)) then - begin - { tries to find an old entry } - hp1:=tai(current_asmdata.asmlists[al_typedconsts].first); - while assigned(hp1) do - begin - if hp1.typ=ait_label then - begin - lastlabel:=tai_label(hp1).labsym; - lastlabelhp:=hp1; - end - else - begin - same_string:=false; - if (hp1.typ=ait_string) and - (lastlabel<>nil) and - (tai_string(hp1).len=mylength) then - begin - case cst_type of - cst_conststring: - begin - j:=0; - same_string:=true; - if len>0 then - begin - for i:=0 to len-1 do - begin - if tai_string(hp1).str[j]<>value_str[i] then - begin - same_string:=false; - break; - end; - inc(j); - end; - end; - end; - cst_shortstring: - begin - { if shortstring then check the length byte first and - set the start index to 1 } - if len=ord(tai_string(hp1).str[0]) then - begin - j:=1; - same_string:=true; - if len>0 then - begin - for i:=0 to len-1 do - begin - if tai_string(hp1).str[j]<>value_str[i] then - begin - same_string:=false; - break; - end; - inc(j); - end; - end; - end; - end; - cst_ansistring, - cst_widestring : - begin - { before the string the following sequence must be found: - <label> - constsymbol <datalabel> - constint -1 - constint <len> - we must then return <label> to reuse - } - hp2:=tai(lastlabelhp.previous); - if assigned(hp2) and - (hp2.typ=ait_const) and - (tai_const(hp2).consttype=aitconst_aint) and - (tai_const(hp2).value=len) and - assigned(hp2.previous) and - (tai(hp2.previous).typ=ait_const) and - (tai_const(hp2.previous).consttype=aitconst_aint) and - (tai_const(hp2.previous).value=-1) and - assigned(hp2.previous.previous) and - (tai(hp2.previous.previous).typ=ait_const) and - (tai_const(hp2.previous.previous).consttype=aitconst_ptr) and - assigned(hp2.previous.previous.previous) and - (tai(hp2.previous.previous.previous).typ=ait_label) then - begin - lastlabel:=tai_label(hp2.previous.previous.previous).labsym; - same_string:=true; - j:=0; - if len>0 then - begin - for i:=0 to len-1 do - begin - if tai_string(hp1).str[j]<>value_str[i] then - begin - same_string:=false; - break; - end; - inc(j); - end; - end; - end; - end; - end; - { found ? } - if same_string then - begin - lab_str:=lastlabel; - break; - end; - end; - lastlabel:=nil; - end; - hp1:=tai(hp1.next); - end; - end; + entry := pool.FindOrAdd(value_str, len); + + lab_str := TAsmLabel(entry^.Data); // is it needed anymore? + { :-(, we must generate a new entry } - if not assigned(lab_str) then + if not assigned(entry^.Data) then begin current_asmdata.getdatalabel(lastlabel); lab_str:=lastlabel; + entry^.Data := lastlabel; maybe_new_object_file(current_asmdata.asmlists[al_typedconsts]); if (len=0) or not(cst_type in [cst_ansistring,cst_widestring]) then new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,lastlabel.name,const_align(sizeof(pint))) else new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata,lastlabel.name,const_align(sizeof(pint))); - current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel)); { generate an ansi string ? } case cst_type of cst_ansistring: begin - { an empty ansi string is nil! } if len=0 then - current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil)) - else + InternalError(2008032301) { empty string should be handled above } + else begin current_asmdata.getdatalabel(l1); - current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1)); + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1)); current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len)); + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel)); { make sure the string doesn't get dead stripped if the header is referenced } if (target_info.system in systems_darwin) then current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name)); - current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); { ... and vice versa } if (target_info.system in systems_darwin) then current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name)); @@ -444,14 +343,12 @@ implementation end; cst_widestring: begin - { an empty wide string is nil! } if len=0 then - current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil)) + InternalError(2008032302) { empty string should be handled above } else begin current_asmdata.getdatalabel(l1); - current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1)); - + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); { we use always UTF-16 coding for constants } { at least for now } { Consts.concat(Tai_const.Create_8bit(2)); } @@ -462,10 +359,10 @@ implementation current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1)); current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len*cwidechartype.size)); end; + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel)); { make sure the string doesn't get dead stripped if the header is referenced } if (target_info.system in systems_darwin) then current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name)); - current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); { ... and vice versa } if (target_info.system in systems_darwin) then current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name)); @@ -477,6 +374,7 @@ implementation end; cst_shortstring: begin + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel)); { truncate strings larger than 255 chars } if len>255 then l:=255 @@ -491,6 +389,7 @@ implementation end; cst_conststring: begin + current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel)); { include terminating zero } getmem(pc,len+1); move(value_str^,pc[0],len); @@ -500,7 +399,18 @@ implementation end; end; end; - location.reference.symbol:=lab_str; + if cst_type in [cst_ansistring, cst_widestring] then + begin + location_reset(location, LOC_CREGISTER, OS_ADDR); + reference_reset_symbol(href, lab_str, 0); + location.register:=cg.getaddressregister(current_asmdata.CurrAsmList); + cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,location.register); + end + else + begin + location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef)); + location.reference.symbol:=lab_str; + end; end; |