summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorflorian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>2008-08-27 15:16:45 +0000
committerflorian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>2008-08-27 15:16:45 +0000
commit0f0aea9011a5e0347a60640a7197cdc75e09e382 (patch)
tree0ecb7c57a2a95f4e8e8d2c1b709d3b400b552600
parent99b31d58de46c66b3f0182df7ac2bcf7d9cd5761 (diff)
downloadfpc-0f0aea9011a5e0347a60640a7197cdc75e09e382.tar.gz
o patch from Sergej Gorelkin to improvement code generation for string literals
* Replaces linear search through assembler list by the hash lookup. This considerably improves performance on large projects (one example is winunits-jedi package, in which tcgstringconstnode.pass_generate_code was top #1 in calltree, consuming about 12% IRefs). * Enables reusing memory locations for widestring constants (and in general, the same approach may be used for any other type of constants). * Saves a sizeof(pointer) bytes per constant, by removing a location which points to the string. This location is necessary for the typed consts which may be modified, but redundant for string literals because the language does not allow to modify string literals in any way. git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@11657 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r--compiler/aasmdata.pas18
-rw-r--r--compiler/cclasses.pas248
-rw-r--r--compiler/ncgcnv.pas7
-rw-r--r--compiler/ncgcon.pas190
4 files changed, 314 insertions, 149 deletions
diff --git a/compiler/aasmdata.pas b/compiler/aasmdata.pas
index cee6213faa..1dd68c3e32 100644
--- a/compiler/aasmdata.pas
+++ b/compiler/aasmdata.pas
@@ -66,6 +66,19 @@ interface
al_end
);
+ { Type of constant 'pools'. Currently contains only string types,
+ but may be extended with reals, sets, etc. }
+
+ TConstPoolType = (
+ sp_invalid,
+ sp_conststr,
+ sp_shortstr,
+ sp_longstr,
+ sp_ansistr,
+ sp_widestr,
+ sp_unicodestr
+ );
+
const
AsmListTypeStr : array[TAsmListType] of string[24] =(
'al_begin',
@@ -126,6 +139,8 @@ interface
{ Assembler lists }
AsmLists : array[TAsmListType] of TAsmList;
CurrAsmList : TAsmList;
+ { hash tables for reusing constant storage }
+ ConstPools : array[TConstPoolType] of THashSet;
constructor create(const n:string);
destructor destroy;override;
{ asmsymbol }
@@ -293,6 +308,7 @@ implementation
destructor TAsmData.destroy;
var
hal : TAsmListType;
+ hp : TConstPoolType;
begin
{ Symbols }
{$ifdef MEMDEBUG}
@@ -321,6 +337,8 @@ implementation
{$ifdef MEMDEBUG}
memasmlists.stop;
{$endif}
+ for hp := low(TConstPoolType) to high(TConstPoolType) do
+ ConstPools[hp].Free;
end;
diff --git a/compiler/cclasses.pas b/compiler/cclasses.pas
index 66ea091c45..51d6cd299b 100644
--- a/compiler/cclasses.pas
+++ b/compiler/cclasses.pas
@@ -459,7 +459,51 @@ type
end;
+{******************************************************************
+ THashSet (keys not limited to ShortString, no indexed access)
+*******************************************************************}
+
+ PPHashSetItem = ^PHashSetItem;
+ PHashSetItem = ^THashSetItem;
+ THashSetItem = record
+ Next: PHashSetItem;
+ Key: Pointer;
+ KeyLength: Integer;
+ HashValue: LongWord;
+ Data: TObject;
+ end;
+
+ THashSet = class(TObject)
+ private
+ FCount: LongWord;
+ FBucketCount: LongWord;
+ FBucket: PPHashSetItem;
+ FOwnsObjects: Boolean;
+ FOwnsKeys: Boolean;
+ function Lookup(Key: Pointer; KeyLen: Integer; var Found: Boolean;
+ CanCreate: Boolean): PHashSetItem;
+ procedure Resize(NewCapacity: LongWord);
+ public
+ constructor Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean);
+ destructor Destroy; override;
+ procedure Clear;
+ { finds an entry by key }
+ function Find(Key: Pointer; KeyLen: Integer): PHashSetItem;
+ { finds an entry, creates one if not exists }
+ function FindOrAdd(Key: Pointer; KeyLen: Integer;
+ var Found: Boolean): PHashSetItem;
+ { finds an entry, creates one if not exists }
+ function FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem;
+ { returns Data by given Key }
+ function Get(Key: Pointer; KeyLen: Integer): TObject;
+ { removes an entry, returns False if entry wasn't there }
+ function Remove(Entry: PHashSetItem): Boolean;
+ property Count: LongWord read FCount;
+ end;
+
+
function FPHash(const s:shortstring):LongWord;
+ function FPHash(P: PChar; Len: Integer): LongWord;
implementation
@@ -1043,7 +1087,7 @@ end;
pmax:=@s[length(s)+1];
while (p<pmax) do
begin
- result:=LongWord((result shl 5) - result) xor LongWord(P^);
+ result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^);
inc(p);
end;
{$ifdef overflowon}
@@ -1052,6 +1096,26 @@ end;
{$endif}
end;
+ function FPHash(P: PChar; Len: Integer): LongWord;
+ Var
+ pmax : pchar;
+ begin
+{$ifopt Q+}
+{$define overflowon}
+{$Q-}
+{$endif}
+ result:=0;
+ pmax:=p+len;
+ while (p<pmax) do
+ begin
+ result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^);
+ inc(p);
+ end;
+{$ifdef overflowon}
+{$Q+}
+{$undef overflowon}
+{$endif}
+ end;
procedure TFPHashList.RaiseIndexError(Index : Integer);
begin
@@ -2226,16 +2290,14 @@ end;
function TCmdStrList.Find(const s:TCmdStr):TCmdStrListItem;
var
NewNode : TCmdStrListItem;
- ups : string;
begin
result:=nil;
if s='' then
exit;
- ups:=upper(s);
NewNode:=TCmdStrListItem(FFirst);
while assigned(NewNode) do
begin
- if upper(NewNode.FPStr)=ups then
+ if SysUtils.CompareText(s, NewNode.FPStr)=0 then
begin
result:=NewNode;
exit;
@@ -2521,4 +2583,182 @@ end;
end;
end;
+{****************************************************************************
+ thashset
+****************************************************************************}
+
+ constructor THashSet.Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean);
+ var
+ I: Integer;
+ begin
+ inherited Create;
+ FOwnsObjects := OwnObjects;
+ FOwnsKeys := OwnKeys;
+ I := 64;
+ while I < InitSize do I := I shl 1;
+ FBucketCount := I;
+ FBucket := AllocMem(I * sizeof(PHashSetItem));
+ end;
+
+
+ destructor THashSet.Destroy;
+ begin
+ Clear;
+ FreeMem(FBucket);
+ inherited Destroy;
+ end;
+
+
+ procedure THashSet.Clear;
+ var
+ I: Integer;
+ item, next: PHashSetItem;
+ begin
+ for I := 0 to FBucketCount-1 do
+ begin
+ item := FBucket[I];
+ while Assigned(item) do
+ begin
+ next := item^.Next;
+ if FOwnsObjects then
+ item^.Data.Free;
+ if FOwnsKeys then
+ FreeMem(item^.Key);
+ Dispose(item);
+ item := next;
+ end;
+ end;
+ FillChar(FBucket^, FBucketCount * sizeof(PHashSetItem), 0);
+ end;
+
+
+ function THashSet.Find(Key: Pointer; KeyLen: Integer): PHashSetItem;
+ var
+ Dummy: Boolean;
+ begin
+ Result := Lookup(Key, KeyLen, Dummy, False);
+ end;
+
+
+ function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer;
+ var Found: Boolean): PHashSetItem;
+ begin
+ Result := Lookup(Key, KeyLen, Found, True);
+ end;
+
+
+ function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem;
+ var
+ Dummy: Boolean;
+ begin
+ Result := Lookup(Key, KeyLen, Dummy, True);
+ end;
+
+
+ function THashSet.Get(Key: Pointer; KeyLen: Integer): TObject;
+ var
+ e: PHashSetItem;
+ Dummy: Boolean;
+ begin
+ e := Lookup(Key, KeyLen, Dummy, False);
+ if Assigned(e) then
+ Result := e^.Data
+ else
+ Result := nil;
+ end;
+
+
+ function THashSet.Lookup(Key: Pointer; KeyLen: Integer;
+ var Found: Boolean; CanCreate: Boolean): PHashSetItem;
+ var
+ Entry: PPHashSetItem;
+ h: LongWord;
+ begin
+ h := FPHash(Key, KeyLen);
+ Entry := @FBucket[h mod FBucketCount];
+ while Assigned(Entry^) and
+ not ((Entry^^.HashValue = h) and (Entry^^.KeyLength = KeyLen) and
+ (CompareByte(Entry^^.Key^, Key^, KeyLen) = 0)) do
+ Entry := @Entry^^.Next;
+ Found := Assigned(Entry^);
+ if Found or (not CanCreate) then
+ begin
+ Result := Entry^;
+ Exit;
+ end;
+ if FCount > FBucketCount then { arbitrary limit, probably too high }
+ begin
+ { rehash and repeat search }
+ Resize(FBucketCount * 2);
+ Result := Lookup(Key, KeyLen, Found, CanCreate);
+ end
+ else
+ begin
+ New(Result);
+ if FOwnsKeys then
+ begin
+ GetMem(Result^.Key, KeyLen);
+ Move(Key^, Result^.Key^, KeyLen);
+ end
+ else
+ Result^.Key := Key;
+ Result^.KeyLength := KeyLen;
+ Result^.HashValue := h;
+ Result^.Data := nil;
+ Result^.Next := nil;
+ Inc(FCount);
+ Entry^ := Result;
+ end;
+ end;
+
+
+ procedure THashSet.Resize(NewCapacity: LongWord);
+ var
+ p, chain: PPHashSetItem;
+ i: Integer;
+ e, n: PHashSetItem;
+ begin
+ p := AllocMem(NewCapacity * sizeof(PHashSetItem));
+ for i := 0 to FBucketCount-1 do
+ begin
+ e := FBucket[i];
+ while Assigned(e) do
+ begin
+ chain := @p[e^.HashValue mod NewCapacity];
+ n := e^.Next;
+ e^.Next := chain^;
+ chain^ := e;
+ e := n;
+ end;
+ end;
+ FBucketCount := NewCapacity;
+ FreeMem(FBucket);
+ FBucket := p;
+ end;
+
+
+ function THashSet.Remove(Entry: PHashSetItem): Boolean;
+ var
+ chain: PPHashSetItem;
+ begin
+ chain := @FBucket[Entry^.HashValue mod FBucketCount];
+ while Assigned(chain^) do
+ begin
+ if chain^ = Entry then
+ begin
+ chain^ := Entry^.Next;
+ if FOwnsObjects then
+ Entry^.Data.Free;
+ if FOwnsKeys then
+ FreeMem(Entry^.Key);
+ Dispose(Entry);
+ Dec(FCount);
+ Result := True;
+ Exit;
+ end;
+ chain := @chain^^.Next;
+ end;
+ Result := False;
+ end;
+
end.
diff --git a/compiler/ncgcnv.pas b/compiler/ncgcnv.pas
index c7db0d2fe2..734667c020 100644
--- a/compiler/ncgcnv.pas
+++ b/compiler/ncgcnv.pas
@@ -159,8 +159,7 @@ interface
end
else
begin
- location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
- cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,left.location.reference,location.register);
+ location_copy(location,left.location);
end;
end;
cst_longstring:
@@ -179,9 +178,7 @@ interface
end
else
begin
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_INT,left.location.reference,
- location.register);
+ location_copy(location,left.location);
end;
end;
end;
diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas
index 2914e814a8..b1a1975027 100644
--- a/compiler/ncgcon.pas
+++ b/compiler/ncgcon.pas
@@ -71,7 +71,7 @@ implementation
symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil,
cpuinfo,cpubase,
cgbase,cgobj,cgutils,
- ncgutil
+ ncgutil, cclasses
;
@@ -262,14 +262,24 @@ implementation
procedure tcgstringconstnode.pass_generate_code;
var
- hp1,hp2 : tai;
l1,
lastlabel : tasmlabel;
- lastlabelhp : tai;
pc : pchar;
- same_string : boolean;
- l,j,
- i,mylength : longint;
+ l,i : longint;
+ href: treference;
+ pooltype: TConstPoolType;
+ pool: THashSet;
+ entry: PHashSetItem;
+
+ const
+ PoolMap: array[tconststringtype] of TConstPoolType = (
+ sp_conststr,
+ sp_shortstr,
+ sp_longstr,
+ sp_ansistr,
+ sp_widestr,
+ sp_unicodestr
+ );
begin
{ for empty ansistrings we could return a constant 0 }
if (cst_type in [cst_ansistring,cst_widestring]) and (len=0) then
@@ -278,160 +288,49 @@ implementation
location.value:=0;
exit;
end;
- { return a constant reference in memory }
- location_reset(location,LOC_CREFERENCE,def_cgsize(resultdef));
{ const already used ? }
- lastlabel:=nil;
- lastlabelhp:=nil;
if not assigned(lab_str) then
begin
- if is_shortstring(resultdef) then
- mylength:=len+2
+ pooltype := PoolMap[cst_type];
+ if current_asmdata.ConstPools[pooltype] = nil then
+ current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False);
+ pool := current_asmdata.ConstPools[pooltype];
+
+ if cst_type in [cst_widestring, cst_unicodestring] then
+ entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size)
else
- mylength:=len+1;
- { widestrings can't be reused yet }
- if not(is_widestring(resultdef)) then
- begin
- { tries to find an old entry }
- hp1:=tai(current_asmdata.asmlists[al_typedconsts].first);
- while assigned(hp1) do
- begin
- if hp1.typ=ait_label then
- begin
- lastlabel:=tai_label(hp1).labsym;
- lastlabelhp:=hp1;
- end
- else
- begin
- same_string:=false;
- if (hp1.typ=ait_string) and
- (lastlabel<>nil) and
- (tai_string(hp1).len=mylength) then
- begin
- case cst_type of
- cst_conststring:
- begin
- j:=0;
- same_string:=true;
- if len>0 then
- begin
- for i:=0 to len-1 do
- begin
- if tai_string(hp1).str[j]<>value_str[i] then
- begin
- same_string:=false;
- break;
- end;
- inc(j);
- end;
- end;
- end;
- cst_shortstring:
- begin
- { if shortstring then check the length byte first and
- set the start index to 1 }
- if len=ord(tai_string(hp1).str[0]) then
- begin
- j:=1;
- same_string:=true;
- if len>0 then
- begin
- for i:=0 to len-1 do
- begin
- if tai_string(hp1).str[j]<>value_str[i] then
- begin
- same_string:=false;
- break;
- end;
- inc(j);
- end;
- end;
- end;
- end;
- cst_ansistring,
- cst_widestring :
- begin
- { before the string the following sequence must be found:
- <label>
- constsymbol <datalabel>
- constint -1
- constint <len>
- we must then return <label> to reuse
- }
- hp2:=tai(lastlabelhp.previous);
- if assigned(hp2) and
- (hp2.typ=ait_const) and
- (tai_const(hp2).consttype=aitconst_aint) and
- (tai_const(hp2).value=len) and
- assigned(hp2.previous) and
- (tai(hp2.previous).typ=ait_const) and
- (tai_const(hp2.previous).consttype=aitconst_aint) and
- (tai_const(hp2.previous).value=-1) and
- assigned(hp2.previous.previous) and
- (tai(hp2.previous.previous).typ=ait_const) and
- (tai_const(hp2.previous.previous).consttype=aitconst_ptr) and
- assigned(hp2.previous.previous.previous) and
- (tai(hp2.previous.previous.previous).typ=ait_label) then
- begin
- lastlabel:=tai_label(hp2.previous.previous.previous).labsym;
- same_string:=true;
- j:=0;
- if len>0 then
- begin
- for i:=0 to len-1 do
- begin
- if tai_string(hp1).str[j]<>value_str[i] then
- begin
- same_string:=false;
- break;
- end;
- inc(j);
- end;
- end;
- end;
- end;
- end;
- { found ? }
- if same_string then
- begin
- lab_str:=lastlabel;
- break;
- end;
- end;
- lastlabel:=nil;
- end;
- hp1:=tai(hp1.next);
- end;
- end;
+ entry := pool.FindOrAdd(value_str, len);
+
+ lab_str := TAsmLabel(entry^.Data); // is it needed anymore?
+
{ :-(, we must generate a new entry }
- if not assigned(lab_str) then
+ if not assigned(entry^.Data) then
begin
current_asmdata.getdatalabel(lastlabel);
lab_str:=lastlabel;
+ entry^.Data := lastlabel;
maybe_new_object_file(current_asmdata.asmlists[al_typedconsts]);
if (len=0) or
not(cst_type in [cst_ansistring,cst_widestring]) then
new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,lastlabel.name,const_align(sizeof(pint)))
else
new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata,lastlabel.name,const_align(sizeof(pint)));
- current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
{ generate an ansi string ? }
case cst_type of
cst_ansistring:
begin
- { an empty ansi string is nil! }
if len=0 then
- current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil))
- else
+ InternalError(2008032301) { empty string should be handled above }
+ else
begin
current_asmdata.getdatalabel(l1);
- current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1));
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1));
current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len));
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
{ make sure the string doesn't get dead stripped if the header is referenced }
if (target_info.system in systems_darwin) then
current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name));
- current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
{ ... and vice versa }
if (target_info.system in systems_darwin) then
current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name));
@@ -444,14 +343,12 @@ implementation
end;
cst_widestring:
begin
- { an empty wide string is nil! }
if len=0 then
- current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil))
+ InternalError(2008032302) { empty string should be handled above }
else
begin
current_asmdata.getdatalabel(l1);
- current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1));
-
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
{ we use always UTF-16 coding for constants }
{ at least for now }
{ Consts.concat(Tai_const.Create_8bit(2)); }
@@ -462,10 +359,10 @@ implementation
current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1));
current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len*cwidechartype.size));
end;
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
{ make sure the string doesn't get dead stripped if the header is referenced }
if (target_info.system in systems_darwin) then
current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name));
- current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
{ ... and vice versa }
if (target_info.system in systems_darwin) then
current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name));
@@ -477,6 +374,7 @@ implementation
end;
cst_shortstring:
begin
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
{ truncate strings larger than 255 chars }
if len>255 then
l:=255
@@ -491,6 +389,7 @@ implementation
end;
cst_conststring:
begin
+ current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
{ include terminating zero }
getmem(pc,len+1);
move(value_str^,pc[0],len);
@@ -500,7 +399,18 @@ implementation
end;
end;
end;
- location.reference.symbol:=lab_str;
+ if cst_type in [cst_ansistring, cst_widestring] then
+ begin
+ location_reset(location, LOC_CREGISTER, OS_ADDR);
+ reference_reset_symbol(href, lab_str, 0);
+ location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+ cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,location.register);
+ end
+ else
+ begin
+ location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef));
+ location.reference.symbol:=lab_str;
+ end;
end;