{ This file is part of the Free Pascal Integrated Development Environment Copyright (c) 2000 by Berczi Gabor HTML scanner objects See the file COPYING.FPC, included in this distribution, for details about the copyright. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. **********************************************************************} unit WHTMLScn; {$ifdef cpullvm} {$modeswitch nestedprocvars} {$endif} interface uses Objects, WHTML; const HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24; HTMLIndexVersion = 2; type PHTMLLinkScanner = ^THTMLLinkScanner; PHTMLLinkScanDocument = ^THTMLLinkScanDocument; TCustomHTMLLinkScanner = object(THTMLParser) function DocAddTextChar(C: char): boolean; virtual; procedure DocAnchor(Entered: boolean); virtual; public {a}function CheckURL(const URL: string): boolean; virtual; {a}function CheckText(const Text: string): boolean; virtual; {a}procedure AddLink(const LinkText, LinkURL: string); virtual; {a}procedure AddRef(LinkURL: string); virtual; {a}procedure AddNameID(AName: string); virtual; {a}procedure AddID(AName: string); virtual; {a}function GetDocumentBaseURL: string; virtual; private CurLinkText: string; CurURL: string; CurName, CurID: string; CurDoc: string; InAnchor,InNameAnchor, HasHRef : boolean; LastSynonym: PHTMLLinkScanDocument; end; TNameIDState = (IsReferenced, IsFound,IsID); TNameIDStates = set of TNameIDState; PNameID = ^TNameID; TNameID = object(TObject) constructor Init(const AName : string; Astate : TNameIDState); destructor Done; virtual; procedure SetState(Astate : TNameIDState; enabled : boolean); procedure SetOrigin(const AOrigin : string); procedure SetLine(ALine : sw_integer); function GetLine : sw_integer; function GetState : TNameIDStates; function GetName : string; function GetOrigin : string; private Name : pstring; Origin : pstring; Line : sw_integer; State : TNameIDStates; end; PNameIDCollection = ^TNameIDCollection; TNameIDCollection = object(TSortedCollection) function At(Index: sw_Integer): PNameID; function Compare(Key1, Key2: Pointer): sw_Integer; virtual; end; THTMLLinkScanDocument = object(TObject) constructor Init(const ADocName: string); function GetName: string; function GetUniqueName: string; function GetAliasCount: sw_integer; function GetAlias(Index: sw_integer): string; procedure AddAlias(const Alias: string); constructor Load(var S: TStream); procedure Store(var S: TStream); destructor Done; virtual; private DocName: PString; Synonym: PHTMLLinkScanDocument; Aliases: PStringCollection; end; PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection; THTMLLinkScanDocumentCollection = object(TSortedCollection) constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer); function Compare(Key1, Key2: Pointer): sw_Integer; virtual; function At(Index: sw_Integer): PHTMLLinkScanDocument; function SearchDocument(const DocName: string): PHTMLLinkScanDocument; procedure MoveAliasesToSynonym; private Scanner: PHTMLLinkScanner; end; THTMLLinkScanner = object(TCustomHTMLLinkScanner) constructor Init(const ABaseDir: string); procedure SetBaseDir(const ABaseDir: string); {a}function FindID(const AName : string) : PNameID; virtual; function GetDocumentCount: sw_integer; function GetDocumentURL(DocIndex: sw_integer): string; function GetUniqueDocumentURL(DocIndex: sw_integer): string; function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer; function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string; constructor LoadDocuments(var S: TStream); procedure StoreDocuments(var S: TStream); destructor Done; virtual; public procedure AddLink(const LinkText, LinkURL: string); virtual; private Documents: PHTMLLinkScanDocumentCollection; BaseDir: PString; function ExpandChildURL(const S: string): string; function NormalizeChildURL(const S: string): string; end; THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned); PHTMLLinkScanFile = ^THTMLLinkScanFile; THTMLLinkScanFile = object(TObject) constructor Init(const ADocumentURL: string); function GetDocumentURL: string; destructor Done; virtual; function AddReferencedName (const AName : string) : PNameID; function AddFoundName (const AName : string) : PNameID; procedure CheckNameList; function FindID(const AName : string) : PNameID; virtual; private DocumentURL : PString; NameIDList : PNameIDCollection; Owner : PHTMLLinkScanner; public State : THTMLLinkScanState; end; PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection; THTMLLinkScanFileCollection = object(TSortedCollection) function At(Index: sw_Integer): PHTMLLinkScanFile; function Compare(Key1, Key2: Pointer): sw_Integer; virtual; function SearchFile(const DocURL: string): PHTMLLinkScanFile; function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile; procedure CheckNameIDLists; end; THTMLLinkScanOption = (soSubDocsOnly); THTMLLinkScanOptions = set of THTMLLinkScanOption; THTMLFileLinkScanner = object(THTMLLinkScanner) constructor Init(const ABaseDir: string); procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions); destructor Done; virtual; public function GetDocumentBaseURL: string; virtual; function FindID(const AName : string) : PNameID; virtual; procedure AddLink(const LinkText, LinkURL: string); virtual; procedure AddRef(LinkURL: string); virtual; procedure AddNameID(AName: string); virtual; procedure AddID(AName: string); virtual; function CheckURL(const URL: string): boolean; virtual; private Options: THTMLLinkScanOptions; BaseURL: string; CurBaseURL: string; IDList : PNameIDCollection; DocumentFiles: PHTMLLinkScanFileCollection; procedure ScheduleDoc(const DocumentURL: string); public procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual; end; procedure RegisterWHTMLScan; implementation uses WUtils; const RHTMLLinkScanDocument: TStreamRec = ( ObjType: 19500; VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^); Load: @THTMLLinkScanDocument.Load; Store: @THTMLLinkScanDocument.Store ); const CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion; function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean; var Added: boolean; begin Added:=false; if InAnchor then begin CurLinkText:=CurLinkText+C; Added:=true; end; if ord(c)>32 then LastSynonym:=nil; DocAddTextChar:=Added; end; procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean); begin if Entered then begin CurLinkText:=''; if DocGetTagParam('HREF',CurURL) then HasHRef:=true else CurURL:=''; if not DocGetTagParam('NAME',CurName) then if not DocGetTagParam('ID',CurName) then CurName:=''; if not DocGetTagParam('ID',CurID) then CurID:=''; if CurName<>'' then begin InNameAnchor:=true; If Pos('#',CurName)=0 then CurName:=CurDoc+'#'+CurName; CurName:=Trim(CurName); CurName:=CompleteURL(GetDocumentBaseURL,CurName); if CurURL='' then CurURL:=CurName; end else CurName:=''; CurURL:=Trim(CurURL); if pos('#',CurURL)=1 then CurURL:=CurDoc+CurURL; CurURL:=CompleteURL(GetDocumentBaseURL,CurURL); end else begin CurLinkText:=Trim(CurLinkText); if HasHRef then begin if CheckURL(CurURL) and CheckText(CurLinkText) and not DisableCrossIndexing then begin AddLink(CurLinkText,CurURL); {$ifdef DEBUG} DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+ CurURL+'"',Line,1); {$endif DEBUG} end; { Be sure to parse referenced file, even if that link is not valid } AddRef(CurURL); end; if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then begin AddLink(CurLinkText,CurName); {$ifdef DEBUG} DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1); {$endif DEBUG} end; if InNameAnchor then begin AddNameID(CurName); end; if not HasHRef and (CurID<>'') then AddID(CurID); InNameAnchor:=false; HasHRef:=false; end; InAnchor:=Entered; end; function TCustomHTMLLinkScanner.GetDocumentBaseURL: string; begin { Abstract } GetDocumentBaseURL:=''; end; function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean; begin { Abstract } CheckURL:=true; end; function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean; begin { Abstract } CheckText:=true; end; procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string); begin { Abstract } end; procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string); begin { Abstract } end; procedure TCustomHTMLLinkScanner.AddNameID(AName: string); begin { Abstract } end; procedure TCustomHTMLLinkScanner.AddID(AName: string); begin { Abstract } end; constructor TNameID.Init(const AName : string; Astate : TNameIDState); begin inherited Init; SetStr(Name,AName); Origin:=nil; State:=[AState]; end; destructor TNameID.Done; begin if assigned(Name) then DisposeStr(Name); Name:=nil; if assigned(Origin) then DisposeStr(Origin); Origin:=nil; inherited Done; end; procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean); begin if enabled then Include(State,AState) else Exclude(State,AState); end; function TNameID.GetState : TNameIDStates; begin GetState:=State; end; function TNameID.GetName : string; begin GetName:=GetStr(Name); end; function TNameID.GetOrigin : string; begin GetOrigin:=GetStr(Origin); end; procedure TNameID.SetOrigin(const AOrigin : string); begin SetStr(Origin,AOrigin); end; procedure TNameID.SetLine(ALine : sw_integer); begin Line:=ALine; end; function TNameID.GetLine : sw_integer; begin GetLine:=Line; end; function TNameIDCollection.At(Index: sw_Integer): PNameID; begin At:=Inherited At(Index); end; function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer; var R: sw_integer; K1: PNameID absolute Key1; K2: PNameID absolute Key2; S1,S2: string; begin S1:=K1^.GetName; S2:=K2^.GetName; S1:=UpcaseStr(S1); S2:=UpcaseStr(S2); if S1S2 then R:= 1 else R:=0; Compare:=R; end; constructor THTMLLinkScanDocument.Init(const ADocName: string); begin inherited Init; SetStr(DocName,ADocName); New(Aliases, Init(10,10)); {$ifdef DEBUG} DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1); {$endif DEBUG} Synonym:=nil; end; function THTMLLinkScanDocument.GetName: string; begin GetName:=GetStr(DocName); end; function THTMLLinkScanDocument.GetUniqueName: string; var PD: PHTMLLinkScanDocument; begin PD:=@Self; while assigned(PD^.synonym) do PD:=PD^.Synonym; GetUniqueName:=GetStr(PD^.DocName); end; function THTMLLinkScanDocument.GetAliasCount: sw_integer; begin GetAliasCount:=Aliases^.Count; end; function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string; begin GetAlias:=GetStr(Aliases^.At(Index)); end; procedure THTMLLinkScanDocument.AddAlias(const Alias: string); begin Aliases^.Insert(NewStr(Alias)); {$ifdef DEBUG} DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1); {$endif DEBUG} end; constructor THTMLLinkScanDocument.Load(var S: TStream); var i: sw_integer; begin inherited Init; DocName:=S.ReadStr; if assigned(DocName) then for i:=1 to Length(DocName^) do if (DocName^[i]='\') or (DocName^[i]='/') then DocName^[i]:=DirSep; New(Aliases, Load(S)); end; procedure THTMLLinkScanDocument.Store(var S: TStream); begin S.WriteStr(DocName); Aliases^.Store(S); end; destructor THTMLLinkScanDocument.Done; begin if Assigned(Aliases) then Dispose(Aliases, Done); Aliases:=nil; if Assigned(DocName) then DisposeStr(DocName); DocName:=nil; inherited Done; end; constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer); begin inherited Init(ALimit,ADelta); Scanner:=AScanner; end; function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer; var R: sw_integer; K1: PHTMLLinkScanDocument absolute Key1; K2: PHTMLLinkScanDocument absolute Key2; S1,S2: string; begin S1:=K1^.GetName; S2:=K2^.GetName; if Assigned(Scanner) then begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end; S1:=UpcaseStr(S1); S2:=UpcaseStr(S2); if S1S2 then R:= 1 else R:=0; Compare:=R; end; function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument; begin At:=inherited At(Index); end; function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument; var D,P: PHTMLLinkScanDocument; Index: sw_integer; begin New(D, Init(DocName)); if Search(D, Index)=false then P:=nil else P:=At(Index); Dispose(D, Done); SearchDocument:=P; end; procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym; procedure MoveAliases(P: PHTMLLinkScanDocument); var PD: PHTMLLinkScanDocument; i: sw_integer; begin if not assigned(P^.synonym) then exit; PD:=P; while assigned(PD^.synonym) do PD:=PD^.Synonym; For i:=P^.GetAliasCount-1 downto 0 do begin PD^.AddAlias(P^.GetAlias(i)); P^.Aliases^.AtFree(i); end; end; begin ForEach(TCallbackProcParam(@MoveAliases)); end; constructor THTMLLinkScanner.Init(const ABaseDir: string); begin inherited Init; New(Documents, Init(@Self,50,100)); SetBaseDir(ABaseDir); end; procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string); begin if Assigned(BaseDir) then DisposeStr(BaseDir); BaseDir:=NewStr(CompleteDir(ABaseDir)); end; function THTMLLinkScanner.GetDocumentCount: sw_integer; begin GetDocumentCount:=Documents^.Count; end; function THTMLLinkScanner.ExpandChildURL(const S: string): string; begin ExpandChildURL:=CompleteURL(GetStr(BaseDir),S); end; function THTMLLinkScanner.NormalizeChildURL(const S: string): string; var URL: string; begin URL:=S; if GetStr(BaseDir)<>'' then if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then URL:=copy(S,length(GetStr(BaseDir))+1,length(S)); NormalizeChildURL:=URL; end; function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string; begin GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName); end; function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string; begin GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName); end; function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer; begin GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount; end; function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string; begin GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex); end; procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string); var D: PHTMLLinkScanDocument; DoInsert: boolean; int: sw_integer; Text: string; error: word; begin D:=Documents^.SearchDocument(LinkURL); if D=nil then begin New(D, Init(NormalizeChildURL(LinkURL))); Documents^.Insert(D); end; If assigned(LastSynonym) then LastSynonym^.Synonym:=D; DoInsert:=true; If (length(LinkText)=0) or (Pos(',',LinkText)=1) then DoInsert:=false; Val(LinkText,int,error); If (Error>1) and (LinkText[Error]=' ') then Text:=Trim(Copy(LinkText,error+1,length(LinkText))) else Text:=LinkText; IF DoInsert then D^.AddAlias(Text); If InNameAnchor then LastSynonym:=D; end; constructor THTMLLinkScanner.LoadDocuments(var S: TStream); var P,L: longint; OK: boolean; PS: PString; begin OK:=false; P:=S.GetPos; S.Read(L,sizeof(L)); if (S.Status=stOK) and (L=HTMLIndexMagicNo) then begin S.Read(L,sizeof(L)); CurrentHTMLIndexVersion:=L; OK:=(S.Status=stOK); end; if not OK then begin S.Reset; S.Seek(P); end else BaseDir:=S.ReadStr; New(Documents, Load(S)); if not Assigned(Documents) then Fail; Documents^.MoveAliasesToSynonym; CurrentHTMLIndexVersion:=HTMLIndexVersion; end; function THTMLLinkScanner.FindID(const AName : string) : PNameID; begin {abstract}FindID:=nil; end; procedure THTMLLinkScanner.StoreDocuments(var S: TStream); var L: longint; begin L:=HTMLIndexMagicNo; S.Write(L,sizeof(L)); L:=HTMLIndexVersion; CurrentHTMLIndexVersion:=L; S.Write(L,sizeof(L)); S.WriteStr(BaseDir); Documents^.MoveAliasesToSynonym; Documents^.Store(S); end; destructor THTMLLinkScanner.Done; begin if Assigned(Documents) then Dispose(Documents, Done); Documents:=nil; if Assigned(BaseDir) then DisposeStr(BaseDir); BaseDir:=nil; inherited Done; end; constructor THTMLLinkScanFile.Init(const ADocumentURL: string); begin inherited Init; SetStr(DocumentURL,ADocumentURL); New(NameIDList, Init(5,10)); end; function THTMLLinkScanFile.GetDocumentURL: string; begin GetDocumentURL:=GetStr(DocumentURL); end; function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID; var index : sw_integer; PN : PNameID; begin new(PN,init(AName,IsReferenced)); if not NameIDList^.Search(PN,Index) then NameIDList^.Insert(PN) else begin dispose(PN,Done); PN:=NameIDList^.At(Index); PN^.SetState(IsReferenced,true); end; AddReferencedName:=PN; end; function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID; var index : sw_integer; PN : PNameID; begin new(PN,init(AName,IsFound)); if not NameIDList^.Search(PN,Index) then NameIDList^.Insert(PN) else begin dispose(PN,Done); PN:=NameIDList^.At(Index); PN^.SetState(IsFound,true); end; AddFoundName:=PN; end; procedure THTMLLinkScanFile.CheckNameList; var i : sw_integer; PN,PN2 : PNameID; begin {$ifdef DEBUG} for i:=0 to NameIDList^.Count-1 do begin PN:=NameIDList^.At(i); if not (IsFound in PN^.GetState) then begin if (IsReferenced in PN^.GetState) then DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+ PN^.GetOrigin+'" not found',1,1); PN2:=Owner^.FindID(PN^.GetName); if assigned(PN2) then begin DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1); if not (IsFound in PN2^.GetState) then DebugMessage('','ID not found',1,1); end; end; end; {$endif DEBUG} end; function THTMLLinkScanFile.FindID(const AName : string) : PNameID; var PN : PNameID; Index : sw_integer; begin new(PN,init(AName,IsID)); if NameIDList^.Search(PN,Index) then begin dispose(PN,done); PN:=NameIDList^.At(Index); if (IsID in PN^.GetState) then FindId:=PN else FindID:=nil; end else begin dispose(PN,done); PN:=nil; FindID:=nil; end; end; destructor THTMLLinkScanFile.Done; begin if Assigned(DocumentURL) then DisposeStr(DocumentURL); DocumentURL:=nil; dispose(NameIDList,done); NameIDList:=nil; inherited Done; end; function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile; begin At:=inherited At(Index); end; function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer; var R: integer; K1: PHTMLLinkScanFile absolute Key1; K2: PHTMLLinkScanFile absolute Key2; S1,S2: string; begin S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL); if S1S2 then R:= 1 else R:=0; Compare:=R; end; function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile; var P,D: PHTMLLinkScanFile; Index: sw_integer; begin New(D, Init(DocURL)); if Search(D,Index)=false then P:=nil else P:=At(Index); Dispose(D, Done); SearchFile:=P; end; function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile; var I: sw_integer; P,D: PHTMLLinkScanFile; begin P:=nil; for I:=0 to Count-1 do begin D:=At(I); if D^.State=AState then begin P:=D; Break; end; end; FindFileWithState:=P; end; procedure THTMLLinkScanFileCollection.CheckNameIDLists; procedure DoCheckNameList(P : PHTMLLinkScanFile); begin P^.CheckNameList; end; begin ForEach(TCallbackProcParam(@DoCheckNameList)); end; constructor THTMLFileLinkScanner.Init(const ABaseDir: string); begin inherited Init(ABaseDir); New(DocumentFiles, Init(50,100)); New(IDList, Init(50,100)); {$ifdef DEBUG} DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1); {$endif DEBUG} end; procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions); var P: PHTMLLinkScanFile; begin CurBaseURL:=''; Options:=AOptions; ScheduleDoc(DocumentURL); repeat P:=DocumentFiles^.FindFileWithState(ssScheduled); if Assigned(P) then ProcessDoc(P); until P=nil; {$ifdef DEBUG} DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1); DocumentFiles^.CheckNameIDLists; DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1); {$endif DEBUG} end; function THTMLFileLinkScanner.GetDocumentBaseURL: string; begin GetDocumentBaseURL:=CurBaseURL; end; function THTMLFileLinkScanner.CheckURL(const URL: string): boolean; var OK: boolean; begin if soSubDocsOnly in Options then OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL) else OK:=true; CheckURL:=OK; end; procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string); var D: PHTMLLinkScanFile; P: sw_integer; DocURL: string; begin P:=Pos('#',LinkURL); if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1); D:=DocumentFiles^.SearchFile(DocURL); if not Assigned(D) then ScheduleDoc(DocURL); inherited AddLink(LinkText,LinkURL); end; procedure THTMLFileLinkScanner.AddRef(LinkURL: string); var D: PHTMLLinkScanFile; P: sw_integer; DocURL: string; PN : PNameID; begin {$ifdef DEBUG} DebugMessage(CurDoc,' Adding Ref to "'+ LinkURL+'"',Line,1); {$endif DEBUG} P:=Pos('#',LinkURL); if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1); D:=DocumentFiles^.SearchFile(DocURL); if not Assigned(D) then ScheduleDoc(DocURL); D:=DocumentFiles^.SearchFile(DocURL); if P>0 then begin PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL))); PN^.SetOrigin(CurDoc); PN^.SetLine(Line); end; end; procedure THTMLFileLinkScanner.AddNameID(AName : string); var D: PHTMLLinkScanFile; P: sw_integer; PN : PNameID; DocURL: string; begin {$ifdef DEBUG} DebugMessage(CurDoc,' Adding NameID "'+ CurName+'"',Line,1); {$endif DEBUG} P:=Pos('#',AName); if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1); D:=DocumentFiles^.SearchFile(DocURL); if not Assigned(D) then ScheduleDoc(DocURL); D:=DocumentFiles^.SearchFile(DocURL); PN:=D^.AddFoundName(copy(AName,P+1,length(AName))); PN^.SetOrigin(CurDoc); PN^.SetLine(Line); end; procedure THTMLFileLinkScanner.AddID(AName : string); var D: PHTMLLinkScanFile; PN : PNameID; index : sw_integer; begin {$ifdef DEBUG} DebugMessage(CurDoc,' Adding Id "'+ AName+'"',Line,1); {$endif DEBUG} D:=DocumentFiles^.SearchFile(CurDoc); if not Assigned(D) then ScheduleDoc(CurDoc); D:=DocumentFiles^.SearchFile(CurDoc); PN:=D^.AddFoundName(AName); PN^.SetState(IsId,true); PN^.SetOrigin(CurDoc); PN^.SetLine(Line); new(PN,init(AName,IsID)); if IDList^ .Search(PN,index) then begin dispose(PN,done); {$ifdef DEBUG} PN:=IDList^.At(Index); DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+ PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1); {$endif DEBUG} end else begin IDList^.Insert(PN); PN^.SetOrigin(CurDoc); PN^.SetLine(Line); end; end; function THTMLFileLinkScanner.FindID(const AName : string) : PNameID; Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean; begin ContainsNamedID:=D^.FindID(AName)<>nil; end; var D : PHTMLLinkScanFile; begin D:=DocumentFiles^.FirstThat(TCallbackFunBoolParam(@ContainsNamedID)); if assigned(D) then FindID:=D^.FindID(AName) else FindID:=nil; end; procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile); var F: PDOSTextFile; begin if Assigned(Doc)=false then Exit; Doc^.State:=ssProcessing; CurDoc:=Doc^.GetDocumentURL; New(F, Init(CurDoc)); if Assigned(F) then begin CurBaseURL:=CompleteURL(CurDoc,''); {$ifdef DEBUG} DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1); {$endif DEBUG} Process(F); {$ifdef DEBUG} DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1); {$endif DEBUG} Dispose(F, Done); end else begin {$ifdef DEBUG} DebugMessage(CurDoc,'file not found',1,1); {$endif DEBUG} end; Doc^.State:=ssScanned; CurDoc:=''; end; procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string); var D: PHTMLLinkScanFile; begin New(D, Init(DocumentURL)); D^.State:=ssScheduled; D^.Owner:=@Self; {$ifdef DEBUG} DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1); {$endif DEBUG} DocumentFiles^.Insert(D); end; destructor THTMLFileLinkScanner.Done; begin if Assigned(DocumentFiles) then Dispose(DocumentFiles, Done); DocumentFiles:=nil; if Assigned(IDList) then Dispose(IDList, Done); IDList:=nil; inherited Done; end; procedure RegisterWHTMLScan; begin RegisterType(RHTMLLinkScanDocument); end; END.