diff options
author | michael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2007-03-10 16:34:09 +0000 |
---|---|---|
committer | michael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2007-03-10 16:34:09 +0000 |
commit | 356f0425b78e204cc4d44c344b82ceb639187da4 (patch) | |
tree | 142d51a48b6f8dcca0ee0c40deccce88ad281322 | |
parent | 1a4a6e70d69b9caa6380c03c7dca889e57b4ea76 (diff) | |
download | fpc-356f0425b78e204cc4d44c344b82ceb639187da4.tar.gz |
Merged revisions 6749 via svnmerge from
svn+ssh://svn.freepascal.org/FPC/svn/fpc/trunk
........
r6749 | michael | 2007-03-08 10:40:00 +0100 (Thu, 08 Mar 2007) | 11 lines
* Patch from Sergei Gorelkin
+ DTD validation
+ Correct reporting of the position of most fatal errors
+ TDOMDocument.CreateElement and others check their arguments for validity
(INVALID_CHARACTER_ERR is reported where specification says)
+ property TDOMAttr.DataType
+ implemented TDOMDocument.GetElementByID
* Common code moved to xmlutils.pp
* whitespace in PublicID literals is normalized
........
git-svn-id: http://svn.freepascal.org/svn/fpc/branches/fixes_2_2@6771 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r-- | packages/fcl-xml/Makefile | 105 | ||||
-rw-r--r-- | packages/fcl-xml/Makefile.fpc | 2 | ||||
-rw-r--r-- | packages/fcl-xml/src/dom.pp | 240 | ||||
-rw-r--r-- | packages/fcl-xml/src/sax_html.pp | 3 | ||||
-rw-r--r-- | packages/fcl-xml/src/xmlread.pp | 2028 | ||||
-rw-r--r-- | packages/fcl-xml/src/xmlutils.pp | 222 | ||||
-rw-r--r-- | packages/fcl-xml/src/xmlwrite.pp | 15 | ||||
-rw-r--r-- | packages/fcl-xml/tests/xmlts.pp | 778 |
8 files changed, 2673 insertions, 720 deletions
diff --git a/packages/fcl-xml/Makefile b/packages/fcl-xml/Makefile index 0e62405ccb..80d3e64b10 100644 --- a/packages/fcl-xml/Makefile +++ b/packages/fcl-xml/Makefile @@ -233,157 +233,160 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F override PACKAGE_NAME=fcl-xml override PACKAGE_VERSION=2.0.0 ifeq ($(FULL_TARGET),i386-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-go32v2) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-win32) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-os2) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-freebsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-beos) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-netbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-solaris) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-qnx) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-netware) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-openbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-wdosx) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-darwin) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-emx) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-watcom) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-netwlibc) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-wince) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-symbian) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-freebsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-netbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-amiga) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-atari) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-openbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-palmos) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),m68k-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-netbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-amiga) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-macos) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-darwin) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-morphos) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),sparc-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),sparc-netbsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),sparc-solaris) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),sparc-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),x86_64-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),x86_64-freebsd) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),x86_64-win64) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),x86_64-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-palmos) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-wince) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-gba) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-nds) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),arm-symbian) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc64-linux) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +endif +ifeq ($(FULL_TARGET),powerpc64-darwin) +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),powerpc64-embedded) -override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath endif ifeq ($(FULL_TARGET),i386-linux) override TARGET_RSTS+=sax xpath diff --git a/packages/fcl-xml/Makefile.fpc b/packages/fcl-xml/Makefile.fpc index cbe75d6c47..2beddcb06d 100644 --- a/packages/fcl-xml/Makefile.fpc +++ b/packages/fcl-xml/Makefile.fpc @@ -7,7 +7,7 @@ name=fcl-xml version=2.0.0 [target] -units=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath +units=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath rsts=sax xpath [require] diff --git a/packages/fcl-xml/src/dom.pp b/packages/fcl-xml/src/dom.pp index 752137ab17..b0c6f1eed5 100644 --- a/packages/fcl-xml/src/dom.pp +++ b/packages/fcl-xml/src/dom.pp @@ -109,8 +109,10 @@ type // DOMString // ------------------------------------------------------- + TSetOfChar = set of Char; DOMString = WideString; DOMPChar = PWideChar; + PDOMString = ^DOMString; EDOMError = class(Exception) public @@ -205,6 +207,8 @@ type function GetNodeType: Integer; virtual; abstract; function GetTextContent: DOMString; virtual; procedure SetTextContent(const AValue: DOMString); virtual; + function GetLocalName: DOMString; virtual; + function GetNamespaceURI: DOMString; virtual; public constructor Create(AOwner: TDOMDocument); destructor Destroy; override; @@ -239,15 +243,13 @@ type function HasAttributes: Boolean; virtual; procedure Normalize; - (* - // TODO: What is that Java NULL for strings ??? // always '' for nodes other than ELEMENT and ATTRIBUTE // as well as for nodes created with DOM 1 methods - property NamespaceURI: DOMString read GetNamespaceURI; - + //property NamespaceURI: DOMString read GetNamespaceURI; + //property LocalName: DOMString read GetLocalName; + (* // Prefix may only be changed if it was specified at creation time. property Prefix: DOMString read FPrefix (write SetPrefix?); - property LocalName: DOMString read FLocalName; *) // DOM level 3 property TextContent: DOMString read GetTextContent write SetTextContent; @@ -406,12 +408,17 @@ type TDOMDocument = class(TDOMNode_WithChildren) protected + FIDList: TList; FRevision: Integer; + FXML11: Boolean; FImplementation: TDOMImplementation; function GetDocumentElement: TDOMElement; function GetDocType: TDOMDocumentType; function GetNodeType: Integer; override; function GetNodeName: DOMString; override; + function IndexOfNS(const nsURI: DOMString): Integer; + function FindID(const aID: DOMString; out Index: LongWord): Boolean; + procedure ClearIDList; public property DocType: TDOMDocumentType read GetDocType; property Impl: TDOMImplementation read FImplementation; @@ -444,17 +451,24 @@ type // Extensions to DOM interface: // TODO: obsolete now, but must check for usage dependencies constructor Create; + destructor Destroy; override; + function AddID(Attr: TDOMAttr): Boolean; + procedure RemoveID(Attr: TDOMAttr); end; TXMLDocument = class(TDOMDocument) + private + FXMLVersion: DOMString; + procedure SetXMLVersion(const aValue: DOMString); public // These fields are extensions to the DOM interface: - XMLVersion, Encoding, StylesheetType, StylesheetHRef: DOMString; + Encoding, StylesheetType, StylesheetHRef: DOMString; function CreateCDATASection(const data: DOMString): TDOMCDATASection; override; function CreateProcessingInstruction(const target, data: DOMString): TDOMProcessingInstruction; override; function CreateEntityReference(const name: DOMString): TDOMEntityReference; override; + property XMLVersion: DOMString read FXMLVersion write SetXMLVersion; end; @@ -462,12 +476,27 @@ type // Attr // ------------------------------------------------------- + TAttrDataType = ( + dtCdata, + dtId, + dtIdRef, + dtIdRefs, + dtEntity, + dtEntities, + dtNmToken, + dtNmTokens, + dtNotation + ); + TDOMAttr = class(TDOMNode_WithChildren) protected FName: DOMString; - FSpecified: Boolean; - FNormalize: Boolean; FOwnerElement: TDOMElement; + // TODO: following 3 - replace with a link to AttDecl ?? + // ('specified' isn't related...) + FSpecified: Boolean; + FDeclared: Boolean; + FDataType: TAttrDataType; function GetNodeValue: DOMString; override; function GetNodeType: Integer; override; function GetNodeName: DOMString; override; @@ -477,10 +506,10 @@ type property Name: DOMString read FName; property Specified: Boolean read FSpecified; property Value: DOMString read GetNodeValue write SetNodeValue; - // Introduced in DOM level 2: property OwnerElement: TDOMElement read FOwnerElement; // extensions function CompareName(const AName: DOMString): Integer; override; + property DataType: TAttrDataType read FDataType; end; @@ -532,11 +561,16 @@ type TDOMText = class(TDOMCharacterData) protected + // set by parser if text contains only literal whitespace (i.e. not coming from CharRefs) + FMayBeIgnorable: Boolean; function GetNodeType: Integer; override; function GetNodeName: DOMString; override; + procedure SetNodeValue(const aValue: DOMString); override; public function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override; function SplitText(offset: LongWord): TDOMText; + // Extension + property MayBeIgnorable: Boolean read FMayBeIgnorable write FMayBeIgnorable; end; @@ -577,10 +611,12 @@ type FSystemID: DOMString; FInternalSubset: DOMString; FEntities, FNotations: TDOMNamedNodeMap; + FElementDefs: TDOMNamedNodeMap; function GetEntities: TDOMNamedNodeMap; function GetNotations: TDOMNamedNodeMap; function GetNodeType: Integer; override; function GetNodeName: DOMString; override; + function GetElementDefs: TDOMNamedNodeMap; public destructor Destroy; override; function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override; @@ -591,6 +627,8 @@ type property PublicID: DOMString read FPublicID; property SystemID: DOMString read FSystemID; property InternalSubset: DOMString read FInternalSubset; + // extensions + property ElementDefs: TDOMNamedNodeMap read GetElementDefs; end; @@ -669,6 +707,15 @@ type implementation +uses + xmlutils; + +type + PIDItem = ^TIDItem; + TIDItem = record + ID: WideString; + Element: TDOMElement; + end; constructor TRefClass.Create; begin @@ -881,7 +928,8 @@ begin if Assigned(Txt) then begin tmp := Child.NextSibling; - Txt.AppendData(Child.nodeValue); + Txt.AppendData(TDOMText(Child).Data); + Txt.FMayBeIgnorable := Txt.FMayBeIgnorable and TDOMText(Child).FMayBeIgnorable; RemoveChild(Child); Child := tmp; end @@ -910,6 +958,16 @@ begin NodeValue := AValue; end; +function TDOMNode.GetNamespaceURI: DOMString; +begin + Result := ''; +end; + +function TDOMNode.GetLocalName: DOMString; +begin + Result := ''; +end; + function CompareDOMStrings(const s1, s2: DOMPChar; l1, l2: integer): integer; var i: integer; begin @@ -935,16 +993,12 @@ end; function CompareDOMNodeWithDOMNode(Node1, Node2: Pointer): integer; begin - Result:=CompareDOMStrings(DOMPChar(TDOMNode(Node1).NodeName), - DOMPChar(TDOMNode(Node2).NodeName), - length(TDOMNode(Node1).NodeName), - length(TDOMNode(Node2).NodeName) - ); + Result := TDOMNode(Node1).CompareName(TDOMNode(Node2).NodeName); end; function CompareDOMStringWithDOMNode(AKey, ANode: Pointer): integer; begin - Result := TDOMNode(ANode).CompareName(DOMString(AKey)); + Result := TDOMNode(ANode).CompareName(PDOMString(AKey)^); end; @@ -1047,14 +1101,11 @@ end; function TDOMNode_WithChildren.ReplaceChild(NewChild, OldChild: TDOMNode): TDOMNode; begin -// Inc(FOwnerDocument.FRevision); // invalidate nodelists (will happen anyway) - RemoveFromChildNodeTree(OldChild); InsertBefore(NewChild, OldChild); if Assigned(OldChild) then RemoveChild(OldChild); - // TODO: must return OldChild, if I understand that right... - // but OldChild is destroyed! + // TODO: per DOM spec, must return OldChild, but OldChild is destroyed Result := NewChild; end; @@ -1167,7 +1218,7 @@ var AVLNode: TAVLTreeNode; begin Result:=nil; if FChildNodeTree<>nil then begin - AVLNode:=FChildNodeTree.FindKey(DOMPChar(ANodeName), + AVLNode:=FChildNodeTree.FindKey(Pointer(ANodeName), @CompareDOMStringWithDOMNode); if AVLNode<>nil then Result:=TDOMNode(AVLNode.Data); @@ -1289,7 +1340,7 @@ begin BuildList; if index < LongWord(FList.Count) then - Result := TDOMNode(FList[index]) + Result := TDOMNode(FList.List^[index]) else Result := nil; end; @@ -1618,6 +1669,77 @@ begin inherited Create(nil); // TODO: DOM lvl 2 states that Document should be unowned. Any dependencies? FOwnerDocument := Self; + FIDList := TList.Create; +end; + +destructor TDOMDocument.Destroy; +begin + ClearIDList; + FIDList.Free; + inherited Destroy; +end; + +function TDOMDocument.AddID(Attr: TDOMAttr): Boolean; +var + I: Cardinal; + Item: PIDItem; +begin + New(Item); + Item^.ID := Attr.Value; + Item^.Element := Attr.OwnerElement; + if not FindID(Item^.ID, I) then + begin + FIDList.Insert(I, Item); + Result := True; + end + else + begin + Dispose(Item); + Result := False; + end; +end; + +procedure TDOMDocument.RemoveID(Attr: TDOMAttr); +begin + // TODO: Implement this +end; + +function TDOMDocument.FindID(const aID: DOMString; out Index: LongWord): Boolean; +var + L, H, I, C: Integer; + P: PIDItem; +begin + Result := False; + L := 0; + H := FIDList.Count - 1; + while L <= H do + begin + I := (L + H) shr 1; + P := PIDItem(FIDList.List^[I]); + C := CompareDOMStrings(PWideChar(aID), PWideChar(P^.ID), Length(aID), Length(P^.ID)); + if C > 0 then L := I + 1 else + begin + H := I - 1; + if C = 0 then + begin + Result := True; + L := I; + end; + end; + end; + Index := L; +end; + +procedure TDOMDocument.ClearIDList; +var + I: Integer; +begin + if Assigned(FIDList) then + begin + for I := 0 to FIDList.Count-1 do + Dispose(PIDItem(FIDList.List^[I])); + FIDList.Clear; + end; end; function TDOMDocument.GetNodeType: Integer; @@ -1652,6 +1774,8 @@ end; function TDOMDocument.CreateElement(const tagName: DOMString): TDOMElement; begin + if not IsXmlName(tagName, FXML11) then + raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateElement'); Result := TDOMElement.Create(Self); Result.FNodeName := tagName; // TODO: attach default attributes @@ -1710,6 +1834,8 @@ end; function TDOMDocument.CreateAttribute(const name: DOMString): TDOMAttr; begin + if not IsXmlName(name, FXML11) then + raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute'); Result := TDOMAttr.Create(Self); Result.FName := name; end; @@ -1718,6 +1844,7 @@ function TDOMDocument.CreateAttributeBuf(Buf: DOMPChar; Length: Integer): TDOMAt begin Result := TDOMAttr.Create(Self); SetString(Result.FName, Buf, Length); + Result.FSpecified := True; end; function TDOMDocument.CreateEntityReference(const name: DOMString): @@ -1753,13 +1880,15 @@ begin Result := nil; end; -function TDOMDocument.GetElementById( - const ElementID: DOMString): TDOMElement; +function TDOMDocument.GetElementById(const ElementID: DOMString): TDOMElement; +var + I: Cardinal; begin // TODO: Implement TDOMDocument.GetElementById - // "Implementations that do not know whether attributes are - // of type ID or not are expected to return null" - Result := nil; + if FindID(ElementID, I) then + Result := PIDItem(FIDList.List^[I])^.Element + else + Result := nil; end; function TDOMDocument.ImportNode(ImportedNode: TDOMNode; @@ -1770,6 +1899,12 @@ begin Result := nil; end; +function TDOMDocument.IndexOfNS(const nsURI: DOMString): Integer; +begin + // TODO: implement + Result := -1; +end; + function TXMLDocument.CreateCDATASection(const data: DOMString): TDOMCDATASection; @@ -1781,6 +1916,8 @@ end; function TXMLDocument.CreateProcessingInstruction(const target, data: DOMString): TDOMProcessingInstruction; begin + if not IsXmlName(target, FXML11) then + raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateProcessingInstruction'); Result := TDOMProcessingInstruction.Create(Self); Result.FTarget := target; Result.FNodeValue := data; @@ -1789,10 +1926,17 @@ end; function TXMLDocument.CreateEntityReference(const name: DOMString): TDOMEntityReference; begin + if not IsXmlName(name, FXML11) then + raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateEntityReference'); Result := TDOMEntityReference.Create(Self); Result.FName := name; end; +procedure TXMLDocument.SetXMLVersion(const aValue: DOMString); +begin + FXMLVersion := aValue; + FXML11 := (aValue = '1.1'); +end; // ------------------------------------------------------- // Attr @@ -1813,31 +1957,16 @@ begin // Cloned attribute is always specified and carries its children Result := ACloneOwner.CreateAttribute(FName); TDOMAttr(Result).FSpecified := True; - TDOMAttr(Result).FNormalize := FNormalize; + TDOMAttr(Result).FDataType := FDataType; + // Declared = ? CloneChildren(Result, ACloneOwner); end; function TDOMAttr.GetNodeValue: DOMString; -var - I,J: Integer; begin Result := GetTextContent; - // TODO: probably must be speed optimized - if FNormalize then - begin - Result := Trim(Result); - I := 1; - while I < Length(Result) do - begin - if Result[I] = #32 then - begin - J := I+1; - while (J <= Length(Result)) and (Result[J] = #32) do Inc(J); - if J-I > 1 then Delete(Result, I+1, J-I-1); - end; - Inc(I); - end; - end; + if FDataType <> dtCdata then + NormalizeSpaces(Result); end; procedure TDOMAttr.SetNodeValue(const AValue: DOMString); @@ -1945,7 +2074,7 @@ procedure TDOMElement.RemoveAttributeNS(const namespaceURI, localName: DOMString); begin // TODO: Implement TDOMElement.RemoveAttributeNS - raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS'); + raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS'); end; procedure TDOMElement.SetAttributeNS(const namespaceURI, qualifiedName, @@ -2057,9 +2186,17 @@ begin Result := '#text'; end; +procedure TDOMText.SetNodeValue(const aValue: DOMString); +begin + // TODO: may analyze aValue, but this will slow things down... + FMayBeIgnorable := False; + FNodeValue := aValue; +end; + function TDOMText.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; begin Result := ACloneOwner.CreateTextNode(FNodeValue); + TDOMText(Result).FMayBeIgnorable := FMayBeIgnorable; end; function TDOMText.SplitText(offset: LongWord): TDOMText; @@ -2069,6 +2206,7 @@ begin Result := TDOMText.Create(FOwnerDocument); Result.FNodeValue := Copy(FNodeValue, offset + 1, Length); + Result.FMayBeIgnorable := FMayBeIgnorable; FNodeValue := Copy(FNodeValue, 1, offset); FParentNode.InsertBefore(Result, FNextSibling); end; @@ -2132,6 +2270,7 @@ destructor TDOMDocumentType.Destroy; begin FEntities.Free; FNotations.Free; + FElementDefs.Free; inherited Destroy; end; @@ -2159,6 +2298,13 @@ begin Result := FNotations; end; +function TDOMDocumentType.GetElementDefs: TDOMNamedNodeMap; +begin + if FElementDefs = nil then + FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE); + Result := FElementDefs; +end; + // ------------------------------------------------------- // Notation // ------------------------------------------------------- diff --git a/packages/fcl-xml/src/sax_html.pp b/packages/fcl-xml/src/sax_html.pp index f210be8b91..9db74b2bcc 100644 --- a/packages/fcl-xml/src/sax_html.pp +++ b/packages/fcl-xml/src/sax_html.pp @@ -29,7 +29,7 @@ unit SAX_HTML; interface -uses SysUtils, Classes, SAX, DOM, DOM_HTML; +uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs; type @@ -111,7 +111,6 @@ procedure ReadHTMLFragment(AParentNode: TDOMNode; var f: TStream); implementation -uses HTMLDefs; const WhitespaceChars = [#9, #10, #13, ' ']; diff --git a/packages/fcl-xml/src/xmlread.pp b/packages/fcl-xml/src/xmlread.pp index 0c247279b7..3a99dba008 100644 --- a/packages/fcl-xml/src/xmlread.pp +++ b/packages/fcl-xml/src/xmlread.pp @@ -26,7 +26,20 @@ uses SysUtils, Classes, DOM; type - EXMLReadError = class(Exception); + TErrorSeverity = (esWarning, esError, esFatal); + + EXMLReadError = class(Exception) + private + FSeverity: TErrorSeverity; + FErrorMessage: string; + FLine: Integer; + FLinePos: Integer; + public + property Severity: TErrorSeverity read FSeverity; + property ErrorMessage: string read FErrorMessage; + property Line: Integer read FLine; + property LinePos: Integer read FLinePos; + end; procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload; procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload; @@ -43,97 +56,135 @@ procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload; procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload; procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload; +type + TDOMParseOptions = class(TObject) + private + FValidate: Boolean; + FPreserveWhitespace: Boolean; + FExpandEntities: Boolean; + FIgnoreComments: Boolean; + FCDSectionsAsText: Boolean; + public + property Validate: Boolean read FValidate write FValidate; + property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace; + property ExpandEntities: Boolean read FExpandEntities write FExpandEntities; + property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments; + property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText; + end; + + // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1 + TXMLContextAction = (xaAppendAsChildren, xaReplaceChildren, xaInsertBefore, + xaInsertAfter, xaReplace); + + TXMLErrorEvent = procedure(Error: EXMLReadError) of object; + + // This may be augmented with ByteOffset, UTF8Offset, etc. + TLocation = record + Line: Integer; + LinePos: Integer; + end; + + TXMLInputSource = class(TObject) + private + FStream: TStream; + FStringData: string; +// FBaseURI: WideString; + FSystemID: WideString; + FPublicID: WideString; +// FEncoding: string; + public + constructor Create(AStream: TStream); overload; + constructor Create(const AStringData: string); overload; + property Stream: TStream read FStream; + property StringData: string read FStringData; +// property BaseURI: WideString read FBaseURI write FBaseURI; + property SystemID: WideString read FSystemID write FSystemID; + property PublicID: WideString read FPublicID write FPublicID; +// property Encoding: string read FEncoding write FEncoding; + end; + + TDOMParser = class(TObject) + private + FOptions: TDOMParseOptions; + FOnError: TXMLErrorEvent; + public + constructor Create; + destructor Destroy; override; + procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument); + procedure ParseUri(const URI: WideString; out ADoc: TXMLDocument); + function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode; + Action: TXMLContextAction): TDOMNode; + property Options: TDOMParseOptions read FOptions; + property OnError: TXMLErrorEvent read FOnError write FOnError; + end; + + // ======================================================= implementation uses - UriParser; - -type - TSetOfChar = set of Char; + UriParser, xmlutils; const - Letter = ['A'..'Z', 'a'..'z']; - Digit = ['0'..'9']; PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9', '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*', '#', '@', '$', '_', '%']; type TDOMNotationEx = class(TDOMNotation); - TDOMAttrEx = class(TDOMAttr); - - TXMLInputSource = class; + TDOMDocumentTypeEx = class(TDOMDocumentType); TDOMElementDef = class; + TDOMAttrDef = class; TDOMEntityEx = class(TDOMEntity) protected - FInternal: Boolean; + FExternallyDeclared: Boolean; FResolved: Boolean; FOnStack: Boolean; FReplacementText: DOMString; + FStartLocation: TLocation; end; - // TODO: Do I need PEMap in DocType? Maybe move it to Reader itself? - // (memory usage - they are not needed after parsing) - TDOMDocumentTypeEx = class(TDOMDocumentType) - private - FHasPERefs: Boolean; - FPEMap: TDOMNamedNodeMap; - FElementDefs: TDOMNamedNodeMap; - function GetPEMap: TDOMNamedNodeMap; - function GetElementDefs: TDOMNamedNodeMap; - protected - property PEMap: TDOMNamedNodeMap read GetPEMap; - property ElementDefs: TDOMNamedNodeMap read GetElementDefs; - property HasPERefs: Boolean read FHasPERefs write FHasPERefs; - public - destructor Destroy; override; - end; - - TXMLReader = class; TDecoder = class; TDecoderRef = class of TDecoder; - TXMLInputSource = class + TXMLCharSource = class(TObject) private FBuf: PChar; FBufEnd: PChar; FEof: Boolean; FSurrogate: WideChar; - FReader: TXMLReader; - FParent: TXMLInputSource; + FReader: TObject; // weak reference + FParent: TXMLCharSource; FEntity: TObject; // weak reference FCursor: TObject; // weak reference - FLine: Integer; - FColumn: Integer; + FSavedLocation: TLocation; FSystemID: WideString; FPublicID: WideString; function GetSystemID: WideString; function GetPublicID: WideString; - protected - procedure FetchData; virtual; public constructor Create(const AData: WideString); function NextChar: WideChar; virtual; procedure Initialize; virtual; - procedure SetEncoding(const AEncoding: string); virtual; + function SetEncoding(const AEncoding: string): Boolean; virtual; property SystemID: WideString read GetSystemID write FSystemID; property PublicID: WideString read GetPublicID write FPublicID; end; - TXMLDecodingSource = class(TXMLInputSource) + TXMLDecodingSource = class(TXMLCharSource) private FDecoder: TDecoder; FSeenCR: Boolean; function InternalNextChar: WideChar; - procedure DecodingError(const Msg: string); overload; - procedure DecodingError(const Msg: string; const Args: array of const); overload; + procedure DecodingError(const Msg: string); + protected + procedure FetchData; virtual; public destructor Destroy; override; function NextChar: WideChar; override; - procedure SetEncoding(const AEncoding: string); override; + function SetEncoding(const AEncoding: string): Boolean; override; procedure Initialize; override; end; @@ -195,51 +246,120 @@ type MaxLength: Integer; end; - TEntityResolveEvent = procedure(const PubID, SysID: WideString; var Source: TXMLInputSource) of object; + PWideStrWrapper = ^TWideStrWrapper; + TWideStrWrapper = record + Value: WideString; + end; + TDeclType = (dtNone, dtXml, dtText); + TCPType = (ctName, ctChoice, ctSeq); + TCPQuant = (cqOnce, cqZeroOrOnce, cqZeroOrMore, cqOnceOrMore); + + TContentParticle = class(TObject) + private + FParent: TContentParticle; + FChildren: TList; + function InternalMatch(List: TList; var Index: Integer): Boolean; + function GetChildCount: Integer; + function GetChild(Index: Integer): TContentParticle; + public + CPType: TCPType; + CPQuant: TCPQuant; + Name: WideString; + constructor Create; + destructor Destroy; override; + function Add: TContentParticle; + function MatchNodeList(List: TList; var Index: Integer): Boolean; + property ChildCount: Integer read GetChildCount; + property Children[Index: Integer]: TContentParticle read GetChild; + end; + + // This class is intended to store context information during parsing + // However, right now it's written to validate completely parsed elements + TElementValidator = class(TObject) + private + FParent: TElementValidator; + // to be deleted + FList: TList; + FElementDef: TDOMElementDef; + FIndex: Integer; + FChildElementCount: Integer; + FCurCP: TContentParticle; + FAmbiguous: Boolean; + public + constructor Create(aElDef: TDOMElementDef); + destructor Destroy; override; + // to be deleted + procedure AddElement(aNode: TDOMElement); + // to be deleted + function Match: Boolean; + function IsElementAllowed(const aName: DOMString): Boolean; + property Parent: TElementValidator read FParent write FParent; + end; + TXMLReader = class private - FSource: TXMLInputSource; + FSource: TXMLCharSource; + FCtrl: TDOMParser; FCurChar: WideChar; - FWhitespace: Boolean; FXML11: Boolean; + FIntSubset: Boolean; + FDtdParsed: Boolean; + FInsideRoot: Boolean; + FRecognizePE: Boolean; + FHavePERefs: Boolean; + FDocNotValid: Boolean; FValue: TWideCharBuf; FName: TWideCharBuf; FCopyBuf: PWideCharBuf; - FIntSubset: Boolean; FAllowedDecl: TDeclType; - FDtdParsed: Boolean; - FRecognizePE: Boolean; + FLocation: TLocation; + FTokenStart: TLocation; FStandalone: Boolean; // property of Doc ? - FInvalid: Boolean; - // TODO: This array must be stored globally, not per instance FNamePages: PByteArray; FForbiddenAscii: TSetOfChar; FDocType: TDOMDocumentTypeEx; // a shortcut - FEntityLevel: Integer; + FPEMap: TDOMNamedNodeMap; + FIDRefs: TList; + + FValidate: Boolean; // parsing options, copy of FCtrl.Options FPreserveWhitespace: Boolean; - FCreateEntityRefs: Boolean; + FExpandEntities: Boolean; + FIgnoreComments: Boolean; + FCDSectionsAsText: Boolean; + procedure RaiseExpectedQmark; procedure GetChar; procedure GetCharRaw; - procedure Unget(wc: WideChar); - procedure Initialize(ASource: TXMLInputSource); - procedure InitializeRoot(ASource: TXMLInputSource); + procedure UngetCurChar; + procedure Initialize(ASource: TXMLCharSource); + procedure InitializeRoot(ASource: TXMLCharSource); procedure DoParseAttValue(Delim: WideChar); procedure DoParseFragment; - procedure DoParseExtSubset(ASource: TXMLInputSource); + procedure DoParseExtSubset(ASource: TXMLCharSource); function ContextPush(AEntity: TDOMEntityEx): Boolean; function ContextPop: Boolean; procedure XML11_BuildTables; function XML11_CheckName: Boolean; + procedure ParseQuantity(CP: TContentParticle); + procedure MarkTokenStart; + function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean; + procedure AddIdRef(Buf: PWideChar; Length: Integer); + procedure ClearIdRefs; + procedure ValidateIdRefs; + procedure StandaloneError; + procedure CallErrorHandler(E: EXMLReadError); protected FCursor: TDOMNode; + // TODO: probably TObjectList + FValStack: TList; // validation: keep track of models - procedure RaiseExc(const descr: String); overload; - procedure RaiseExc(const descr: string; const args: array of const); overload; - procedure RaiseExc(Expected: WideChar); overload; + procedure FatalError(const descr: String; AtTokenStart: Boolean=False); overload; + procedure FatalError(const descr: string; const args: array of const; AtTokenStart: Boolean=False); overload; + procedure FatalError(Expected: WideChar); overload; function SkipWhitespace: Boolean; + function SkipWhitespaceRaw: Boolean; procedure ExpectWhitespace; procedure ExpectString(const s: String); procedure ExpectChar(wc: WideChar); @@ -255,14 +375,13 @@ type function SkipQuotedLiteral: Boolean; procedure ExpectAttValue; // [10] procedure SkipPubidLiteral; // [12] - procedure SkipSystemLiteral(out Literal: WideString; Required: Boolean); + procedure SkipSystemLiteral(out Literal: WideString); procedure ParseComment; // [15] procedure ParsePI; // [16] procedure ParseCDSect; // [18] procedure ParseXmlOrTextDecl(TextDecl: Boolean); function ParseEq: Boolean; // [25] procedure ExpectEq; - procedure ParseMisc; // [27] procedure ParseDoctypeDecl; // [28] procedure ParseMarkupDecl; // [29] procedure ParseElement; // [39] @@ -277,57 +396,72 @@ type SysIdOptional: Boolean): Boolean; procedure ProcessTextAndRefs; - procedure AssertPENesting(CurrentLevel: Integer); + procedure BadPENesting; procedure ParseEntityDecl; procedure ParseEntityDeclValue(Delim: WideChar); procedure ParseAttlistDecl; - procedure ExpectChoiceOrSeq; - procedure ParseMixedOrChildren; + procedure ExpectChoiceOrSeq(CP: TContentParticle); procedure ParseElementDecl; procedure ParseNotationDecl; - function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean; + function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLCharSource): Boolean; procedure ProcessDefaultAttributes(Element: TDOMElement); + + procedure PushVC(aElDef: TDOMElementDef); + procedure PopVC; + function CurrentElementDef: TDOMElementDef; + procedure ValidateElement(Element: TDOMElement); + procedure ValidateDTD; procedure ValidationError(const Msg: string; const args: array of const); + procedure CheckNotation(const Name: WideString); + // Some SAX-alike stuff (at a very early stage) + procedure DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean=False); + procedure DoComment(ch: PWideChar; Count: Integer); + procedure DoCDSect(ch: PWideChar; Count: Integer); + procedure DoNotationDecl(const aName, aPubID, aSysID: WideString); public doc: TDOMDocument; - constructor Create; + constructor Create; overload; + constructor Create(AParser: TDOMParser); overload; destructor Destroy; override; - procedure ProcessXML(ASource: TXMLInputSource); // [1] - procedure ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode); - procedure ProcessDTD(ASource: TXMLInputSource); // ([29]) - end; - - // AttDef/ElementDef support - TAttrDataType = ( - DT_CDATA, - DT_ID, - DT_IDREF, - DT_IDREFS, - DT_ENTITY, - DT_ENTITIES, - DT_NMTOKEN, - DT_NMTOKENS, - DT_NOTATION - ); + procedure ProcessXML(ASource: TXMLCharSource); // [1] + procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode); + procedure ProcessDTD(ASource: TXMLCharSource); // ([29]) + end; + + // Attribute/Element declarations TAttrDefault = ( - AD_IMPLIED, - AD_DEFAULT, - AD_REQUIRED, - AD_FIXED + adImplied, + adDefault, + adRequired, + adFixed + ); + + TElementContentType = ( + ctEmpty, + ctAny, + ctMixed, + ctChildren ); TDOMAttrDef = class(TDOMAttr) protected - FDataType: TAttrDataType; + FExternallyDeclared: Boolean; FDefault: TAttrDefault; - // FEnumeration: TWideStringList? array of WideStrings? + FEnumeration: array of WideString; + function AddEnumToken(const aValue: WideString): Boolean; + function HasEnumToken(const aValue: WideString): Boolean; end; - TDOMElementDef = class(TDOMElement); - - -{$i names.inc} + TDOMElementDef = class(TDOMElement) + public + FExternallyDeclared: Boolean; + ContentType: TElementContentType; + HasElementDecl: Boolean; + RootCP: TContentParticle; + constructor Create(aOwner: TDOMDocument); + destructor Destroy; override; + end; // TODO: List of registered/supported decoders function FindDecoder(const Encoding: string): TDecoderRef; @@ -343,12 +477,12 @@ procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer); begin ABuffer.MaxLength := ALength; ABuffer.Length := 0; - ABuffer.Buffer:=AllocMem(ABuffer.MaxLength*SizeOf(WideChar)); + ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar)); end; procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar); var - OldLength : integer; + OldLength: Integer; begin if ABuffer.Length >= ABuffer.MaxLength then begin @@ -361,67 +495,102 @@ begin Inc(ABuffer.Length); end; -function IsValidEncName(const s: WideString): Boolean; -var - I: Integer; +{ TXMLInputSource } + +constructor TXMLInputSource.Create(AStream: TStream); begin - Result := False; - if (s = '') or (s[1] > #255) or not (char(s[1]) in ['A'..'Z', 'a'..'z']) then - Exit; - for I := 2 to Length(s) do - if (s[I] > #255) or not (char(s[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then - Exit; - Result := True; + inherited Create; + FStream := AStream; end; -{ TDOMDocumentTypeEx } +constructor TXMLInputSource.Create(const AStringData: string); +begin + inherited Create; + FStringData := AStringData; +end; -destructor TDOMDocumentTypeEx.Destroy; +{ TDOMParser } + +constructor TDOMParser.Create; begin - FPEMap.Free; - FElementDefs.Free; + FOptions := TDOMParseOptions.Create; +end; + +destructor TDOMParser.Destroy; +begin + FOptions.Free; inherited Destroy; end; -function TDOMDocumentTypeEx.GetElementDefs: TDOMNamedNodeMap; +procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument); +var + InputSrc: TXMLCharSource; begin - if FElementDefs = nil then - FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE); - Result := FElementDefs; + with TXMLReader.Create(Self) do + try + InputSrc := nil; + if Assigned(Src) then + begin + if Assigned(Src.FStream) then + InputSrc := TXMLStreamInputSource.Create(Src.FStream, False) + else if Src.FStringData <> '' then + InputSrc := TXMLStreamInputSource.Create(TStringStream.Create(Src.FStringData), True) + else if (Src.SystemID <> '') then + ResolveEntity(Src.SystemID, Src.PublicID, InputSrc); + end; + if Assigned(InputSrc) then + ProcessXML(InputSrc) + else + FatalError('No input source specified'); + finally + ADoc := TXMLDocument(doc); + Free; + end; end; -function TDOMDocumentTypeEx.GetPEMap: TDOMNamedNodeMap; +procedure TDOMParser.ParseUri(const URI: WideString; out ADoc: TXMLDocument); +var + Src: TXMLCharSource; begin - if FPEMap = nil then - FPEMap := TDOMNamedNodeMap.Create(Self, ENTITY_NODE); - Result := FPEMap; + ADoc := nil; + with TXMLReader.Create(Self) do + try + if ResolveEntity(URI, '', Src) then + ProcessXML(Src); + finally + ADoc := TXMLDocument(doc); + Free; + end; end; +function TDOMParser.ParseWithContext(Src: TXMLInputSource; + Context: TDOMNode; Action: TXMLContextAction): TDOMNode; +begin + // TODO: implement + Result := nil; +end; // TODO: These classes still cannot be considered as the final solution... { TXMLInputSource } -constructor TXMLInputSource.Create(const AData: WideString); +constructor TXMLCharSource.Create(const AData: WideString); begin inherited Create; FBuf := PChar(PWideChar(AData)); FBufEnd := FBuf + Length(AData) * sizeof(WideChar); end; -procedure TXMLInputSource.Initialize; +procedure TXMLCharSource.Initialize; begin - FLine := 1; - FColumn := 0; end; -function TXMLInputSource.NextChar: WideChar; +function TXMLCharSource.NextChar: WideChar; begin - if FSurrogate <> #0 then - begin - Result := FSurrogate; - FSurrogate := #0; - end + Result := FSurrogate; + FSurrogate := #0; + if Result <> #0 then + Exit else if FBufEnd <= FBuf then begin Result := #0; @@ -432,38 +601,14 @@ begin Result := PWideChar(FBuf)^; Inc(FBuf, sizeof(WideChar)); end; - // TODO: Column counting - surrogate pair is a single char! - if Result = #10 then - begin - Inc(FLine); - FColumn := 0; - end - else - Inc(FColumn); -end; - -procedure TXMLDecodingSource.DecodingError(const Msg: string); -begin - FReader.RaiseExc(Msg); end; -procedure TXMLDecodingSource.DecodingError(const Msg: string; - const Args: array of const); +function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean; begin - FReader.RaiseExc(Msg, Args); + Result := True; // always succeed end; -procedure TXMLInputSource.FetchData; -begin - FEof := True; -end; - -procedure TXMLInputSource.SetEncoding(const AEncoding: string); -begin - // do nothing -end; - -function TXMLInputSource.GetPublicID: WideString; +function TXMLCharSource.GetPublicID: WideString; begin if FPublicID <> '' then Result := FPublicID @@ -473,7 +618,7 @@ begin Result := ''; end; -function TXMLInputSource.GetSystemID: WideString; +function TXMLCharSource.GetSystemID: WideString; begin if FSystemID <> '' then Result := FSystemID @@ -491,21 +636,27 @@ begin inherited Destroy; end; +procedure TXMLDecodingSource.DecodingError(const Msg: string); +begin + TXMLReader(FReader).FatalError(Msg); +end; + +procedure TXMLDecodingSource.FetchData; +begin + FEof := True; +end; + function TXMLDecodingSource.InternalNextChar: WideChar; begin // TODO: find a place for it, finally... - if FSurrogate <> #0 then - begin - Result := FSurrogate; - FSurrogate := #0; + Result := FSurrogate; + FSurrogate := #0; + if Result <> #0 then Exit; - end; if FBufEnd <= FBuf then FetchData; if not FEof then - Result := FDecoder.DecodeNext - else - Result := #0; + Result := FDecoder.DecodeNext; end; function TXMLDecodingSource.NextChar: WideChar; @@ -513,7 +664,7 @@ begin Result := InternalNextChar; if FSeenCR then begin - if (Result = #10) or ((Result = #$85) and FReader.FXML11) then + if (Result = #10) or ((Result = #$85) and TXMLReader(FReader).FXML11) then Result := InternalNextChar; FSeenCR := False; end; @@ -524,21 +675,12 @@ begin end; #$85, #$2028: - if FReader.FXML11 then + if TXMLReader(FReader).FXML11 then Result := #10; end; - if (Result < #256) and (char(Result) in FReader.FForbiddenAscii) or - ((ord(Result) or 1) = $FFFF) then + if (Result < #256) and (char(Result) in TXMLReader(FReader).FForbiddenAscii) or + (Result >= #$FFFE) then DecodingError('Invalid character'); - - // TODO: Column counting - surrogate pair is a single char! - if Result = #10 then - begin - Inc(FLine); - FColumn := 0; - end - else - Inc(FColumn); end; procedure TXMLDecodingSource.Initialize; @@ -570,20 +712,19 @@ begin FDecoder := TUTF8Decoder.Create(Self); end; -procedure TXMLDecodingSource.SetEncoding(const AEncoding: string); +function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean; var NewDecoder: TDecoderRef; begin + Result := True; if FDecoder.Supports(AEncoding) then // no change needed Exit; // hardcoded stuff - special case of UCS2 if FDecoder is TUCS2Decoder then begin // check for 'UTF-16LE' or 'UTF-16BE' - if SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding) then - Exit - else - DecodingError('Current encoding cannot be switched to ''%s''', [AEncoding]); + Result := SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding); + Exit; end; NewDecoder := FindDecoder(AEncoding); if Assigned(NewDecoder) then @@ -592,7 +733,7 @@ begin FDecoder := NewDecoder.Create(Self); end else - DecodingError('Encoding ''%s'' is not supported', [AEncoding]); + Result := False; end; @@ -725,16 +866,16 @@ begin Inc(FBuf); if Result < #$80 then Exit; - if Byte(Result) and $40 = 0 then - DecodingError('Invalid UTF8 sequence start byte'); + if Word(Result) and $40 = 0 then + DecodingError('Invalid UTF-8 sequence start byte'); bc := 1; - if Byte(Result) and $20 <> 0 then + if Word(Result) and $20 <> 0 then begin Inc(bc); - if Byte(Result) and $10 <> 0 then + if Word(Result) and $10 <> 0 then begin Inc(bc); - if Byte(Result) and $8 <> 0 then + if Word(Result) and $8 <> 0 then DecodingError('UCS4 character out of supported range'); end; end; @@ -747,7 +888,7 @@ begin while bc > 0 do begin if ord(FBuf[0]) and $C0 <> $80 then - DecodingError('Invalid byte in UTF8 sequence'); + DecodingError('Invalid byte in UTF-8 sequence'); Value := (Value shl 6) or (Cardinal(FBuf[0]) and $3F); Inc(FBuf); Dec(bc); @@ -755,7 +896,7 @@ begin Value := Value and MaxCode[I]; // RFC2279 check if Value <= MaxCode[I-1] then - DecodingError('Invalid UTF8 sequence'); + DecodingError('Invalid UTF-8 sequence'); case Value of 0..$D7FF, $E000..$FFFF: begin @@ -780,17 +921,27 @@ end; { TXMLReader } -function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean; +procedure TXMLReader.MarkTokenStart; +begin + FTokenStart := FLocation; +end; + +function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLCharSource): Boolean; var AbsSysID: WideString; Filename: string; Stream: TStream; begin - Result := False; + Result := True; + if Assigned(FSource) then + Result := ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID) + else + AbsSysID := SystemID; - if ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID) then + if Result then begin Source := nil; + Result := False; // TODO: alternative resolvers if URIToFilename(AbsSysID, Filename) then begin @@ -808,102 +959,130 @@ begin end; end; -procedure TXMLReader.InitializeRoot(ASource: TXMLInputSource); +procedure TXMLReader.InitializeRoot(ASource: TXMLCharSource); begin Initialize(ASource); GetChar; // TODO: presence of BOM must prevent UTF-8 encoding from being changed - CheckForChar(#$FEFF); // skip BOM, if one is present + if CheckForChar(#$FEFF) then // skip BOM, if one is present + Dec(FLocation.LinePos); end; -procedure TXMLReader.Initialize(ASource: TXMLInputSource); +procedure TXMLReader.Initialize(ASource: TXMLCharSource); begin FSource := ASource; FSource.FReader := Self; + FLocation.Line := 1; + FLocation.LinePos := 0; // TODO: or 1? FSource.Initialize; end; procedure TXMLReader.GetCharRaw; begin FCurChar := FSource.NextChar; - FWhitespace := (FCurChar = #32) or (FCurChar = #10) or - (FCurChar = #9) or (FCurChar = #13); - // Used for handling the internal DTD subset - if Assigned(FCopyBuf) and (FSource.FParent = nil) then - BufAppend(FCopyBuf^, FCurChar); + if FCurChar = #10 then + begin + Inc(FLocation.Line); + FLocation.LinePos := 0; + end + else + Inc(FLocation.LinePos); end; procedure TXMLReader.GetChar; begin GetCharRaw; + // Used for handling the internal DTD subset + if Assigned(FCopyBuf) and (FSource.FParent = nil) then + BufAppend(FCopyBuf^, FCurChar); if not FRecognizePE then Exit; if (FCurChar = #0) and ContextPop then begin - Unget(FCurChar); + UngetCurChar; FCurChar := #32; - FWhitespace := True; end else if FCurChar = '%' then begin - FCurChar := FSource.NextChar; + GetCharRaw; if not CheckName then begin - Unget(FCurChar); + UngetCurChar; FCurChar := '%'; Exit; end; if FCurChar = ';' then // "%pe1;%pe2" - must not recognize pe2 immediately! GetCharRaw else - RaiseExc(WideChar(';')); + FatalError(WideChar(';')); StartPE; FCurChar := #32; - FWhitespace := True; end; end; -procedure TXMLReader.Unget(wc: WideChar); +procedure TXMLReader.UngetCurChar; begin - FSource.FSurrogate := wc; + FSource.FSurrogate := FCurChar; end; procedure TXMLReader.RaiseExpectedQmark; begin - RaiseExc('Expected single or double quote'); + FatalError('Expected single or double quote'); end; -procedure TXMLReader.RaiseExc(Expected: WideChar); +procedure TXMLReader.FatalError(Expected: WideChar); begin // FIX: don't output what is found - anything may be found, including exploits... - RaiseExc('Expected "%1s"', [string(Expected)]); + FatalError('Expected "%1s"', [string(Expected)]); end; -procedure TXMLReader.RaiseExc(const descr: String); +procedure TXMLReader.FatalError(const descr: String; AtTokenStart: Boolean); +var + RealLocation: ^TLocation; + E: EXMLReadError; begin - raise EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, FSource.FLine, FSource.FColumn, descr]); + if AtTokenStart then + RealLocation := @FTokenStart + else + RealLocation := @FLocation; + E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, RealLocation^.Line, RealLocation^.LinePos, descr]); + E.FSeverity := esFatal; + E.FErrorMessage := descr; + E.FLine := RealLocation^.Line; + E.FLinePos := RealLocation^.LinePos; + CallErrorHandler(E); end; -procedure TXMLReader.RaiseExc(const descr: string; const args: array of const); +procedure TXMLReader.FatalError(const descr: string; const args: array of const; AtTokenStart: Boolean); begin - RaiseExc(Format(descr, args)); + FatalError(Format(descr, args), AtTokenStart); end; function TXMLReader.SkipWhitespace: Boolean; begin Result := False; - while FWhitespace do + while (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) do begin GetChar; Result := True; end; end; +function TXMLReader.SkipWhitespaceRaw: Boolean; +begin + Result := False; + while (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) do + begin + GetCharRaw; + Result := True; + end; +end; + procedure TXMLReader.ExpectWhitespace; begin if not SkipWhitespace then - RaiseExc('Expected whitespace'); + FatalError('Expected whitespace'); end; procedure TXMLReader.ExpectChar(wc: WideChar); @@ -911,7 +1090,7 @@ begin if FCurChar = wc then GetChar else - RaiseExc(wc); + FatalError(wc); end; procedure TXMLReader.ExpectString(const s: String); @@ -921,7 +1100,7 @@ begin for I := 1 to Length(s) do begin if FCurChar <> WideChar(s[i]) then - RaiseExc('Expected "%s"', [s]); + FatalError('Expected "%s"', [s]); GetChar; end; end; @@ -952,83 +1131,83 @@ end; constructor TXMLReader.Create; begin inherited Create; - // Naming bitmap: Point to static data for XML 1.0, - // and allocate buffer in XML11_BuildTables when necessary. - FNamePages := @NamePages; BufAllocate(FName, 128); BufAllocate(FValue, 512); + FIDRefs := TList.Create; + FValStack := TList.Create; + + // Set char rules to XML 1.0 + FNamePages := @NamePages; FForbiddenAscii := [#1..#8, #11..#12, #14..#31]; - // TODO: put under user control - FPreserveWhitespace := True; - FCreateEntityRefs := True; +end; + +constructor TXMLReader.Create(AParser: TDOMParser); +begin + Create; + FCtrl := AParser; + FValidate := FCtrl.Options.Validate; + FPreserveWhitespace := FCtrl.Options.PreserveWhitespace; + FExpandEntities := FCtrl.Options.ExpandEntities; + FCDSectionsAsText := FCtrl.Options.CDSectionsAsText; + FIgnoreComments := FCtrl.Options.IgnoreComments; end; destructor TXMLReader.Destroy; +var + I: Integer; begin - if FXML11 then - FreeMem(FNamePages); FreeMem(FName.Buffer); FreeMem(FValue.Buffer); while ContextPop do; // clean input stack FSource.Free; + FPEMap.Free; + for I := FValStack.Count-1 downto 0 do + TObject(FValStack[I]).Free; + FValStack.Free; + ClearIDRefs; + FIDRefs.Free; inherited Destroy; end; procedure TXMLReader.XML11_BuildTables; -var - I: Integer; begin - if not FXML11 then - GetMem(FNamePages, 512); + FNamePages := Xml11NamePages; FXML11 := True; - for I := 0 to 255 do - FNamePages^[I] := ord(Byte(I) in Xml11HighPages); - FNamePages^[0] := 2; - FNamePages^[3] := $2c; - FNamePages^[$20] := $2a; - FNamePages^[$21] := $2b; - FNamePages^[$2f] := $29; - FNamePages^[$30] := $2d; - FNamePages^[$fd] := $28; - - Move(FNamePages^, FNamePages^[256], 256); - FNamePages^[$100] := $19; - FNamePages^[$103] := $2E; - FNamePages^[$120] := $2F; FForbiddenAscii := [#1..#8, #11..#12, #14..#31, #$7F..#$84, #$86..#$9F]; end; - -procedure TXMLReader.ProcessXML(ASource: TXMLInputSource); +procedure TXMLReader.ProcessXML(ASource: TXMLCharSource); begin doc := TXMLDocument.Create; FCursor := doc; InitializeRoot(ASource); - FAllowedDecl := dtXml; - ParseMisc; - FDtdParsed := True; - if FDocType = nil then - ValidationError('Missing DTD', []); - if CheckName then - ParseElement - else - RaiseExc('Expected element'); - ParseMisc; - if Assigned(FDocType) and (doc.DocumentElement.TagName <> FDocType.Name) then - ValidationError('DTD name does not match root element', []); + DoParseFragment; // case FCurChar <> #0 is handled - if FCurChar <> #0 then - RaiseExc('Text after end of document element found'); + if doc.DocumentElement = nil then + FatalError('Root element is missing'); + + if FValidate then + begin + if Assigned(FDocType) then + begin + if doc.DocumentElement.TagName <> FDocType.Name then + ValidationError('DTD name does not match root element', []); + ValidateIdRefs; + end + else + ValidationError('Missing DTD', []); + end; end; -procedure TXMLReader.ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode); +procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode); begin doc := AOwner.OwnerDocument; FCursor := AOwner; InitializeRoot(ASource); FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1'); FAllowedDecl := dtText; + FInsideRoot := True; DoParseFragment; end; @@ -1038,7 +1217,8 @@ begin if (FCurChar >= #$D800) and (FCurChar <= #$DB7F) then begin BufAppend(FName, FCurChar); - GetCharRaw; + // TODO: do I need to update Location here??? + FCurChar := FSource.NextChar; Result := (FCurChar >= #$DC00) and (FCurChar <= #$DFFF); end else @@ -1073,7 +1253,11 @@ end; procedure TXMLReader.RaiseNameNotFound; begin - RaiseExc('Name starts with invalid character'); + // Coming at no cost, this allows more user-friendly error messages + if (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) then + FatalError('Whitespace is not allowed here') + else + FatalError('Name starts with invalid character'); end; function TXMLReader.ExpectName: WideString; @@ -1110,6 +1294,7 @@ function TXMLReader.ParseCharRef: Boolean; // [66] var Value: Integer; begin + GetCharRaw; // skip '&' Result := FCurChar = '#'; if Result then begin @@ -1143,7 +1328,7 @@ begin if FXML11 then BufAppend(FValue, WideChar(Value)) else - RaiseExc('Invalid character reference'); + FatalError('Invalid character reference'); $09, $0A, $0D, $20..$D7FF, $E000..$FFFD: BufAppend(FValue, WideChar(Value)); $10000..$10FFFF: @@ -1152,7 +1337,7 @@ begin BufAppend(FValue, WideChar($DC00 xor (Value and $3FF))); end; else - RaiseExc('Invalid character reference'); + FatalError('Invalid character reference'); end; end; end; @@ -1165,17 +1350,16 @@ begin while (FCurChar <> Delim) and (FCurChar <> #0) do begin if FCurChar = '<' then - RaiseExc('Literal "<" in attribute value') + FatalError('Character ''<'' is not allowed in attribute value') else if FCurChar <> '&' then begin - if FWhitespace then + if (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) then FCurChar := #32; BufAppend(FValue, FCurChar); GetCharRaw; end else begin - GetCharRaw; // skip '&' if ParseCharRef then Continue; @@ -1184,12 +1368,12 @@ begin begin if FValue.Length > 0 then begin - FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length)); + DoText(FValue.Buffer, FValue.Length); FValue.Length := 0; end; if RefNode.SystemID <> '' then - RaiseExc('External entity reference is not allowed in attribute value'); + FatalError('External entity reference is not allowed in attribute value', True); IncludeEntity(RefNode, True); end; @@ -1197,22 +1381,24 @@ begin end; // while if FValue.Length > 0 then begin - FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length)); + DoText(FValue.Buffer, FValue.Length); FValue.Length := 0; end; end; procedure TXMLReader.DoParseFragment; begin + // SAX: ContentHandler.StartDocument() - here? ParseContent; if FCurChar <> #0 then - RaiseExc('Closing tag not allowed here'); + FatalError('Closing tag is not allowed here'); + // SAX: ContentHandler.EndDocument() - here? or somewhere in destructor? end; function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean; var - Src: TXMLInputSource; + Src: TXMLCharSource; begin if AEntity.SystemID <> '' then begin @@ -1227,35 +1413,37 @@ begin FAllowedDecl := dtText; end else - Src := TXMLInputSource.Create(AEntity.FReplacementText); + Src := TXMLCharSource.Create(AEntity.FReplacementText); AEntity.FOnStack := True; Src.FEntity := AEntity; Src.FParent := FSource; Src.FCursor := FCursor; - Unget(FCurChar); // remember FCurChar in previous context + UngetCurChar; // remember FCurChar and current location in previous context + Src.FSavedLocation := FLocation; - Inc(FEntityLevel); Initialize(Src); Result := True; end; function TXMLReader.ContextPop: Boolean; var - Src: TXMLInputSource; + Src: TXMLCharSource; + TmpLocation: TLocation; begin Result := Assigned(FSource.FParent); if Result then begin Src := FSource.FParent; + TmpLocation := FSource.FSavedLocation; if Assigned(FSource.FEntity) then TDOMEntityEx(FSource.FEntity).FOnStack := False; FCursor := TDOMNode(FSource.FCursor); FSource.Free; FSource := Src; - Dec(FEntityLevel); GetChar; // re-classify - case of "%pe1;%pe2;" + FLocation := TmpLocation; end; end; @@ -1266,12 +1454,13 @@ begin if not AEntity.FResolved then begin if AEntity.FOnStack then - RaiseExc('Entity ''%s'' recursively references itself', [AEntity.NodeName]); + FatalError('Entity ''%s'' recursively references itself', [AEntity.NodeName]); if ContextPush(AEntity) then begin GetCharRaw; - CheckForChar(#$FEFF); + if CheckForChar(#$FEFF) then + Dec(FLocation.LinePos); FCursor := AEntity; // build child node tree for the entity try @@ -1287,7 +1476,7 @@ begin end; end; Node := FCursor; - if FCreateEntityRefs or (not AEntity.FResolved) then + if (not FExpandEntities) or (not AEntity.FResolved) then begin Node := doc.CreateEntityReference(AEntity.NodeName); FCursor.AppendChild(Node); @@ -1307,7 +1496,9 @@ var PEnt: TDOMEntityEx; begin SetString(PEName, FName.Buffer, FName.Length); - PEnt := FDocType.PEMap.GetNamedItem(PEName) as TDOMEntityEx; + PEnt := nil; + if Assigned(FPEMap) then + PEnt := FPEMap.GetNamedItem(PEName) as TDOMEntityEx; if PEnt = nil then // TODO -cVC: Referencing undefined PE begin // (These are classified as 'optional errors'...) // ValidationError('Undefined parameter entity referenced: %s', [PEName]); @@ -1315,7 +1506,7 @@ begin end; if PEnt.FOnStack then - RaiseExc('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]); + FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]); ContextPush(PEnt); end; @@ -1326,6 +1517,7 @@ var Predef: WideChar; begin Result := nil; + MarkTokenStart; RefName := ExpectName; ExpectChar(';'); Predef := ResolvePredefined(RefName); @@ -1338,17 +1530,17 @@ begin if Result = nil then begin - if FStandalone or (FDocType = nil) or not (FDocType.HasPERefs or (FDocType.SystemID <> '')) then - RaiseExc('Undefined entity ''%s'' referenced', [RefName]) + if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then + FatalError('Reference to undefined entity ''%s''', [RefName], True) else ValidationError('Undefined entity ''%s'' referenced', [RefName]); end else begin - if FStandalone and (not Result.FInternal) then - RaiseExc('Standalone constraint violation'); + if FStandalone and Result.FExternallyDeclared then + FatalError('Standalone constraint violation', True); if Result.NotationName <> '' then - RaiseExc('Reference to unparsed entity ''%s''', [RefName]); + FatalError('Reference to unparsed entity ''%s''', [RefName], True); end; end; end; @@ -1356,43 +1548,43 @@ end; procedure TXMLReader.ProcessTextAndRefs; var nonWs: Boolean; - last: WideChar; RefNode: TDOMEntityEx; begin FValue.Length := 0; nonWs := False; - FAllowedDecl := dtNone; + MarkTokenStart; while (FCurChar <> '<') and (FCurChar <> #0) do begin if FCurChar <> '&' then begin - if not FWhitespace then + if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then nonWs := True; BufAppend(FValue, FCurChar); if FCurChar = '>' then with FValue do - if (Length >= 3) and - (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then - RaiseExc('Literal '']]>'' is not allowed in text'); + if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then + begin + Dec(FLocation.LinePos, 2); + FatalError('Literal '']]>'' is not allowed in text'); + end; GetCharRaw; end else begin - GetCharRaw; // skip '&' + if not FInsideRoot then + FatalError('Illegal at document level'); if ParseCharRef then begin - last := FValue.Buffer[FValue.Length-1]; - if (last <> #9) and (last <> #10) and (last <> #13) and (last <> #32) then - nonWs := True; + nonWs := True; // CharRef to whitespace is not considered whitespace Continue; end; - nonWs := True; RefNode := ParseReference; if Assigned(RefNode) then begin if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then begin - FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length)); + // 'Reference illegal at root' is checked above, no need to check here + DoText(FValue.Buffer, FValue.Length, not nonWs); FValue.Length := 0; nonWs := False; end; @@ -1400,11 +1592,16 @@ begin end; end; end; // while - if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then + if FInsideRoot then begin - FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length)); - FValue.Length := 0; - end; + if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then + begin + DoText(FValue.Buffer, FValue.Length, not nonWs); + FValue.Length := 0; + end; + end + else if nonWs then + FatalError('Illegal at document level', True); end; procedure TXMLReader.ExpectAttValue; // [10] @@ -1429,6 +1626,7 @@ begin begin Delim := FCurChar; GetCharRaw; // skip quote + MarkTokenStart; FValue.Length := 0; while (FCurChar <> Delim) and (FCurChar <> #0) do begin @@ -1442,28 +1640,35 @@ end; procedure TXMLReader.SkipPubidLiteral; // [12] var I: Integer; + wc: WideChar; begin if SkipQuotedLiteral then begin for I := 0 to FValue.Length-1 do - if (FValue.Buffer[I] > #255) or not (Char(FValue.Buffer[I]) in PubidChars) then - RaiseExc('Illegal Public ID literal') + begin + wc := FValue.Buffer[I]; + if (wc > #255) or not (Char(wc) in PubidChars) then + FatalError('Illegal Public ID literal', True); + if (wc = #10) or (wc = #13) then + FValue.Buffer[I] := #32; + end; end else RaiseExpectedQMark; end; -procedure TXMLReader.SkipSystemLiteral(out Literal: WideString; Required: Boolean); +procedure TXMLReader.SkipSystemLiteral(out Literal: WideString); begin if SkipQuotedLiteral then SetString(Literal, FValue.Buffer, FValue.Length) - else if Required then + else RaiseExpectedQMark; end; procedure TXMLReader.ParseComment; // [15] begin ExpectString('--'); + MarkTokenStart; FValue.Length := 0; repeat BufAppend(FValue, FCurChar); @@ -1472,14 +1677,13 @@ begin if (Length >= 2) and (Buffer[Length-1] = '-') and (Buffer[Length-2] = '-') then begin - Dec(Length, 2); - if Assigned(FCursor) then - FCursor.AppendChild(doc.CreateCommentBuf(Buffer, Length)); ExpectChar('>'); + Dec(Length, 2); + DoComment(Buffer, Length); Exit; end; until FCurChar = #0; - RaiseExc('Unterminated comment'); + FatalError('Unterminated comment', True); end; procedure TXMLReader.ParsePI; // [16] @@ -1487,6 +1691,7 @@ var Name, Value: WideString; begin GetCharRaw; // skip '?' + MarkTokenStart; Name := ExpectName; with FName do @@ -1496,21 +1701,21 @@ begin ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then begin if Name <> 'xml' then - RaiseExc('''xml'' is a reserved word; it must be lowercase'); - if FAllowedDecl <> dtNone then + FatalError('''xml'' is a reserved word; it must be lowercase', True); + + // Declaration is allowed only at the very beginning of the _external_ entity + if (FTokenStart.Line = 1) and (FTokenStart.LinePos = 3) and (FSource.FSystemID <> '') then begin ParseXmlOrTextDecl(FAllowedDecl = dtText); - FAllowedDecl := dtNone; Exit; end else - RaiseExc('XML declaration not allowed here'); + FatalError('XML declaration is not allowed here', True); end; if FCurChar <> '?' then ExpectWhitespace; - FAllowedDecl := dtNone; FValue.Length := 0; repeat BufAppend(FValue, FCurChar); @@ -1521,12 +1726,13 @@ begin begin Dec(Length, 2); SetString(Value, Buffer, Length); + // SAX: ContentHandler.ProcessingInstruction(Name, Value); if Assigned(FCursor) then FCursor.AppendChild(Doc.CreateProcessingInstruction(Name, Value)); Exit; end; until FCurChar = #0; - RaiseExc('Unterminated processing instruction'); + FatalError('Unterminated processing instruction', True); end; @@ -1542,12 +1748,12 @@ begin begin ExpectString('version'); // [24] ExpectEq; - SkipSystemLiteral(TmpStr, True); + SkipSystemLiteral(TmpStr); IsXML11 := False; if TmpStr = '1.1' then // Checking for bad chars is implied IsXML11 := True else if TmpStr <> '1.0' then - RaiseExc('Illegal version number'); + FatalError('Illegal version number', True); if not TextDecl then begin @@ -1558,7 +1764,7 @@ begin end else // parsing external entity if IsXML11 and not FXML11 then - RaiseExc('XML 1.0 document cannot invoke XML 1.1 entities'); + FatalError('XML 1.0 document cannot invoke XML 1.1 entities', True); if FCurChar <> '?' then ExpectWhitespace; @@ -1569,12 +1775,13 @@ begin begin ExpectString('encoding'); ExpectEq; - SkipSystemLiteral(TmpStr, True); + SkipSystemLiteral(TmpStr); - if not IsValidEncName(TmpStr) then - RaiseExc('Illegal encoding name'); + if not IsValidXmlEncoding(TmpStr) then + FatalError('Illegal encoding name', True); - FSource.SetEncoding(TmpStr); // <-- Wide2Ansi conversion here + if not FSource.SetEncoding(TmpStr) then // <-- Wide2Ansi conversion here + FatalError('Encoding ''%s'' is not supported', [TmpStr], True); // getting here means that specified encoding is supported // TODO: maybe assign the 'preferred' encoding name? if not TextDecl and doc.InheritsFrom(TXMLDocument) then @@ -1589,12 +1796,12 @@ begin begin ExpectString('standalone'); ExpectEq; - SkipSystemLiteral(TmpStr, True); + SkipSystemLiteral(TmpStr); if TmpStr = 'yes' then FStandalone := True else if TmpStr <> 'no' then - RaiseExc('Only "yes" or "no" are permitted as values of "standalone"'); - SkipWhitespace; + FatalError('Only "yes" or "no" are permitted as values of "standalone"', True); + SkipWhitespaceRaw; end; ExpectString('?>'); @@ -1603,17 +1810,15 @@ end; procedure TXMLReader.ParseDoctypeDecl; // [28] var IntSubset: TWideCharBuf; - Src, OldSrc: TXMLInputSource; + Src, OldSrc: TXMLCharSource; begin - FAllowedDecl := dtNone; - if FDtdParsed then - RaiseExc('Markup declaration not allowed here'); + FatalError('Markup declaration is not allowed here'); - ExpectString('DOCTYPE'); // gives possibly incorrect error message + ExpectString('DOCTYPE'); ExpectWhitespace; - FDocType := TDOMDocumentTypeEx.Create(doc); + FDocType := TDOMDocumentTypeEx(TDOMDocumentType.Create(doc)); FDtdParsed := True; { To comply with certain output tests, we must insert PIs coming from internal subset before DocType node. This looks very synthetic, but let it be... @@ -1622,7 +1827,7 @@ begin FDocType.FName := ExpectName; ExpectWhitespace; ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False); - SkipWhitespace; + SkipWhitespaceRaw; if FCurChar = '[' then begin @@ -1640,132 +1845,102 @@ begin FCopyBuf := nil; FreeMem(IntSubset.Buffer); end; - SkipWhitespace; + SkipWhitespaceRaw; end; ExpectChar('>'); - if FDocType.SystemID <> '' then + if (FDocType.SystemID <> '') and ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then begin - if ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then - begin - OldSrc := FSource; - Unget(FCurChar); - FCursor := nil; - try - DoParseExtSubset(Src); - finally - while ContextPop do; // Cleanup after possible exceptions - FSource.Free; - FSource := OldSrc; - GetChar; - FCursor := Doc; - end; + // DTD parsing code assumes that FSource is RootSource, + // therefore we cannot use ContextPush here... + OldSrc := FSource; + UngetCurChar; + FCursor := nil; + try + DoParseExtSubset(Src); + finally + while ContextPop do; // Cleanup after possible exceptions + FSource.Free; + FSource := OldSrc; + GetChar; + FCursor := Doc; end; end; finally doc.AppendChild(FDocType); end; -end; - -procedure TXMLReader.ParseMisc; -begin - repeat - if SkipWhitespace then - FAllowedDecl := dtNone; - if not CheckForChar('<') then - Break; - if CheckForChar('!') then - begin - FAllowedDecl := dtNone; - if FCurChar = '-' then - ParseComment - else - ParseDoctypeDecl; - end - else - if FCurChar = '?' then - ParsePI - else - Break; - until FCurChar = #0; - FAllowedDecl := dtNone; + ValidateDTD; end; function TXMLReader.ParseEq: Boolean; // [25] begin - while FWhitespace do GetCharRaw; + SkipWhitespaceRaw; Result := FCurChar = '='; if Result then begin GetCharRaw; - while FWhitespace do GetCharRaw; + SkipWhitespaceRaw; end; end; procedure TXMLReader.ExpectEq; begin if not ParseEq then - RaiseExc('Expected "="'); + FatalError('Expected "="'); end; { DTD stuff } -procedure TXMLReader.AssertPENesting(CurrentLevel: Integer); +procedure TXMLReader.BadPENesting; begin - if CurrentLevel <> FEntityLevel then - ValidationError('Parameter entities must be properly nested', []); + ValidationError('Parameter entities must be properly nested', []); end; -// content model - -type - TElementContentType = ( - ctEmpty, - ctAny, - ctMixed, - ctName, - ctChoice, - ctSeq - ); +procedure TXMLReader.StandaloneError; +begin + ValidationError('Standalone constriant violation', []); +end; - TElementContentQuant = ( - cqNone, - cqOpt, - cqReq, - cqPlus - ); +procedure TXMLReader.CheckNotation(const Name: WideString); +begin + if FDocType.Notations.GetNamedItem(Name) = nil then + ValidationError('Notation ''%s'' is not declared', [Name]); +end; -{ - TElementContent = record - ContentType: TElementContentType; - ContentQuant: TElementContentQuant; - Name: WideString; - Children: array of TElementContent; - end; -} +procedure TXMLReader.ParseQuantity(CP: TContentParticle); +begin + if CheckForChar('?') then + CP.CPQuant := cqZeroOrOnce + else if CheckForChar('*') then + CP.CPQuant := cqZeroOrMore + else if CheckForChar('+') then + CP.CPQuant := cqOnceOrMore; +end; -procedure TXMLReader.ExpectChoiceOrSeq(); // [49], [50] +procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle); // [49], [50] var Delim: WideChar; - PELevel: Integer; + CurrentEntity: TObject; + CurrentCP: TContentParticle; begin Delim := #0; repeat + CurrentCP := CP.Add; SkipWhitespace; if FCurChar = '(' then begin - PELevel := FEntityLevel; + CurrentEntity := FSource.FEntity; GetChar; - ExpectChoiceOrSeq; - AssertPENesting(PELevel); + ExpectChoiceOrSeq(CurrentCP); + if CurrentEntity <> FSource.FEntity then + BadPENesting; GetChar; end else - SkipName; - if CheckForChar('?') then - else if CheckForChar('*') then - else if CheckForChar('+') then; + CurrentCP.Name := ExpectName; + + ParseQuantity(CurrentCP); SkipWhitespace; if FCurChar = ')' then @@ -1775,93 +1950,115 @@ begin if (FCurChar = '|') or (FCurChar = ',') then Delim := FCurChar else - RaiseExc('Expected "|" or ","'); + FatalError('Expected "|" or ","'); end else if FCurChar <> Delim then - RaiseExc(Delim); + FatalError(Delim); GetChar; // skip delimiter until False; + if Delim = '|' then + CP.CPType := ctChoice + else + CP.CPType := ctSeq; // '(foo)' is a sequence! end; -procedure TXMLReader.ParseMixedOrChildren; +procedure TXMLReader.ParseElementDecl; // [45] var - PELevel: Integer; + ElName: WideString; + ElDef: TDOMElementDef; NeedAsterisk: Boolean; + CurrentCP: TContentParticle; + CurrentEntity: TObject; + I: Integer; begin - PELevel := FEntityLevel; - GetChar; // starting bracket - SkipWhitespace; - if CheckForChar('#') then // Mixed section [51] - begin - ExpectString('PCDATA'); - SkipWhitespace; - NeedAsterisk := False; - while FCurChar <> ')' do - begin - ExpectChar('|'); - NeedAsterisk := True; - SkipWhitespace; - SkipName; - SkipWhitespace; - end; - AssertPENesting(PELevel); - GetChar; - if NeedAsterisk then - ExpectChar('*') - else - CheckForChar('*'); - end - else // Parse Children section [47] + ElName := ExpectName; + ExpectWhitespace; + ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(ElName)); + if Assigned(ElDef) and ElDef.HasElementDecl then + ValidationError('Duplicate declaration of element ''%s''', [ElName]); + if ElDef = nil then begin - ExpectChoiceOrSeq; - AssertPENesting(PELevel); - GetChar; - if CheckForChar('?') then - else if CheckForChar('*') then - else if CheckForChar('+') then; + ElDef := TDOMElementDef.Create(doc); + ElDef.FNodeName := ElName; + FDocType.ElementDefs.SetNamedItem(ElDef); end; -end; - -procedure TXMLReader.ParseElementDecl; // [45] -begin - SkipName; - ExpectWhitespace; + ElDef.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil)); + ElDef.HasElementDecl := True; - // Get contentspec [46] if FCurChar = 'E' then - ExpectString('EMPTY') + begin + ExpectString('EMPTY'); + ElDef.ContentType := ctEmpty; + end else if FCurChar = 'A' then - ExpectString('ANY') + begin + ExpectString('ANY'); + ElDef.ContentType := ctAny; + end else if FCurChar = '(' then - ParseMixedOrChildren + begin + CurrentEntity := FSource.FEntity; + GetChar; // starting bracket + SkipWhitespace; + if FCurChar = '#' then // Mixed section [51] + begin + ExpectString('#PCDATA'); + SkipWhitespace; + ElDef.ContentType := ctMixed; + NeedAsterisk := False; + while FCurChar <> ')' do + begin + ExpectChar('|'); + NeedAsterisk := True; + SkipWhitespace; + + CurrentCP := ElDef.RootCP.Add; + CurrentCP.Name := ExpectName; + // TODO: rethink this + for I := ElDef.RootCP.ChildCount-2 downto 0 do + if CurrentCP.Name = ElDef.RootCP.Children[I].Name then + ValidationError('Duplicate token in mixed section', []); + SkipWhitespace; + end; + if CurrentEntity <> FSource.FEntity then + BadPENesting; + GetChar; + // TODO: does this asterisk have any real meaning? + if NeedAsterisk then + begin + ExpectChar('*'); + ElDef.RootCP.CPQuant := cqZeroOrMore; + end + else + if CheckForChar('*') then + ElDef.RootCP.CPQuant := cqZeroOrMore; + end + else // Children section [47] + begin + ElDef.ContentType := ctChildren; + ExpectChoiceOrSeq(ElDef.RootCP); + if CurrentEntity <> FSource.FEntity then + BadPENesting; + GetChar; + ParseQuantity(ElDef.RootCP); + end; + end else - RaiseExc('Invalid content specification'); + FatalError('Invalid content specification'); + // SAX: DeclHandler.ElementDecl(name, model); end; procedure TXMLReader.ParseNotationDecl; // [82] var - Notation: TDOMNotationEx; + Name, SysID, PubID: WideString; begin - Notation := TDOMNotationEx(TDOMNotation.Create(Doc)); - try - Notation.FName := ExpectName; - ExpectWhitespace; - if not ParseExternalID(Notation.FSystemID, Notation.FPublicID, True) then - RaiseExc('Expected external or public ID'); - except - Notation.Free; - raise; - end; - - if FDocType.Notations.GetNamedItem(Notation.FName) = nil then - FDocType.Notations.SetNamedItem(Notation) - else - begin - ValidationError('Duplicate notation declaration: %s', [Notation.FName]); - Notation.Free; - end; + Name := ExpectName; + ExpectWhitespace; + if not ParseExternalID(SysID, PubID, True) then + FatalError('Expected external or public ID'); + DoNotationDecl(Name, PubID, SysID); end; procedure TXMLReader.ParseAttlistDecl; // [52] @@ -1876,7 +2073,7 @@ begin ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Token)); if ElDef = nil then begin - // TODO -cVC: must distinguish ElementDef created here from one explicitly declared + // DONE -cVC: must distinguish ElementDef created here from one explicitly declared ElDef := TDOMElementDef.Create(doc); ElDef.FNodeName := Token; FDocType.ElementDefs.SetNamedItem(ElDef); @@ -1884,36 +2081,38 @@ begin SkipWhitespace; while FCurChar <> '>' do begin - SkipWhitespace; { !!! } AttDef := TDOMAttrDef.Create(doc); + AttDef.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil)); try AttDef.FName := ExpectName; ExpectWhitespace; Token := GetString(['A'..'Z']); // Get AttType [54], [55], [56] if Token = 'CDATA' then - AttDef.FDataType := DT_CDATA + AttDef.FDataType := dtCdata else if Token = 'ID' then - AttDef.FDataType := DT_ID + AttDef.FDataType := dtId else if Token = 'IDREF' then - AttDef.FDataType := DT_IDREF + AttDef.FDataType := dtIdRef else if Token = 'IDREFS' then - AttDef.FDataType := DT_IDREFS + AttDef.FDataType := dtIdRefs else if Token = 'ENTITY' then - AttDef.FDataType := DT_ENTITY + AttDef.FDataType := dtEntity else if Token = 'ENTITIES' then - AttDef.FDataType := DT_ENTITIES + AttDef.FDataType := dtEntities else if Token = 'NMTOKEN' then - AttDef.FDataType := DT_NMTOKEN + AttDef.FDataType := dtNmToken else if Token = 'NMTOKENS' then - AttDef.FDataType := DT_NMTOKENS + AttDef.FDataType := dtNmTokens else if Token = 'NOTATION' then // [57], [58] begin - AttDef.FDataType := DT_NOTATION; + AttDef.FDataType := dtNotation; ExpectWhitespace; ExpectChar('('); repeat SkipWhitespace; - SkipName; + MarkTokenStart; + if not AttDef.AddEnumToken(ExpectName) then + ValidationError('Duplicate token in NOTATION attribute declaration',[]); SkipWhitespace; until not CheckForChar('|'); ExpectChar(')'); @@ -1921,58 +2120,60 @@ begin else if CheckForChar('(') then // [59] begin - AttDef.FDataType := DT_NMTOKEN; + AttDef.FDataType := dtNmToken; repeat SkipWhitespace; + MarkTokenStart; if not CheckNmToken then RaiseNameNotFound; // not completely correct error message + SetString(Token, FName.Buffer, FName.Length); + if not AttDef.AddEnumToken(Token) then + ValidationError('Duplicate token in enumerated attibute declaration', []); SkipWhitespace; until not CheckForChar('|'); ExpectChar(')'); end else - RaiseExc('Invalid tokenized type'); + FatalError('Illegal attribute type', True); ExpectWhitespace; - // Get DefaultDecl [60] ValueRequired := False; + MarkTokenStart; if CheckForChar('#') then begin Token := GetString(['A'..'Z']); if Token = 'REQUIRED' then - AttDef.FDefault := AD_REQUIRED + AttDef.FDefault := adRequired else if Token = 'IMPLIED' then - AttDef.FDefault := AD_IMPLIED + AttDef.FDefault := adImplied else if Token = 'FIXED' then begin - AttDef.FDefault := AD_FIXED; + AttDef.FDefault := adFixed; ExpectWhitespace; ValueRequired := True; end else - RaiseExc('Illegal attribute default'); + FatalError('Illegal attribute default', True); end else begin - AttDef.FDefault := AD_DEFAULT; + AttDef.FDefault := adDefault; ValueRequired := True; end; - + if ValueRequired then begin SaveCurNode := FCursor; FCursor := AttDef; -// tricky moment, no tests for that -{ FRecognizePE := False; } // TODO: shall it really be disabled? +// See comments to valid-sa-094: PE expansion should be disabled in AttDef. +// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed try ExpectAttValue; finally FCursor := SaveCurNode; -{ FRecognizePE := not FIntSubset; } end; - if AttDef.FDataType = DT_ID then - ValidationError('Attributes of type ID must not have a default value',[]); end; + // SAX: DeclHandler.AttributeDecl(...) // First declaration is binding, subsequent should be ignored if Assigned(ElDef.GetAttributeNode(AttDef.Name)) then @@ -1990,7 +2191,7 @@ end; procedure TXMLReader.ParseEntityDeclValue(Delim: WideChar); // [9] var I: Integer; - Src: TXMLInputSource; + Src: TXMLCharSource; begin Src := FSource; // "Included in literal": process until delimiter hit IN SAME context @@ -1998,13 +2199,12 @@ begin if ParsePEReference then begin if FIntSubset and (FSource.FParent = nil) then - RaiseExc('PE references in internal subset not allowed inside declarations'); + FatalError('PE references in internal subset are not allowed inside declarations', True); StartPE; GetCharRaw; end else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass begin - GetCharRaw; if not ParseCharRef then begin BufAppend(FValue, '&'); @@ -2037,12 +2237,14 @@ begin begin ExpectWhitespace; NDataAllowed := False; - Map := FDocType.PEMap; + if FPEMap = nil then + FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE); + Map := FPEMap; end; Entity := TDOMEntityEx.Create(Doc); try - Entity.FInternal := FIntSubset and (FSource.FParent = nil); + Entity.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil)); Entity.FName := ExpectName; ExpectWhitespace; @@ -2050,6 +2252,7 @@ begin begin NDataAllowed := False; Delim := FCurChar; + Entity.FStartLocation := FLocation; FRecognizePE := False; // PERef right after delimiter should not be recognized GetCharRaw; // at char level - we process it 'manually' FValue.Length := 0; @@ -2059,7 +2262,7 @@ begin end else if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then - RaiseExc('Expected entity value or external ID'); + FatalError('Expected entity value or external ID'); if NDataAllowed then // [76] begin @@ -2070,10 +2273,8 @@ begin ExpectString('NDATA'); ExpectWhitespace; SkipName; - // TODO -cVC: Notation declared. Here or after all has been read? SetString(Entity.FNotationName, FName.Buffer, FName.Length); - if FDocType.Notations.GetNamedItem(Entity.NotationName) = nil then - ValidationError('Reference to undeclared notation ''%s''', [Entity.NotationName]); + // SAX: DTDHandler.UnparsedEntityDecl(...); end; end; except @@ -2094,19 +2295,16 @@ var Token: WideString; IncludeLevel: Integer; IgnoreLevel: Integer; - PELevel: Integer; + CurrentEntity: TObject; begin IncludeLevel := 0; IgnoreLevel := 0; repeat - if SkipWhitespace then - FAllowedDecl := dtNone; + SkipWhitespace; if ParsePEReference then // PERef between declarations should always be recognized begin - FAllowedDecl := dtNone; - if Assigned(FDocType) then - FDocType.HasPERefs := True; + FHavePERefs := True; StartPE; GetChar; Continue; @@ -2125,18 +2323,17 @@ begin if FCurChar <> '<' then Break; - PELevel := FEntityLevel; - GetCharRaw; + CurrentEntity := FSource.FEntity; + GetChar; if CheckForChar('!') then begin - FAllowedDecl := dtNone; if FCurChar = '-' then ParseComment else if FCurChar = '[' then begin if FIntSubset and (FSource.FParent = nil) then - RaiseExc('Conditional sections not allowed in internal subset'); + FatalError('Conditional sections are not allowed in internal subset'); FRecognizePE := not FIntSubset; GetChar; // skip '[' @@ -2149,8 +2346,9 @@ begin else if Token = 'IGNORE' then IgnoreLevel := 1 else - RaiseExc('Expected "INCLUDE" or "IGNORE"'); - AssertPENesting(PELevel); + FatalError('Expected "INCLUDE" or "IGNORE"'); + if CurrentEntity <> FSource.FEntity then + BadPENesting; ExpectChar('['); if IgnoreLevel > 0 then repeat @@ -2165,6 +2363,7 @@ begin else begin FRecognizePE := not FIntSubset; + MarkTokenStart; Token := GetString(['A'..'Z']); ExpectWhitespace; if Token = 'ELEMENT' then @@ -2176,7 +2375,7 @@ begin else if Token = 'NOTATION' then ParseNotationDecl else - RaiseExc('Illegal markup declaration'); + FatalError('Illegal markup declaration', True); SkipWhitespace; FRecognizePE := False; // ! Don't auto-pop context on last markup delimiter @@ -2186,29 +2385,31 @@ begin MarkupDecl starting in PE and ending in root is a WFC [28a] MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14) } - if PELevel > FEntityLevel then - RaiseExc('Parameter entities must be properly nested') - else - AssertPENesting(PELevel); + // TODO: what if statrs in PE1 and ends in PE2, and other cases? + if CurrentEntity <> FSource.FEntity then + if Assigned(FSource.FEntity) then { ends in PE } + BadPENesting + else + FatalError('Parameter entities must be properly nested'); end else if FCurChar = '?' then ParsePI; until False; FRecognizePE := False; if (IncludeLevel > 0) or (IgnoreLevel > 0) then - RaiseExc('Conditional section not closed'); + FatalError('Conditional section is not closed'); end; -procedure TXMLReader.DoParseExtSubset(ASource: TXMLInputSource); +procedure TXMLReader.DoParseExtSubset(ASource: TXMLCharSource); begin InitializeRoot(ASource); FAllowedDecl := dtText; ParseMarkupDecl; if FCurChar <> #0 then - RaiseExc('Illegal character in DTD'); + FatalError('Illegal character in DTD'); end; -procedure TXMLReader.ProcessDTD(ASource: TXMLInputSource); +procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource); begin doc := TXMLDocument.Create; FDocType := TDOMDocumentTypeEx.Create(doc); @@ -2219,10 +2420,11 @@ begin end; procedure TXMLReader.ParseCDSect; // [18] -var - name: WideString; begin ExpectString('[CDATA['); + MarkTokenStart; + if not FInsideRoot then + FatalError('Illegal at document level'); FValue.Length := 0; repeat BufAppend(FValue, FCurChar); @@ -2231,13 +2433,11 @@ begin if (Length >= 3) and (Buffer[Length-1] = '>') and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then begin - Dec(Length, 3); - SetString(name, Buffer, Length); - FCursor.AppendChild(doc.CreateCDATASection(name)); + DoCDSect(Buffer, Length-3); Exit; end; until FCurChar = #0; - RaiseExc('Unterminated CDATA section'); + FatalError('Unterminated CDATA section', True); end; procedure TXMLReader.ParseContent; @@ -2246,18 +2446,18 @@ begin if FCurChar = '<' then begin GetCharRaw; + MarkTokenStart; if CheckName then ParseElement else if FCurChar = '!' then begin GetCharRaw; - FAllowedDecl := dtNone; if FCurChar = '[' then ParseCDSect else if FCurChar = '-' then ParseComment else - ParseDoctypeDecl; // actually will raise error + ParseDoctypeDecl; end else if FCurChar = '?' then ParsePI @@ -2273,104 +2473,225 @@ end; procedure TXMLReader.ParseElement; // [39] [40] [44] var NewElem: TDOMElement; + ElDef: TDOMElementDef; + ElVal: TElementValidator; IsEmpty: Boolean; attr, OldAttr: TDOMNode; begin + if (FCursor = doc) and Assigned(doc.DocumentElement) then + FatalError('Only one top-level element allowed', True) + else + FInsideRoot := True; + NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length); + // First check if NewElem is allowed in this context + if FValStack.Count > 0 then + ElVal := TElementValidator(FValStack.Last) + else + ElVal := nil; + if FValidate and Assigned(ElVal) and not ElVal.IsElementAllowed(NewElem.TagName) then + ValidationError('Element ''%s'' is not allowed in this context',[NewElem.TagName]); + FCursor.AppendChild(NewElem); - Assert(NewElem.ParentNode = FCursor, 'AppendChild did not set ParentNode'); - FCursor := NewElem; + + // Then update ElementDef - it is needed to process attributes + ElDef := nil; + if Assigned(FDocType) then + ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(NewElem.TagName)); IsEmpty := False; - while FCurChar <> '>' do + if SkipWhitespaceRaw then begin - if FCurChar = '/' then + while (FCurChar <> '>') and (FCurChar <> '/') do begin - GetCharRaw; - IsEmpty := True; - FCursor := FCursor.ParentNode; - Break; - end; - - // Get Attribute [41] - ExpectWhitespace; - if not CheckName then // allow stuff like <element >, <element /> - Continue; - - attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length); + MarkTokenStart; + ExpectName; + attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length); - // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute - OldAttr := NewElem.Attributes.SetNamedItem(Attr); - if Assigned(OldAttr) then - begin - OldAttr.Free; - RaiseExc('Duplicate attribute'); - end; - ExpectEq; - Assert(TDOMAttr(attr).OwnerElement = NewElem, 'DOMAttr.OwnerElement not set correctly'); - FCursor := attr; - ExpectAttValue; - FCursor := NewElem; + // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute + OldAttr := NewElem.Attributes.SetNamedItem(Attr); + if Assigned(OldAttr) then + begin + OldAttr.Free; + FatalError('Duplicate attribute', True); + end; + ExpectEq; + FCursor := attr; + ExpectAttValue; + if (FCurChar <> '>') and (FCurChar <> '/') then + ExpectWhitespace; + end; // while + end; + if FCurChar = '/' then + begin + IsEmpty := True; + GetCharRaw; end; ExpectChar('>'); - ProcessDefaultAttributes(NewElem); + + PushVC(ElDef); + // SAX: ContentHandler.StartElement(...) + // SAX: ContentHandler.StartPrefixMapping(...) if not IsEmpty then begin - if not FPreserveWhitespace then // critical for testsuite compliance - SkipWhitespace; + FCursor := NewElem; + if not FPreserveWhitespace then // critical for testsuite compliance + SkipWhitespaceRaw; ParseContent; if FCurChar = '/' then // Get ETag [42] begin GetCharRaw; + MarkTokenStart; if ExpectName <> NewElem.TagName then - RaiseExc('Unmatching element end tag (expected "</%s>")', [NewElem.TagName]); - SkipWhitespace; + FatalError('Unmatching element end tag (expected "</%s>")', [NewElem.TagName], True); + SkipWhitespaceRaw; ExpectChar('>'); - FCursor := FCursor.ParentNode; end else if FCurChar <> #0 then RaiseNameNotFound else // End of stream in content - RaiseExc('Document element not closed'); + FatalError('Closing tag is missing for ''%s''', [NewElem.TagName]); end; + // SAX: ContentHandler.EndElement(...) + // SAX: ContentHandler.EndPrefixMapping(...) + FCursor := NewElem.ParentNode; + if FCursor = doc then + FInsideRoot := False; + ProcessDefaultAttributes(NewElem); + if FValidate then + ValidateElement(NewElem); + PopVC; end; -procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement); +procedure TXMLReader.AddIdRef(Buf: PWideChar; Length: Integer); +var + w: PWideStrWrapper; +begin + New(w); + SetString(w^.Value, Buf, Length); + FIDRefs.Add(w); +end; + +procedure TXMLReader.ClearIdRefs; +var + I: Integer; +begin + for I := 0 to FIDRefs.Count-1 do + Dispose(PWideStrWrapper(FIDRefs.List^[I])); + FIDRefs.Clear; +end; + +procedure TXMLReader.ValidateIdRefs; var I: Integer; +begin + for I := 0 to FIDRefs.Count-1 do + if Doc.GetElementById(PWideStrWrapper(FIDRefs.List^[I])^.Value) = nil then + ValidationError('The ID ''%s'' does not match any element', [PWideStrWrapper(FIDRefs.List^[I])^.Value]); + ClearIDRefs; +end; + +procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement); +var + I, L, StartPos, EndPos: Integer; ElDef: TDOMElementDef; - AttDefs: TDOMNamedNodeMap; + Map: TDOMNamedNodeMap; AttDef: TDOMAttrDef; - Attr: TDOMAttrEx; - Spec: Boolean; + Attr: TDOMAttr; + AttValue: WideString; + Entity: TDOMEntity; begin - if Assigned(FDocType) then + ElDef := CurrentElementDef; + if Assigned(ElDef) and ElDef.HasAttributes then begin - ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Element.TagName)); - if Assigned(ElDef) and ElDef.HasAttributes then + Map := ElDef.Attributes; + + for I := 0 to Map.Length-1 do begin - AttDefs := ElDef.Attributes; - for I := 0 to AttDefs.Length-1 do + AttDef := Map[I] as TDOMAttrDef; + + Attr := Element.GetAttributeNode(AttDef.Name); + if Attr = nil then begin - AttDef := AttDefs[I] as TDOMAttrDef; - Spec := True; - // no validity checking yet; just append default values - Attr := TDOMAttrEx(Element.GetAttributeNode(AttDef.Name)); - if (AttDef.FDefault in [AD_DEFAULT, AD_FIXED]) and (Attr = nil) then - begin - Attr := TDOMAttrEx(AttDef.CloneNode(True)); - Element.SetAttributeNode(Attr); - Spec := False; + // attribute needs defaulting + case AttDef.FDefault of + adDefault, adFixed: begin + if FStandalone and AttDef.FExternallyDeclared then + StandaloneError; + // Cloning TDOMAttrDef creates TDOMAttr. DataType is copied. + Attr := TDOMAttr(AttDef.CloneNode(True)); + TDOMAttrDef(Attr).FSpecified := False; // Dirty hack... + TDOMAttrDef(Attr).FDeclared := True; + Element.SetAttributeNode(Attr); + end; + adRequired: ValidationError('Required attribute ''%s'' of element ''%s'' is missing',[AttDef.Name, Element.TagName]) end; - if Assigned(Attr) then - begin - Attr.FSpecified := Spec; - Attr.FNormalize := (AttDef.FDataType <> DT_CDATA); + end + else + begin + TDOMAttrDef(Attr).FDeclared := True; + AttValue := Attr.Value; // unnormalized + // now assign DataType so that value is correctly normalized + TDOMAttrDef(Attr).FDataType := AttDef.FDataType; + if FStandalone and AttDef.FExternallyDeclared and (Attr.Value <> AttValue) then + StandaloneError; + AttValue := Attr.Value; // recalculate + // TODO: what about normalization of AttDef.Value? (Currently it IS normalized) + if (AttDef.FDefault = adFixed) and (AttDef.Value <> AttValue) then + ValidationError('Value of attribute ''%s'' does not match its fixed default',[AttDef.Name]); + + if not ValidateAttrSyntax(AttDef, AttValue) then + ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name]); + end; + + if Attr = nil then + Continue; + L := Length(AttValue); + case Attr.DataType of + dtId: if not Doc.AddID(Attr) then + ValidationError('The ID ''%s'' is not unique', [AttValue]); + + dtIdRef, dtIdRefs: begin + StartPos := 1; + while StartPos <= L do + begin + EndPos := StartPos; + while (EndPos <= L) and (AttValue[EndPos] <> #32) do + Inc(EndPos); + + AddIdRef(@AttValue[StartPos], EndPos-StartPos); + StartPos := EndPos + 1; + end; + end; + + dtEntity, dtEntities: begin + StartPos := 1; + while StartPos <= L do + begin + EndPos := StartPos; + while (EndPos <= L) and (AttValue[EndPos] <> #32) do + Inc(EndPos); + Entity := TDOMEntity(FDocType.Entities.GetNamedItem(Copy(AttValue, StartPos, EndPos-StartPos))); + if (Entity = nil) or (Entity.NotationName = '') then + ValidationError('Attribute ''%s'' type mismatch', [Attr.Name]); + StartPos := EndPos + 1; + end; end; end; end; end; + // Now report undeclared attributes + if Assigned(FDocType) and Element.HasAttributes then + begin + Map := Element.Attributes; + for I := 0 to Map.Length-1 do + begin + Attr := TDOMAttr(Map[I]); + if not TDOMAttrDef(Attr).FDeclared then + ValidationError('Using undeclared attribute ''%s'' on element ''%s''',[Attr.Name, Element.TagName]); + end; + end; end; function TXMLReader.ParsePEReference: Boolean; // [69] @@ -2378,6 +2699,7 @@ begin Result := CheckForChar('%'); if Result then begin + MarkTokenStart; SkipName; ExpectChar(';'); end; @@ -2390,7 +2712,7 @@ begin begin ExpectString('SYSTEM'); ExpectWhitespace; - SkipSystemLiteral(SysID, True); + SkipSystemLiteral(SysID); Result := True; end else if FCurChar = 'P' then @@ -2399,35 +2721,513 @@ begin ExpectWhitespace; SkipPubidLiteral; SetString(PubID, FValue.Buffer, FValue.Length); + NormalizeSpaces(PubID); if SysIdOptional then begin SkipWhitespace; - SkipSystemLiteral(SysID, False); + if SkipQuotedLiteral then + SetString(SysID, FValue.Buffer, FValue.Length); end else begin ExpectWhitespace; - SkipSystemLiteral(SysID, True); + SkipSystemLiteral(SysID); end; Result := True; end else Result := False; end; -procedure TXMLReader.ValidationError(const Msg: string; - const args: array of const); +procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const); +var + E: EXMLReadError; +begin + if not FValidate then + Exit; + FDocNotValid := True; + E := EXMLReadError.CreateFmt(Msg, Args); + // TODO -cErrorReporting: No location for validity errors is reported yet + E.FErrorMessage := E.Message; + E.FSeverity := esError; + CallErrorHandler(E); + // if user handler raises exception, control won't get here + E.Free; +end; + +procedure TXMLReader.CallErrorHandler(E: EXMLReadError); +begin + try + if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then + FCtrl.FOnError(E); + if E.Severity = esFatal then + raise E; + except + if ExceptObject <> E then + E.Free; + raise; + end; +end; + +function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean; +begin + case AttrDef.FDataType of + dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11); + dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11); + dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue); + dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11); + // IsXmlName() not necessary - enum is never empty and contains valid names + dtNotation: Result := AttrDef.HasEnumToken(aValue); + else + Result := True; + end; +end; + +procedure TXMLReader.ValidateElement(Element: TDOMElement); +var + ElDef: TDOMElementDef; + elv: TElementValidator; + + procedure Traverse(node: TDOMNode); + var + cur: TDOMNode; + begin + cur := node.FirstChild; + while Assigned(cur) do + begin + case cur.NodeType of + ELEMENT_NODE: + elv.AddElement(TDOMElement(cur)); + ENTITY_REFERENCE_NODE: + Traverse(cur); + TEXT_NODE: + begin + if not TDOMText(cur).MayBeIgnorable then + ValidationError('Character data is not allowed in element-only content',[]) + else + if FStandalone and ElDef.FExternallyDeclared then + StandaloneError; + end; + end; + cur := cur.NextSibling; + end; + end; + +begin + ElDef := CurrentElementDef; + if Assigned(ElDef) and ElDef.HasElementDecl then + begin + case ElDef.ContentType of + ctEmpty: begin + if Element.HasChildNodes then + ValidationError('Element ''%s'' was declared empty but has content', [Element.TagName]); + end; + ctChildren: begin + elv := TElementValidator(FValStack.Last); + try + Traverse(Element); + if not elv.Match then + ValidationError('Content of element ''%s'' does not match its declaration',[Element.TagName]); + finally + elv.FList.Clear; + end; + end; + end; + end + else // if no DocType, a corresponding error will be reported. + if Assigned(FDocType) then + ValidationError('Using undeclared element ''%s''',[Element.TagName]); +end; + +// TODO: this should be method of TDOMDocumentTypeEx, but we must pass ErrorHandler in... +procedure TXMLReader.ValidateDTD; +var + I, J, K: Integer; + Entity: TDOMEntity; + ElDef: TDOMElementDef; + AttDef: TDOMAttrDef; + IdFound, NotationFound, HasDefault: Boolean; +begin + for I := 0 to FDocType.Entities.Length-1 do + begin + Entity := TDOMEntity(FDocType.Entities[I]); + if (Entity.NotationName <> '') then + CheckNotation(Entity.NotationName); + end; + + if Assigned(FDocType.FElementDefs) then + begin + for I := 0 to FDocType.FElementDefs.Length-1 do + begin + ElDef := TDOMElementDef(FDocType.FElementDefs[I]); + // XML spec permits ATTLIST declarations without corresponding ELEMENT. + // Such ATTLISTs are useless for validation, so here we might skip or even + // delete all ElDefs that have HasElementDecl=False. However, doing so + // breaks some stupid tests, namely sun/id04. +{ + if not ElDef.HasElementDecl then + Continue; +} + if not ElDef.HasAttributes then + Continue; + IdFound := False; + NotationFound := False; + for J := 0 to ElDef.Attributes.Length-1 do + begin + AttDef := TDOMAttrDef(ElDef.Attributes[J]); + HasDefault := AttDef.FDefault in [adDefault, adFixed]; + case AttDef.FDataType of + dtId: begin + if IdFound then + ValidationError('Only one attribute of type ID is allowed per element',[]); + IdFound := True; + if HasDefault then + ValidationError('An attribute of type ID cannot have a default value',[]); + end; + dtNotation: begin + for K := 0 to Length(AttDef.FEnumeration)-1 do + CheckNotation(AttDef.FEnumeration[K]); + if NotationFound then + ValidationError('Only one attribute of type NOTATION is allowed per element',[]); + NotationFound := True; + if ElDef.ContentType = ctEmpty then + ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]); + end; + end; // case + if HasDefault and not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then + ValidationError('Illegal attribute default', []); + end; + end; + end; +end; + +procedure TXMLReader.DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean); +var + TextNode: TDOMText; +begin + // Validating filter part (disabled for the following two reasons): + // TODO: per SAX, attribute text should not go here. + // ElDefStack is invalid in this case, and we fail... + // TODO: for testing whitespace CharRefs, they are contained in internal entities. + // Parsing first reports them to Entity, and then they are cloned to real parent + // so this method isn't called :( +{ + if FCursor.NodeType in [ELEMENT_NODE, ENTITY_REFERENCE_NODE] then + begin + ElDef := CurrentElementDef; + if Assigned(ElDef) and (ElDef.ContentType = ctChildren) then + begin + if not Whitespace then + ValidationError('Character data is not allowed in element-only content',[]) + else + if FStandalone and ElDef.FExternallyDeclared then + StandaloneError; + end; + end; +} + // Document builder part + TextNode := Doc.CreateTextNodeBuf(ch, Count); + TextNode.MayBeIgnorable := Whitespace; + FCursor.AppendChild(TextNode); +end; + +procedure TXMLReader.DoComment(ch: PWideChar; Count: Integer); +var + ElDef: TDOMElementDef; +begin + // validation filter part + if FValidate then + begin + ElDef := CurrentElementDef; + if Assigned(ElDef) and (ElDef.ContentType = ctEmpty) then + ValidationError('Comments are not allowed within EMPTY elements', []); + end; + + // DOM builder part + if (not FIgnoreComments) and Assigned(FCursor) then + FCursor.AppendChild(Doc.CreateCommentBuf(ch, Count)); +end; + +procedure TXMLReader.DoCDSect(ch: PWideChar; Count: Integer); +var + s: WideString; + ElDef: TDOMElementDef; +begin + if FValidate then + begin + ElDef := CurrentElementDef; + if Assigned(ElDef) and (ElDef.ContentType = ctChildren) then + ValidationError('CDATA sections are not allowed in element-only content',[]); + end; + if not FCDSectionsAsText then + begin + SetString(s, ch, Count); + // SAX: LexicalHandler.StartCDATA; + // SAX: ContentHandler.Characters(...); + FCursor.AppendChild(doc.CreateCDATASection(s)); + // SAX: LexicalHandler.EndCDATA; + end + else + FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count)); +end; + +procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString); +var + Notation: TDOMNotationEx; +begin + if FDocType.Notations.GetNamedItem(aName) = nil then + begin + Notation := TDOMNotationEx(TDOMNotation.Create(doc)); + Notation.FName := aName; + Notation.FPublicID := aPubID; + Notation.FSystemID := aSysID; + FDocType.Notations.SetNamedItem(Notation); + end + else + ValidationError('Duplicate notation declaration: ''%s''', [aName]); +end; + +procedure TXMLReader.PushVC(aElDef: TDOMElementDef); +begin + FValStack.Add(TElementValidator.Create(aElDef)); +end; + +procedure TXMLReader.PopVC; +var + Validator: TObject; +begin + with FValStack do + begin + Validator := TObject(Last); + Delete(Count-1); + Validator.Free; + end; +end; + +function TXMLReader.CurrentElementDef: TDOMElementDef; +begin + if FValStack.Count > 0 then + Result := TElementValidator(FValStack.Last).FElementDef + else + Result := nil; +end; + +{ TDOMAttrDef } + +function TDOMAttrDef.AddEnumToken(const aValue: WideString): Boolean; +var + I, L: Integer; +begin + // TODO: this implementaion is the slowest possible... + Result := False; + L := Length(FEnumeration); + for I := 0 to L-1 do + begin + if aValue = FEnumeration[I] then + Exit; + end; + SetLength(FEnumeration, L+1); + FEnumeration[L] := aValue; + Result := True; +end; + +function TDOMAttrDef.HasEnumToken(const aValue: WideString): Boolean; +var + I: Integer; +begin + Result := True; + if Length(FEnumeration) = 0 then + Exit; + for I := 0 to Length(FEnumeration)-1 do + begin + if FEnumeration[I] = aValue then + Exit; + end; + Result := False; +end; + +{ TElementValidator } + +procedure TElementValidator.AddElement(aNode: TDOMElement); +begin + FList.Add(aNode); +end; + +constructor TElementValidator.Create(aElDef: TDOMElementDef); +begin + inherited Create; + FElementDef := aElDef; + if Assigned(FElementDef) then + FCurCP := FElementDef.RootCP; + FList := TList.Create; +end; + +destructor TElementValidator.Destroy; +begin + FList.Free; + inherited Destroy; +end; + +function TElementValidator.IsElementAllowed(const aName: DOMString): Boolean; +var + I: Integer; +begin + Inc(FChildElementCount); + Result := True; + // if element is not declared, non-validity has been already reported, no need to report again... + if FElementDef = nil then + Exit; + { for mixed content type it is easy } + if FElementDef.ContentType = ctMixed then + begin + for I := 0 to FElementDef.RootCP.ChildCount-1 do + begin + if aName = FElementDef.RootCP.Children[I].Name then + Exit; + end; + Result := False; + Exit; + end; + { for empty, even more easier } + if FElementDef.ContentType = ctEmpty then + begin + Result := False; + Exit; + end; + + +end; + +function TElementValidator.Match: Boolean; begin - // TODO: just a stub now - FInvalid := True; + FIndex := 0; + Result := (FElementDef.RootCP.MatchNodeList(FList, FIndex)) and (FIndex = FList.Count); end; +{ TContentParticle } +function TContentParticle.Add: TContentParticle; +begin + Result := TContentParticle.Create; + Result.FParent := Self; + FChildren.Add(Result); +end; +constructor TContentParticle.Create; +begin + inherited Create; + FChildren := TList.Create; +end; + +destructor TContentParticle.Destroy; +var + I: Integer; +begin + for I := FChildren.Count-1 downto 0 do + TObject(FChildren[I]).Free; + FChildren.Free; + inherited Destroy; +end; + +function TContentParticle.GetChild(Index: Integer): TContentParticle; +begin + Result := TContentParticle(FChildren[Index]); +end; + +function TContentParticle.GetChildCount: Integer; +begin + Result := FChildren.Count; +end; + +function TContentParticle.InternalMatch(List: TList; var Index: Integer): Boolean; +var + I: Integer; + TempIndex, RestIndex, MatchNumber: Integer; +begin + if CPType = ctName then + begin + Result := (Index < List.Count) and (TDOMElement(List[Index]).TagName = Name); + if Result then + Inc(Index); + end + else if CPType = ctChoice then + begin + RestIndex := Index; + Result := False; + MatchNumber := 0; + for I := 0 to ChildCount-1 do + begin + TempIndex := Index; + if Children[I].MatchNodeList(List, TempIndex) then + begin + Result := True; + if Index <> TempIndex then // Do not count matching empty expressions + begin + Inc(MatchNumber); + if MatchNumber > 1 then + Break; + RestIndex := TempIndex; + end; + end else if MatchNumber > 1 then Break; + end; + + if Result then + Index := RestIndex; + end + else // ctSeq + begin + MatchNumber := 0; + TempIndex := Index; + Result := False; + for I := 0 to ChildCount-1 do + begin + Result := Children[I].MatchNodeList(List, TempIndex); + if not Result then Break; + end; + + if Result then + Index := TempIndex; + if MatchNumber > 1 then + Result := False; + end; +end; + +function TContentParticle.MatchNodeList(List: TList; var Index: Integer): Boolean; +var + Saved: Integer; +begin + Result := InternalMatch(List, Index) or not (CPQuant in [cqOnce, cqOnceOrMore]); + if Result and (CPQuant in [cqZeroOrMore, cqOnceOrMore]) then + begin + Saved := Index; + while Index < List.Count do + begin + if not InternalMatch(List, Saved) or (Index = Saved) then + Break; + Index := Saved; + end; + end; +end; + +{ TDOMElementDef } + +constructor TDOMElementDef.Create(aOwner: TDOMDocument); +begin + inherited Create(aOwner); + RootCP := TContentParticle.Create; +end; + +destructor TDOMElementDef.Destroy; +begin + RootCP.Free; + inherited Destroy; +end; + +{ plain calls } procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin ADoc := nil; Src := TXMLFileInputSource.Create(f); @@ -2444,7 +3244,7 @@ end; procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin ADoc := nil; Reader := TXMLReader.Create; @@ -2479,7 +3279,7 @@ end; procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin Reader := TXMLReader.Create; try @@ -2494,7 +3294,7 @@ end; procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin Reader := TXMLReader.Create; try @@ -2527,7 +3327,7 @@ end; procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin ADoc := nil; Reader := TXMLReader.Create; @@ -2544,7 +3344,7 @@ end; procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); var Reader: TXMLReader; - Src: TXMLInputSource; + Src: TXMLCharSource; begin ADoc := nil; Reader := TXMLReader.Create; @@ -2577,4 +3377,6 @@ begin end; + + end. diff --git a/packages/fcl-xml/src/xmlutils.pp b/packages/fcl-xml/src/xmlutils.pp new file mode 100644 index 0000000000..3f39d6e6a9 --- /dev/null +++ b/packages/fcl-xml/src/xmlutils.pp @@ -0,0 +1,222 @@ +{ + This file is part of the Free Component Library + + XML utility routines. + Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru + + See the file COPYING.FPC, included in this distribution, + for details about the copyright. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + **********************************************************************} +unit xmlutils; + +interface + +uses + SysUtils; + +function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean; +function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean; +function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean; +function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean; +function IsValidXmlEncoding(const Value: WideString): Boolean; +function Xml11NamePages: PByteArray; +procedure NormalizeSpaces(var Value: WideString); + +{$i names.inc} + +implementation + +var + Xml11Pg: PByteArray = nil; + +function Xml11NamePages: PByteArray; +var + I: Integer; + p: PByteArray; +begin + if Xml11Pg = nil then + begin + GetMem(p, 512); + for I := 0 to 255 do + p^[I] := ord(Byte(I) in Xml11HighPages); + p^[0] := 2; + p^[3] := $2c; + p^[$20] := $2a; + p^[$21] := $2b; + p^[$2f] := $29; + p^[$30] := $2d; + p^[$fd] := $28; + + Move(p^, p^[256], 256); + p^[$100] := $19; + p^[$103] := $2E; + p^[$120] := $2F; + Xml11Pg := p; + end; + Result := Xml11Pg; +end; + +function IsXml11Char(const Value: WideString; var Index: Integer): Boolean; +begin + if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then + begin + Inc(Index); + Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF); + end + else + Result := False; +end; + +function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean; +var + Pages: PByteArray; + I: Integer; +begin + Result := False; + if Xml11 then + Pages := Xml11NamePages + else + Pages := @NamePages; + + I := 1; + if (Value = '') or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or + (Xml11 and IsXml11Char(Value, I))) then + Exit; + Inc(I); + while I <= Length(Value) do + begin + if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or + (Xml11 and IsXml11Char(Value, I))) then + Exit; + Inc(I); + end; + Result := True; +end; + +function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean; +var + Pages: PByteArray; + I: Integer; + Offset: Integer; +begin + if Xml11 then + Pages := Xml11NamePages + else + Pages := @NamePages; + Result := False; + if Value = '' then + Exit; + I := 1; + Offset := 0; + while I <= Length(Value) do + begin + if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or + (Xml11 and IsXml11Char(Value, I))) then + begin + if (I = Length(Value)) or (Value[I] <> #32) then + Exit; + Offset := 0; + Inc(I); + Continue; + end; + Offset := $100; + Inc(I); + end; + Result := True; +end; + +function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean; +var + I: Integer; + Pages: PByteArray; +begin + if Xml11 then + Pages := Xml11NamePages + else + Pages := @NamePages; + Result := False; + if Value = '' then + Exit; + I := 1; + while I <= Length(Value) do + begin + if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or + (Xml11 and IsXml11Char(Value, I))) then + Exit; + Inc(I); + end; + Result := True; +end; + +function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean; +var + I: Integer; + Pages: PByteArray; +begin + if Xml11 then + Pages := Xml11NamePages + else + Pages := @NamePages; + I := 1; + Result := False; + if Value = '' then + Exit; + while I <= Length(Value) do + begin + if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or + (Xml11 and IsXml11Char(Value, I))) then + begin + if (I = Length(Value)) or (Value[I] <> #32) then + Exit; + end; + Inc(I); + end; + Result := True; +end; + +function IsValidXmlEncoding(const Value: WideString): Boolean; +var + I: Integer; +begin + Result := False; + if (Value = '') or (Value[1] > #255) or not (char(Value[1]) in ['A'..'Z', 'a'..'z']) then + Exit; + for I := 2 to Length(Value) do + if (Value[I] > #255) or not (char(Value[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then + Exit; + Result := True; +end; + +procedure NormalizeSpaces(var Value: WideString); +var + I, J: Integer; +begin + I := Length(Value); + // speed: trim only whed needed + if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then + Value := Trim(Value); + I := 1; + while I < Length(Value) do + begin + if Value[I] = #32 then + begin + J := I+1; + while (J <= Length(Value)) and (Value[J] = #32) do Inc(J); + if J-I > 1 then Delete(Value, I+1, J-I-1); + end; + Inc(I); + end; +end; + +initialization + +finalization + if Assigned(Xml11Pg) then + FreeMem(Xml11Pg); + +end. diff --git a/packages/fcl-xml/src/xmlwrite.pp b/packages/fcl-xml/src/xmlwrite.pp index b3314d2128..95a420cb7b 100644 --- a/packages/fcl-xml/src/xmlwrite.pp +++ b/packages/fcl-xml/src/xmlwrite.pp @@ -44,7 +44,6 @@ implementation uses SysUtils; type - TCharacters = set of Char; TSpecialCharCallback = procedure(c: WideChar) of object; TXMLWriter = class(TObject) @@ -64,7 +63,7 @@ type procedure wrtLineEnd; {$IFDEF HAS_INLINE} inline; {$ENDIF} procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF} procedure wrtQuotedLiteral(const ws: WideString); - procedure ConvWrite(const s: WideString; const SpecialChars: TCharacters; + procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar; const SpecialCharCallback: TSpecialCharCallback); procedure AttrSpecialCharCallback(c: WideChar); procedure TextNodeSpecialCharCallback(c: WideChar); @@ -297,7 +296,7 @@ const AttrSpecialChars = ['<', '"', '&', #9, #10, #13]; TextSpecialChars = ['<', '>', '&']; -procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TCharacters; +procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar; const SpecialCharCallback: TSpecialCharCallback); var StartPos, EndPos: Integer; @@ -362,6 +361,7 @@ begin COMMENT_NODE: VisitComment(node); DOCUMENT_NODE: VisitDocument(node); DOCUMENT_TYPE_NODE: VisitDocumentType(node); + ENTITY_NODE, DOCUMENT_FRAGMENT_NODE: VisitFragment(node); end; end; @@ -384,7 +384,8 @@ begin for i := 0 to node.Attributes.Length - 1 do begin attr := node.Attributes.Item[i]; - VisitAttribute(attr); + if TDOMAttr(attr).Specified then + VisitAttribute(attr); end; Child := node.FirstChild; if Child = nil then @@ -532,18 +533,19 @@ procedure TXMLWriter.VisitDocumentType(Node: TDOMNode); begin wrtStr('<!DOCTYPE '); wrtStr(Node.NodeName); + wrtChr(' '); with TDOMDocumentType(Node) do begin if PublicID <> '' then begin - wrtStr(' PUBLIC '); + wrtStr('PUBLIC '); wrtQuotedLiteral(PublicID); wrtChr(' '); wrtQuotedLiteral(SystemID); end else if SystemID <> '' then begin - wrtStr(' SYSTEM '); + wrtStr('SYSTEM '); wrtQuotedLiteral(SystemID); end; if InternalSubset <> '' then @@ -560,6 +562,7 @@ procedure TXMLWriter.VisitFragment(Node: TDOMNode); var Child: TDOMNode; begin + // TODO: TextDecl is probably needed // Fragment itself should not be written, only its children should... Child := Node.FirstChild; while Assigned(Child) do diff --git a/packages/fcl-xml/tests/xmlts.pp b/packages/fcl-xml/tests/xmlts.pp new file mode 100644 index 0000000000..62903d8a0a --- /dev/null +++ b/packages/fcl-xml/tests/xmlts.pp @@ -0,0 +1,778 @@ +{ + This file is part of the Free Component Library (FCL) + + FCL test runner for OASIS/NIST XML test suite + It is somewhat based on 'harness.js' script + (see http://xmlconf.sourceforge.net) + Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru + + See the file COPYING.FPC, included in this distribution, + for details about the copyright. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + **********************************************************************} + +program xmlts; + +{$IFDEF FPC} +{$MODE OBJFPC}{$H+} +{$ENDIF} +{$APPTYPE CONSOLE} + +uses + SysUtils, + Classes, + DOM, + XMLRead, + XMLWrite, + UriParser; + +const + harness = 'Pascal version'; + version = '0.0.1 alpha :)'; + parser = 'FCL XML parser'; + parserName = parser; + os = 'Unknown OS'; + runtime = 'FPC RTL'; + + +type + TDiagCategory = (dcInfo, dcNegfail, dcFail, dcPass); + + TTestSuite = class + private + FTemplate: TXMLDocument; + FParser: TDOMParser; + FPassed, FFailCount: Integer; + FFalsePasses: Integer; + FRootUri: string; + FTemplateName: string; + FSuiteName: string; + FDoc: TXMLDocument; + FValidating: Boolean; + FSuiteTitle: DOMString; + FState: DOMString; + FSkipped: Integer; + FTotal: Integer; + table_valid: TDOMNode; + table_output: TDOMNode; + table_invalid: TDOMNode; + table_not_wf: TDOMNode; + table_informative: TDOMNode; + FValError: string; + FTestID: DOMString; + procedure LoadTemplate(const Name: string); + procedure HandleTemplatePIs(Element: TDOMNode); + procedure Diagnose(Element, Table: TDOMNode; Category: TDiagCategory; const Error: DOMString); + procedure DiagnoseOut(const ErrorMsg: DOMString); + function CompareNodes(actual, correct: TDOMNode; out Msg: string): Boolean; + procedure Canonicalize(node: TDOMNode); + procedure ErrorHandler(Error: EXMLReadError); + public + constructor Create; + procedure Run(const Tests: string); + procedure RunTest(Element: TDOMElement); + destructor Destroy; override; + end; + +function GetBaseURI(Element: TDOMNode; const DocumentURI: string): string; +var + Ent: TDOMNode; + Uri1, Uri2, s: WideString; +begin + case Element.NodeType of + ELEMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE, + PROCESSING_INSTRUCTION_NODE, COMMENT_NODE, DOCUMENT_TYPE_NODE: + if Assigned(Element.ParentNode) + then Result := GetBaseURI(Element.ParentNode, DocumentURI) + else Result := ''; + + ATTRIBUTE_NODE: begin + Result := ''; + if Assigned(TDomAttr(Element).OwnerElement) then + begin + Result := GetBaseURI(TDomAttr(Element).OwnerElement, DocumentURI); + end; + end; + + ENTITY_REFERENCE_NODE: begin + Ent := Element.OwnerDocument.DocType.Entities.GetNamedItem(Element.NodeName); + if Assigned(Ent) and (TDOMEntity(Ent).SystemID <> '') then + begin + Uri1 := TDOMEntity(Ent).SystemID; + if IsAbsoluteURI(Uri1) then + begin + Result := Uri1; + end else begin + Uri2 := GetBaseURI(Element.ParentNode, DocumentUri); + ResolveRelativeUri(Uri2, Uri1, s); + Result := s; + end; + end + else + begin + if Assigned(Element.ParentNode) + then Result := GetBaseURI(Element.ParentNode, DocumentURI) + else Result := ''; + end; + end; + + DOCUMENT_NODE: Result := DocumentURI; + else + Result := ''; + end; +end; + +{ TTestSuite } + +constructor TTestSuite.Create; +begin + inherited Create; + FParser := TDOMParser.Create; + FParser.Options.PreserveWhitespace := True; +end; + +procedure TTestSuite.ErrorHandler(Error: EXMLReadError); +begin + if Error.Severity = esError then + begin + FValError := Error.Message; +{ uncomment the line below to verify that the suite correctly handles + exception raised from the handler } +// Abort; + end; +end; + +procedure TTestSuite.LoadTemplate(const Name: string); +var + tables: TDOMNodeList; + I: Integer; + id: DOMString; + el: TDOMElement; +begin + ReadXMLFile(FTemplate, Name); + tables := FTemplate.DocumentElement.GetElementsByTagName('table'); + try + for I := 0 to tables.Count-1 do + begin + el := TDOMElement(tables.Item[I]); + id := el['id']; + if id = 'valid' then + table_valid := el + else if ((id = 'invalid-negative') and FValidating) or ((id = 'invalid-positive') and not FValidating) then + table_invalid := el + else if id = 'valid-output' then + table_output := el + else if id = 'not-wf' then + table_not_wf := el + else if id = 'error' then + table_informative := el; + end; + finally + tables.Free; + end; +end; + +destructor TTestSuite.Destroy; +begin + FDoc.Free; + FTemplate.Free; + FParser.Free; + inherited; +end; + +procedure TTestSuite.HandleTemplatePIs(Element: TDOMNode); +var + Children: TDOMNodeList; + Child: TDOMNode; + NewChild: TDOMNode; + Remove: Boolean; + Index: Integer; + Data: DOMString; +begin + Children := element.childNodes; + Remove := False; + Index := 0; + + repeat + Child := Children.Item[Index]; + if Child = nil then Break; + Inc(index); + + // inside a rejected <?if ...?>...<?endif?> + if Remove and (child.nodeType <> PROCESSING_INSTRUCTION_NODE) then + begin + Element.removeChild(child); + Dec(Index); + Continue; + end; + if Child.hasChildNodes then + begin + HandleTemplatePIs(Child); + Continue; + end; + + if Child.nodeType <> PROCESSING_INSTRUCTION_NODE then + Continue; + + Data := Child.NodeValue; + + if Child.NodeName = 'run-id' then + begin + if Data = 'name' then + newChild := FTemplate.createTextNode(parser) + else if Data = 'description' then + newChild := FTemplate.createTextNode (parserName) + else if Data = 'general-entities' then + newChild := FTemplate.createTextNode('included') + else if Data = 'parameter-entities' then + newChild := FTemplate.createTextNode ('included') + else if Data = 'type' then + begin + if FValidating then + Data := 'Validating' + else + Data := 'Non-Validating'; + newChild := FTemplate.createTextNode(Data); + end + // ... test run description + else if Data = 'date' then + newChild := FTemplate.createTextNode(DateTimeToStr(Now)) + else if Data = 'harness' then + newChild := FTemplate.createTextNode(harness) + else if Data = 'java' then + newChild := FTemplate.createTextNode(runtime) + else if Data = 'os' then + newChild := FTemplate.createTextNode(os) + else if Data = 'testsuite' then + newChild := FTemplate.createTextNode(FSuiteTitle) + else if Data = 'version' then + newChild := FTemplate.createTextNode(version) + // ... test result info + else if Data = 'failed' then + newChild := FTemplate.createTextNode(IntToStr(FFailCount)) + else if Data = 'passed' then + newChild := FTemplate.createTextNode(IntToStr(FPassed)) + else if Data = 'passed-negative' then + newChild := FTemplate.createTextNode(IntToStr(FFalsePasses)) + else if Data = 'skipped' then + newChild := FTemplate.createTextNode(IntToStr(FSkipped)) + else if Data = 'status' then + newChild := FTemplate.createTextNode (FState); + + Element.replaceChild (newChild, child); + Continue; + end + + // if/endif don't nest, and always have the same parent + // we rely on those facts here! + else if Child.NodeName = 'if' then + begin + Remove := not (((Data = 'validating') and FValidating) or + ((Data = 'nonvalidating') and not FValidating)); + element.removeChild(child); + Dec(Index); + Continue; + end + else if Child.NodeName = 'endif' then + begin + Remove := False; + element.removeChild(child); + Dec(Index); + Continue; + end; + until False; + Children.Free; +end; + + +procedure TTestSuite.Run(const Tests: string); +var + Cases: TDOMNodeList; + I: Integer; +begin + FRootURI := FilenameToURI(Tests); + ReadXMLFile(FDoc, Tests); + FSuiteTitle := FDoc.DocumentElement['PROFILE']; + Cases := FDoc.DocumentElement.GetElementsByTagName('TEST'); + writeln('Using test suite: ', Tests); + writeln; + writeln('Testing, validation = ', FValidating); + try + for I := 0 to Cases.Count-1 do + RunTest(Cases.Item[I] as TDOMElement); + I := Cases.Count; + finally + Cases.Free; + end; + + FPassed := FTotal-FFailCount; + Dec(FPassed, FSkipped); + + writeln('Found ', I, ' basic test cases.'); + writeln('Found ', FTotal, ' overall test cases.'); + writeln('Skipped: ', FSkipped); + writeln('Passed: ', FPassed); + writeln('Failed: ', FFailCount); + writeln('Negative passes: ', FFalsePasses, ' (need examination).'); + writeln; + + if FPassed = 0 then + FState := 'N/A' + else if FPassed = FTotal then + FState := 'CONFORMS (provisionally)' + else + FState := 'DOES NOT CONFORM'; + +end; + +procedure TTestSuite.RunTest(Element: TDOMElement); +var + s: UTF8string; + TestType: DOMString; + TempDoc, RefDoc: TXMLDocument; + table: TDOMNode; + Positive: Boolean; + outURI: UTF8string; + FailMsg: string; + docNode, refNode: TDOMNode; + docMap, refMap: TDOMNamedNodeMap; + docN, refN: TDOMNotation; + I: Integer; + root: UTF8String; +begin + FTestID := Element['ID']; + TestType := Element['TYPE']; + root := GetBaseURI(Element, FRootUri); + ResolveRelativeURI(root, UTF8Encode(Element['URI']), s); + + table := nil; + outURI := ''; + if TestType = 'not-wf' then + begin + table := table_not_wf; + Positive := False; + end + else if TestType = 'error' then + begin + table := table_informative; + Positive := False; + end + else if TestType = 'valid' then + begin + if Element.hasAttribute('OUTPUT') then + ResolveRelativeURI(root, UTF8Encode(Element['OUTPUT']), outURI); + table := table_valid; + Positive := True; + end + else if TestType = 'invalid' then + begin + table := table_invalid; + Positive := not FValidating; + end; + + if TestType <> 'error' then + begin + Inc(FTotal); + if outURI <> '' then Inc(FTotal); + end; + + FailMsg := ''; + FValError := ''; + TempDoc := nil; + try + try + FParser.Options.Validate := FValidating; + FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler; + FParser.ParseUri(s, TempDoc); + except + on E: Exception do + if E.ClassType <> EAbort then + FailMsg := E.Message; + end; + if FailMsg <> '' then // fatal errors take precedence + FValError := ''; + + if not Positive then // must have been failed + begin + if TestType = 'error' then + begin + if FailMsg <> '' then + Diagnose(element, table, dcInfo, FailMsg) + else + Diagnose(element, table, dcInfo, FValError); + end + else if (FailMsg = '') and (FValError = '') then + begin + Inc(FFailCount); + Diagnose(element, table, dcNegfail, ''); + end + else // FailMsg <> '' or FValError <> '' -> actually failed + begin + Inc(FFalsePasses); + if FailMsg <> '' then + Diagnose(Element, table, dcPass, FailMsg) + else + Diagnose(Element, table, dcPass, FValError); + end; + Exit; + end + else // must have been succeeded + if (FailMsg <> '') or (FValError <> '') then + begin + Inc(FFailCount); + if FailMsg <> '' then + Diagnose(Element, table, dcFail, FailMsg) + else + Diagnose(Element, table, dcFail, FValError); + if (outURI <> '') and (FailMsg <> '') then + begin + Inc(FFailCount); + DiagnoseOut('[ input failed, no output to test ]'); + end; + Exit; + end; + + if outURI = '' then Exit; + Canonicalize(TempDoc); + TempDoc.DocumentElement.Normalize; + try + // reference data must be parsed in non-validating mode because it contains DTDs + // only when Notations need to be reported + FParser.Options.Validate := False; + FParser.ParseUri(outURI, RefDoc); + try + docNode := TempDoc.FirstChild; + refNode := RefDoc.FirstChild; + repeat + if refNode = nil then + begin + if docNode <> nil then + begin + Inc(FFailCount); + DiagnoseOut('Extra data: ' + docNode.NodeName + ' / ' + docNode.NodeValue); + end; + Exit; + end; + if docNode = nil then + begin + Inc(FFailCount); + DiagnoseOut('Missing data: ' + refNode.NodeName + ' / ' + refNode.NodeValue); + Exit; + end; + + if refNode.NodeType = DOCUMENT_TYPE_NODE then + begin + if docNode.NodeType <> DOCUMENT_TYPE_NODE then + begin + Inc(FFailCount); + DiagnoseOut('[ no doctype from parsing testcase ]'); + Exit; + end; + + refMap := TDOMDocumentType(refNode).Notations; + docMap := TDOMDocumentType(docNode).Notations; + + for I := 0 to refMap.Length-1 do + begin + refN := TDOMNotation(refMap[I]); + docN := TDOMNotation(docMap.GetNamedItem(refMap[I].NodeName)); + if not Assigned(docN) then + begin + Inc(FFailCount); + DiagnoseOut('missing notation declaration: ' + refN.NodeName); + Exit; + end; + if (refN.PublicID <> docN.PublicID) or (refN.SystemID <> docN.SystemID) then + begin + Inc(FFailCount); + DiagnoseOut('incorrect notation declaration: ' + refN.NodeName); + Exit; + end; + end; + + refNode := refNode.NextSibling; + docNode := docNode.NextSibling; + Continue; + end; + + if docNode.NodeType = DOCUMENT_TYPE_NODE then // skip DocType + docNode := docNode.NextSibling; + + if not CompareNodes(docNode, refNode, FailMsg) then + begin + Inc(FFailCount); + DiagnoseOut(FailMsg); + Exit; + end; + + docNode := docNode.NextSibling; + refNode := refNode.NextSibling; + until False; + finally + RefDoc.Free; + end; + except + on E: Exception do + begin + Inc(FFailCount); + DiagnoseOut('[ can''t read reference data: '+E.Message+' ]'); + end; + end; + finally + TempDoc.Free; + end; +end; + + +procedure TTestSuite.Diagnose(Element, Table: TDOMNode; Category: TDiagCategory; + const Error: DOMString); +var + tr, td, txt, tmp: TDOMNode; + s: DOMString; +begin + tr := FTemplate.CreateElement('tr'); + if Assigned(Element) then // column 1: section/chapter, if known + begin + s := TDOMElement(Element)['SECTIONS']; + td := FTemplate.CreateElement('td'); + td.AppendChild(FTemplate.CreateTextNode(s)); + tr.AppendChild(td); + end; + + td := FTemplate.CreateElement('td'); // column 2: test ID + td.AppendChild(FTemplate.CreateTextNode(FTestID)); + tr.AppendChild(td); + // third column is description + if Assigned(Element) then + begin + td := FTemplate.CreateElement('td'); + txt := Element.FirstChild; + while Assigned(txt) do + begin + td.AppendChild(txt.CloneNode(true, FTemplate)); + txt := txt.NextSibling; + end; + tr.AppendChild(td); + end; + // fourth column is reason + td := FTemplate.CreateElement('td'); + if Element = nil then + s := Error + else if Category <> dcInfo then + begin + if Error <> '' then + begin + if FValError <> '' then + s := '(error) ' + Error + else + s := '(fatal) ' + Error; + end + else + s := '[wrongly accepted]'; + end + else // informative + begin + if Error <> '' then + s := Error + else + s := '[accepted]'; + end; + // TODO: use   if text is empty + txt := FTemplate.CreateTextNode(s); + + if (Category <> dcPass) and (Category <> dcInfo) then + begin + tmp := FTemplate.CreateElement('em'); + tmp.AppendChild(txt); + txt := tmp; + TDOMElement(td)['bgcolor'] := '#ffaacc'; + end; + td.AppendChild(txt); + tr.AppendChild(td); + + table.AppendChild(tr); +end; + +procedure TTestSuite.DiagnoseOut(const ErrorMsg: DOMString); +var + tr, td, txt: TDOMNode; +begin + tr := FTemplate.CreateElement('tr'); + + td := FTemplate.CreateElement('td'); + td.AppendChild(FTemplate.CreateTextNode(FTestID)); + tr.AppendChild(td); + + td := FTemplate.CreateElement('td'); + txt := FTemplate.CreateElement('em'); + txt.AppendChild(FTemplate.CreateTextNode(ErrorMsg)); + td.AppendChild(txt); + TDOMElement(td)['bgcolor'] := '#ffaacc'; + tr.AppendChild(td); + table_output.AppendChild(tr); +end; + + +procedure TTestSuite.Canonicalize(node: TDOMNode); +var + child, work: TDOMNode; + Frag: TDOMDocumentFragment; +begin + child := node.FirstChild; + while Assigned(child) do + begin + if child.NodeType = CDATA_SECTION_NODE then + begin + work := node.OwnerDocument.CreateTextNode(child.NodeValue); + node.ReplaceChild(work, child); + child := work; + end + else if child.NodeType = COMMENT_NODE then + begin + work := child.NextSibling; + node.RemoveChild(child); + child := work; + Continue; + end + else if child.NodeType = ENTITY_REFERENCE_NODE then + begin + Frag := node.OwnerDocument.CreateDocumentFragment; + try + work := child.FirstChild; + while Assigned(work) do + begin + Frag.AppendChild(work.CloneNode(true)); + work := work.NextSibling; + end; + work := Frag.FirstChild; // references may be nested + if work = nil then + work := Child.PreviousSibling; + + node.ReplaceChild(Frag, child); + child := work; + finally + Frag.Free; + end; + Continue; + end; + if child.HasChildNodes then + Canonicalize(child); + child := child.NextSibling; + end; +end; + +function TTestSuite.CompareNodes(actual, correct: TDOMNode; + out Msg: string): Boolean; +var + actAtts, refAtts: TDOMNamedNodeMap; + actList, refList: TDOMNodeList; + I: Integer; + s1, s2: DOMString; +begin + Msg := ''; + Result := False; + if actual.NodeType <> correct.NodeType then + FmtStr(Msg, 'actual.NodeType (%d) != correct.NodeType (%d)', [actual.NodeType, correct.NodeType]) + else if actual.NodeName <> correct.NodeName then + FmtStr(Msg, 'actual.NodeName (%s) != correct.NodeName (%s)', [actual.NodeName, correct.NodeName]) + else if actual.NodeValue <> correct.NodeValue then + FmtStr(Msg, 'actual.NodeValue (%s) != correct.NodeValue (%s)', [actual.NodeValue, correct.NodeValue]); + if Msg <> '' then + Exit; + + if actual.NodeType = ELEMENT_NODE then + begin + // first, compare attributes + actAtts := actual.Attributes; + refAtts := correct.Attributes; + if actAtts.Length <> refAtts.Length then + begin + FmtStr(Msg, 'Element ''%s'': attributes.length (%d) != %d', [actual.NodeName, actAtts.Length, refAtts.Length]); + Exit; + end; + for I := 0 to actAtts.Length -1 do + begin + s1 := refAtts.GetNamedItem(actAtts[I].NodeName).NodeValue; + s2 := actAtts[I].NodeValue; + if s1 <> s2 then + begin + FmtStr(Msg, 'Element ''%s'', attribute ''%s'': actual.AttValue (%s) != correct.AttValue (%s)', [actual.NodeName, actAtts[I].NodeName, s2, s1]); + Exit; + end; + end; + // next, compare children + actList := actual.ChildNodes; + refList := correct.ChildNodes; + try + if actList.Count <> refList.Count then + begin + FmtStr(Msg, 'Element ''%s'': actual.ChildNodeCount (%d) != correct.ChildNodeCount (%d)', [actual.NodeName, actList.Count, refList.Count]); + Exit; + end; + for I := 0 to actList.Count -1 do + if not CompareNodes(actList[I], refList[I], Msg) then + Exit; + finally + actList.Free; + refList.Free; + end; + end; + Result := True; +end; + + + +var + i: Integer; + s: string; + SuiteName, ReportName, TemplateName: string; + Validation: Boolean; +begin + writeln('FCL driver for OASIS/NIST XML Test Suite'); + writeln('Copyright (c) 2006 by Sergei Gorelkin'); + TemplateName := ExtractFilePath(ParamStr(0)) + 'template.xml'; + if ParamCount < 2 then + begin + writeln; + writeln('Usage: ', ParamStr(0), ' <suite> <report> [-t template][-v]'); + writeln(' -t: specify report template'); + writeln(' -v: validating mode'); + Exit; + end; + + SuiteName := ExpandFilename(ParamStr(1)); + ReportName := ExpandFilename(ParamStr(2)); + i := 3; + Validation := False; + while i <= ParamCount do + begin + s := Lowercase(ParamStr(i)); + if s = '-v' then + Validation := True + else if s = '-t' then + TemplateName := ExpandFileName(ParamStr(i+1)); + Inc(i); + end; + + with TTestSuite.Create do + try + FSuiteName := SuiteName; + FTemplateName := TemplateName; + FValidating := Validation; + LoadTemplate(FTemplateName); + if Assigned(FTemplate) then + begin + Run(FSuiteName); + HandleTemplatePIs(FTemplate.DocumentElement); + writeln('Writing report to: ', ReportName); + WriteXMLFile(FTemplate, ReportName); + end; + finally + Free; + end; + +end. |