summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormichael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2>2007-03-10 16:34:09 +0000
committermichael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2>2007-03-10 16:34:09 +0000
commit356f0425b78e204cc4d44c344b82ceb639187da4 (patch)
tree142d51a48b6f8dcca0ee0c40deccce88ad281322
parent1a4a6e70d69b9caa6380c03c7dca889e57b4ea76 (diff)
downloadfpc-356f0425b78e204cc4d44c344b82ceb639187da4.tar.gz
Merged revisions 6749 via svnmerge from
svn+ssh://svn.freepascal.org/FPC/svn/fpc/trunk ........ r6749 | michael | 2007-03-08 10:40:00 +0100 (Thu, 08 Mar 2007) | 11 lines * Patch from Sergei Gorelkin + DTD validation + Correct reporting of the position of most fatal errors + TDOMDocument.CreateElement and others check their arguments for validity (INVALID_CHARACTER_ERR is reported where specification says) + property TDOMAttr.DataType + implemented TDOMDocument.GetElementByID * Common code moved to xmlutils.pp * whitespace in PublicID literals is normalized ........ git-svn-id: http://svn.freepascal.org/svn/fpc/branches/fixes_2_2@6771 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r--packages/fcl-xml/Makefile105
-rw-r--r--packages/fcl-xml/Makefile.fpc2
-rw-r--r--packages/fcl-xml/src/dom.pp240
-rw-r--r--packages/fcl-xml/src/sax_html.pp3
-rw-r--r--packages/fcl-xml/src/xmlread.pp2028
-rw-r--r--packages/fcl-xml/src/xmlutils.pp222
-rw-r--r--packages/fcl-xml/src/xmlwrite.pp15
-rw-r--r--packages/fcl-xml/tests/xmlts.pp778
8 files changed, 2673 insertions, 720 deletions
diff --git a/packages/fcl-xml/Makefile b/packages/fcl-xml/Makefile
index 0e62405ccb..80d3e64b10 100644
--- a/packages/fcl-xml/Makefile
+++ b/packages/fcl-xml/Makefile
@@ -233,157 +233,160 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
override PACKAGE_NAME=fcl-xml
override PACKAGE_VERSION=2.0.0
ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-go32v2)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-os2)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-beos)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-netbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-solaris)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-qnx)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-netware)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-openbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-wdosx)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-darwin)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-emx)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-watcom)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-netwlibc)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-wince)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-symbian)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-freebsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-netbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-amiga)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-atari)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-openbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-palmos)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),m68k-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-netbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-amiga)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-macos)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-darwin)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-morphos)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),sparc-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),sparc-netbsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),sparc-solaris)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),sparc-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),x86_64-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),x86_64-freebsd)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),x86_64-win64)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),x86_64-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-palmos)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-wince)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-gba)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-nds)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),arm-symbian)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc64-linux)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+endif
+ifeq ($(FULL_TARGET),powerpc64-darwin)
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),powerpc64-embedded)
-override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+override TARGET_UNITS+=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
endif
ifeq ($(FULL_TARGET),i386-linux)
override TARGET_RSTS+=sax xpath
diff --git a/packages/fcl-xml/Makefile.fpc b/packages/fcl-xml/Makefile.fpc
index cbe75d6c47..2beddcb06d 100644
--- a/packages/fcl-xml/Makefile.fpc
+++ b/packages/fcl-xml/Makefile.fpc
@@ -7,7 +7,7 @@ name=fcl-xml
version=2.0.0
[target]
-units=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
+units=sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath
rsts=sax xpath
[require]
diff --git a/packages/fcl-xml/src/dom.pp b/packages/fcl-xml/src/dom.pp
index 752137ab17..b0c6f1eed5 100644
--- a/packages/fcl-xml/src/dom.pp
+++ b/packages/fcl-xml/src/dom.pp
@@ -109,8 +109,10 @@ type
// DOMString
// -------------------------------------------------------
+ TSetOfChar = set of Char;
DOMString = WideString;
DOMPChar = PWideChar;
+ PDOMString = ^DOMString;
EDOMError = class(Exception)
public
@@ -205,6 +207,8 @@ type
function GetNodeType: Integer; virtual; abstract;
function GetTextContent: DOMString; virtual;
procedure SetTextContent(const AValue: DOMString); virtual;
+ function GetLocalName: DOMString; virtual;
+ function GetNamespaceURI: DOMString; virtual;
public
constructor Create(AOwner: TDOMDocument);
destructor Destroy; override;
@@ -239,15 +243,13 @@ type
function HasAttributes: Boolean; virtual;
procedure Normalize;
- (*
- // TODO: What is that Java NULL for strings ???
// always '' for nodes other than ELEMENT and ATTRIBUTE
// as well as for nodes created with DOM 1 methods
- property NamespaceURI: DOMString read GetNamespaceURI;
-
+ //property NamespaceURI: DOMString read GetNamespaceURI;
+ //property LocalName: DOMString read GetLocalName;
+ (*
// Prefix may only be changed if it was specified at creation time.
property Prefix: DOMString read FPrefix (write SetPrefix?);
- property LocalName: DOMString read FLocalName;
*)
// DOM level 3
property TextContent: DOMString read GetTextContent write SetTextContent;
@@ -406,12 +408,17 @@ type
TDOMDocument = class(TDOMNode_WithChildren)
protected
+ FIDList: TList;
FRevision: Integer;
+ FXML11: Boolean;
FImplementation: TDOMImplementation;
function GetDocumentElement: TDOMElement;
function GetDocType: TDOMDocumentType;
function GetNodeType: Integer; override;
function GetNodeName: DOMString; override;
+ function IndexOfNS(const nsURI: DOMString): Integer;
+ function FindID(const aID: DOMString; out Index: LongWord): Boolean;
+ procedure ClearIDList;
public
property DocType: TDOMDocumentType read GetDocType;
property Impl: TDOMImplementation read FImplementation;
@@ -444,17 +451,24 @@ type
// Extensions to DOM interface:
// TODO: obsolete now, but must check for usage dependencies
constructor Create;
+ destructor Destroy; override;
+ function AddID(Attr: TDOMAttr): Boolean;
+ procedure RemoveID(Attr: TDOMAttr);
end;
TXMLDocument = class(TDOMDocument)
+ private
+ FXMLVersion: DOMString;
+ procedure SetXMLVersion(const aValue: DOMString);
public
// These fields are extensions to the DOM interface:
- XMLVersion, Encoding, StylesheetType, StylesheetHRef: DOMString;
+ Encoding, StylesheetType, StylesheetHRef: DOMString;
function CreateCDATASection(const data: DOMString): TDOMCDATASection; override;
function CreateProcessingInstruction(const target, data: DOMString):
TDOMProcessingInstruction; override;
function CreateEntityReference(const name: DOMString): TDOMEntityReference; override;
+ property XMLVersion: DOMString read FXMLVersion write SetXMLVersion;
end;
@@ -462,12 +476,27 @@ type
// Attr
// -------------------------------------------------------
+ TAttrDataType = (
+ dtCdata,
+ dtId,
+ dtIdRef,
+ dtIdRefs,
+ dtEntity,
+ dtEntities,
+ dtNmToken,
+ dtNmTokens,
+ dtNotation
+ );
+
TDOMAttr = class(TDOMNode_WithChildren)
protected
FName: DOMString;
- FSpecified: Boolean;
- FNormalize: Boolean;
FOwnerElement: TDOMElement;
+ // TODO: following 3 - replace with a link to AttDecl ??
+ // ('specified' isn't related...)
+ FSpecified: Boolean;
+ FDeclared: Boolean;
+ FDataType: TAttrDataType;
function GetNodeValue: DOMString; override;
function GetNodeType: Integer; override;
function GetNodeName: DOMString; override;
@@ -477,10 +506,10 @@ type
property Name: DOMString read FName;
property Specified: Boolean read FSpecified;
property Value: DOMString read GetNodeValue write SetNodeValue;
- // Introduced in DOM level 2:
property OwnerElement: TDOMElement read FOwnerElement;
// extensions
function CompareName(const AName: DOMString): Integer; override;
+ property DataType: TAttrDataType read FDataType;
end;
@@ -532,11 +561,16 @@ type
TDOMText = class(TDOMCharacterData)
protected
+ // set by parser if text contains only literal whitespace (i.e. not coming from CharRefs)
+ FMayBeIgnorable: Boolean;
function GetNodeType: Integer; override;
function GetNodeName: DOMString; override;
+ procedure SetNodeValue(const aValue: DOMString); override;
public
function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
function SplitText(offset: LongWord): TDOMText;
+ // Extension
+ property MayBeIgnorable: Boolean read FMayBeIgnorable write FMayBeIgnorable;
end;
@@ -577,10 +611,12 @@ type
FSystemID: DOMString;
FInternalSubset: DOMString;
FEntities, FNotations: TDOMNamedNodeMap;
+ FElementDefs: TDOMNamedNodeMap;
function GetEntities: TDOMNamedNodeMap;
function GetNotations: TDOMNamedNodeMap;
function GetNodeType: Integer; override;
function GetNodeName: DOMString; override;
+ function GetElementDefs: TDOMNamedNodeMap;
public
destructor Destroy; override;
function CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode; overload; override;
@@ -591,6 +627,8 @@ type
property PublicID: DOMString read FPublicID;
property SystemID: DOMString read FSystemID;
property InternalSubset: DOMString read FInternalSubset;
+ // extensions
+ property ElementDefs: TDOMNamedNodeMap read GetElementDefs;
end;
@@ -669,6 +707,15 @@ type
implementation
+uses
+ xmlutils;
+
+type
+ PIDItem = ^TIDItem;
+ TIDItem = record
+ ID: WideString;
+ Element: TDOMElement;
+ end;
constructor TRefClass.Create;
begin
@@ -881,7 +928,8 @@ begin
if Assigned(Txt) then
begin
tmp := Child.NextSibling;
- Txt.AppendData(Child.nodeValue);
+ Txt.AppendData(TDOMText(Child).Data);
+ Txt.FMayBeIgnorable := Txt.FMayBeIgnorable and TDOMText(Child).FMayBeIgnorable;
RemoveChild(Child);
Child := tmp;
end
@@ -910,6 +958,16 @@ begin
NodeValue := AValue;
end;
+function TDOMNode.GetNamespaceURI: DOMString;
+begin
+ Result := '';
+end;
+
+function TDOMNode.GetLocalName: DOMString;
+begin
+ Result := '';
+end;
+
function CompareDOMStrings(const s1, s2: DOMPChar; l1, l2: integer): integer;
var i: integer;
begin
@@ -935,16 +993,12 @@ end;
function CompareDOMNodeWithDOMNode(Node1, Node2: Pointer): integer;
begin
- Result:=CompareDOMStrings(DOMPChar(TDOMNode(Node1).NodeName),
- DOMPChar(TDOMNode(Node2).NodeName),
- length(TDOMNode(Node1).NodeName),
- length(TDOMNode(Node2).NodeName)
- );
+ Result := TDOMNode(Node1).CompareName(TDOMNode(Node2).NodeName);
end;
function CompareDOMStringWithDOMNode(AKey, ANode: Pointer): integer;
begin
- Result := TDOMNode(ANode).CompareName(DOMString(AKey));
+ Result := TDOMNode(ANode).CompareName(PDOMString(AKey)^);
end;
@@ -1047,14 +1101,11 @@ end;
function TDOMNode_WithChildren.ReplaceChild(NewChild, OldChild: TDOMNode):
TDOMNode;
begin
-// Inc(FOwnerDocument.FRevision); // invalidate nodelists (will happen anyway)
-
RemoveFromChildNodeTree(OldChild);
InsertBefore(NewChild, OldChild);
if Assigned(OldChild) then
RemoveChild(OldChild);
- // TODO: must return OldChild, if I understand that right...
- // but OldChild is destroyed!
+ // TODO: per DOM spec, must return OldChild, but OldChild is destroyed
Result := NewChild;
end;
@@ -1167,7 +1218,7 @@ var AVLNode: TAVLTreeNode;
begin
Result:=nil;
if FChildNodeTree<>nil then begin
- AVLNode:=FChildNodeTree.FindKey(DOMPChar(ANodeName),
+ AVLNode:=FChildNodeTree.FindKey(Pointer(ANodeName),
@CompareDOMStringWithDOMNode);
if AVLNode<>nil then
Result:=TDOMNode(AVLNode.Data);
@@ -1289,7 +1340,7 @@ begin
BuildList;
if index < LongWord(FList.Count) then
- Result := TDOMNode(FList[index])
+ Result := TDOMNode(FList.List^[index])
else
Result := nil;
end;
@@ -1618,6 +1669,77 @@ begin
inherited Create(nil);
// TODO: DOM lvl 2 states that Document should be unowned. Any dependencies?
FOwnerDocument := Self;
+ FIDList := TList.Create;
+end;
+
+destructor TDOMDocument.Destroy;
+begin
+ ClearIDList;
+ FIDList.Free;
+ inherited Destroy;
+end;
+
+function TDOMDocument.AddID(Attr: TDOMAttr): Boolean;
+var
+ I: Cardinal;
+ Item: PIDItem;
+begin
+ New(Item);
+ Item^.ID := Attr.Value;
+ Item^.Element := Attr.OwnerElement;
+ if not FindID(Item^.ID, I) then
+ begin
+ FIDList.Insert(I, Item);
+ Result := True;
+ end
+ else
+ begin
+ Dispose(Item);
+ Result := False;
+ end;
+end;
+
+procedure TDOMDocument.RemoveID(Attr: TDOMAttr);
+begin
+ // TODO: Implement this
+end;
+
+function TDOMDocument.FindID(const aID: DOMString; out Index: LongWord): Boolean;
+var
+ L, H, I, C: Integer;
+ P: PIDItem;
+begin
+ Result := False;
+ L := 0;
+ H := FIDList.Count - 1;
+ while L <= H do
+ begin
+ I := (L + H) shr 1;
+ P := PIDItem(FIDList.List^[I]);
+ C := CompareDOMStrings(PWideChar(aID), PWideChar(P^.ID), Length(aID), Length(P^.ID));
+ if C > 0 then L := I + 1 else
+ begin
+ H := I - 1;
+ if C = 0 then
+ begin
+ Result := True;
+ L := I;
+ end;
+ end;
+ end;
+ Index := L;
+end;
+
+procedure TDOMDocument.ClearIDList;
+var
+ I: Integer;
+begin
+ if Assigned(FIDList) then
+ begin
+ for I := 0 to FIDList.Count-1 do
+ Dispose(PIDItem(FIDList.List^[I]));
+ FIDList.Clear;
+ end;
end;
function TDOMDocument.GetNodeType: Integer;
@@ -1652,6 +1774,8 @@ end;
function TDOMDocument.CreateElement(const tagName: DOMString): TDOMElement;
begin
+ if not IsXmlName(tagName, FXML11) then
+ raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateElement');
Result := TDOMElement.Create(Self);
Result.FNodeName := tagName;
// TODO: attach default attributes
@@ -1710,6 +1834,8 @@ end;
function TDOMDocument.CreateAttribute(const name: DOMString): TDOMAttr;
begin
+ if not IsXmlName(name, FXML11) then
+ raise EDOMError.Create(INVALID_CHARACTER_ERR, 'DOMDocument.CreateAttribute');
Result := TDOMAttr.Create(Self);
Result.FName := name;
end;
@@ -1718,6 +1844,7 @@ function TDOMDocument.CreateAttributeBuf(Buf: DOMPChar; Length: Integer): TDOMAt
begin
Result := TDOMAttr.Create(Self);
SetString(Result.FName, Buf, Length);
+ Result.FSpecified := True;
end;
function TDOMDocument.CreateEntityReference(const name: DOMString):
@@ -1753,13 +1880,15 @@ begin
Result := nil;
end;
-function TDOMDocument.GetElementById(
- const ElementID: DOMString): TDOMElement;
+function TDOMDocument.GetElementById(const ElementID: DOMString): TDOMElement;
+var
+ I: Cardinal;
begin
// TODO: Implement TDOMDocument.GetElementById
- // "Implementations that do not know whether attributes are
- // of type ID or not are expected to return null"
- Result := nil;
+ if FindID(ElementID, I) then
+ Result := PIDItem(FIDList.List^[I])^.Element
+ else
+ Result := nil;
end;
function TDOMDocument.ImportNode(ImportedNode: TDOMNode;
@@ -1770,6 +1899,12 @@ begin
Result := nil;
end;
+function TDOMDocument.IndexOfNS(const nsURI: DOMString): Integer;
+begin
+ // TODO: implement
+ Result := -1;
+end;
+
function TXMLDocument.CreateCDATASection(const data: DOMString):
TDOMCDATASection;
@@ -1781,6 +1916,8 @@ end;
function TXMLDocument.CreateProcessingInstruction(const target,
data: DOMString): TDOMProcessingInstruction;
begin
+ if not IsXmlName(target, FXML11) then
+ raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateProcessingInstruction');
Result := TDOMProcessingInstruction.Create(Self);
Result.FTarget := target;
Result.FNodeValue := data;
@@ -1789,10 +1926,17 @@ end;
function TXMLDocument.CreateEntityReference(const name: DOMString):
TDOMEntityReference;
begin
+ if not IsXmlName(name, FXML11) then
+ raise EDOMError.Create(INVALID_CHARACTER_ERR, 'XMLDocument.CreateEntityReference');
Result := TDOMEntityReference.Create(Self);
Result.FName := name;
end;
+procedure TXMLDocument.SetXMLVersion(const aValue: DOMString);
+begin
+ FXMLVersion := aValue;
+ FXML11 := (aValue = '1.1');
+end;
// -------------------------------------------------------
// Attr
@@ -1813,31 +1957,16 @@ begin
// Cloned attribute is always specified and carries its children
Result := ACloneOwner.CreateAttribute(FName);
TDOMAttr(Result).FSpecified := True;
- TDOMAttr(Result).FNormalize := FNormalize;
+ TDOMAttr(Result).FDataType := FDataType;
+ // Declared = ?
CloneChildren(Result, ACloneOwner);
end;
function TDOMAttr.GetNodeValue: DOMString;
-var
- I,J: Integer;
begin
Result := GetTextContent;
- // TODO: probably must be speed optimized
- if FNormalize then
- begin
- Result := Trim(Result);
- I := 1;
- while I < Length(Result) do
- begin
- if Result[I] = #32 then
- begin
- J := I+1;
- while (J <= Length(Result)) and (Result[J] = #32) do Inc(J);
- if J-I > 1 then Delete(Result, I+1, J-I-1);
- end;
- Inc(I);
- end;
- end;
+ if FDataType <> dtCdata then
+ NormalizeSpaces(Result);
end;
procedure TDOMAttr.SetNodeValue(const AValue: DOMString);
@@ -1945,7 +2074,7 @@ procedure TDOMElement.RemoveAttributeNS(const namespaceURI,
localName: DOMString);
begin
// TODO: Implement TDOMElement.RemoveAttributeNS
- raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS');
+ raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS');
end;
procedure TDOMElement.SetAttributeNS(const namespaceURI, qualifiedName,
@@ -2057,9 +2186,17 @@ begin
Result := '#text';
end;
+procedure TDOMText.SetNodeValue(const aValue: DOMString);
+begin
+ // TODO: may analyze aValue, but this will slow things down...
+ FMayBeIgnorable := False;
+ FNodeValue := aValue;
+end;
+
function TDOMText.CloneNode(deep: Boolean; ACloneOwner: TDOMDocument): TDOMNode;
begin
Result := ACloneOwner.CreateTextNode(FNodeValue);
+ TDOMText(Result).FMayBeIgnorable := FMayBeIgnorable;
end;
function TDOMText.SplitText(offset: LongWord): TDOMText;
@@ -2069,6 +2206,7 @@ begin
Result := TDOMText.Create(FOwnerDocument);
Result.FNodeValue := Copy(FNodeValue, offset + 1, Length);
+ Result.FMayBeIgnorable := FMayBeIgnorable;
FNodeValue := Copy(FNodeValue, 1, offset);
FParentNode.InsertBefore(Result, FNextSibling);
end;
@@ -2132,6 +2270,7 @@ destructor TDOMDocumentType.Destroy;
begin
FEntities.Free;
FNotations.Free;
+ FElementDefs.Free;
inherited Destroy;
end;
@@ -2159,6 +2298,13 @@ begin
Result := FNotations;
end;
+function TDOMDocumentType.GetElementDefs: TDOMNamedNodeMap;
+begin
+ if FElementDefs = nil then
+ FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE);
+ Result := FElementDefs;
+end;
+
// -------------------------------------------------------
// Notation
// -------------------------------------------------------
diff --git a/packages/fcl-xml/src/sax_html.pp b/packages/fcl-xml/src/sax_html.pp
index f210be8b91..9db74b2bcc 100644
--- a/packages/fcl-xml/src/sax_html.pp
+++ b/packages/fcl-xml/src/sax_html.pp
@@ -29,7 +29,7 @@ unit SAX_HTML;
interface
-uses SysUtils, Classes, SAX, DOM, DOM_HTML;
+uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs;
type
@@ -111,7 +111,6 @@ procedure ReadHTMLFragment(AParentNode: TDOMNode; var f: TStream);
implementation
-uses HTMLDefs;
const
WhitespaceChars = [#9, #10, #13, ' '];
diff --git a/packages/fcl-xml/src/xmlread.pp b/packages/fcl-xml/src/xmlread.pp
index 0c247279b7..3a99dba008 100644
--- a/packages/fcl-xml/src/xmlread.pp
+++ b/packages/fcl-xml/src/xmlread.pp
@@ -26,7 +26,20 @@ uses
SysUtils, Classes, DOM;
type
- EXMLReadError = class(Exception);
+ TErrorSeverity = (esWarning, esError, esFatal);
+
+ EXMLReadError = class(Exception)
+ private
+ FSeverity: TErrorSeverity;
+ FErrorMessage: string;
+ FLine: Integer;
+ FLinePos: Integer;
+ public
+ property Severity: TErrorSeverity read FSeverity;
+ property ErrorMessage: string read FErrorMessage;
+ property Line: Integer read FLine;
+ property LinePos: Integer read FLinePos;
+ end;
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
@@ -43,97 +56,135 @@ procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
+type
+ TDOMParseOptions = class(TObject)
+ private
+ FValidate: Boolean;
+ FPreserveWhitespace: Boolean;
+ FExpandEntities: Boolean;
+ FIgnoreComments: Boolean;
+ FCDSectionsAsText: Boolean;
+ public
+ property Validate: Boolean read FValidate write FValidate;
+ property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
+ property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
+ property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
+ property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
+ end;
+
+ // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
+ TXMLContextAction = (xaAppendAsChildren, xaReplaceChildren, xaInsertBefore,
+ xaInsertAfter, xaReplace);
+
+ TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
+
+ // This may be augmented with ByteOffset, UTF8Offset, etc.
+ TLocation = record
+ Line: Integer;
+ LinePos: Integer;
+ end;
+
+ TXMLInputSource = class(TObject)
+ private
+ FStream: TStream;
+ FStringData: string;
+// FBaseURI: WideString;
+ FSystemID: WideString;
+ FPublicID: WideString;
+// FEncoding: string;
+ public
+ constructor Create(AStream: TStream); overload;
+ constructor Create(const AStringData: string); overload;
+ property Stream: TStream read FStream;
+ property StringData: string read FStringData;
+// property BaseURI: WideString read FBaseURI write FBaseURI;
+ property SystemID: WideString read FSystemID write FSystemID;
+ property PublicID: WideString read FPublicID write FPublicID;
+// property Encoding: string read FEncoding write FEncoding;
+ end;
+
+ TDOMParser = class(TObject)
+ private
+ FOptions: TDOMParseOptions;
+ FOnError: TXMLErrorEvent;
+ public
+ constructor Create;
+ destructor Destroy; override;
+ procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
+ procedure ParseUri(const URI: WideString; out ADoc: TXMLDocument);
+ function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
+ Action: TXMLContextAction): TDOMNode;
+ property Options: TDOMParseOptions read FOptions;
+ property OnError: TXMLErrorEvent read FOnError write FOnError;
+ end;
+
+
// =======================================================
implementation
uses
- UriParser;
-
-type
- TSetOfChar = set of Char;
+ UriParser, xmlutils;
const
- Letter = ['A'..'Z', 'a'..'z'];
- Digit = ['0'..'9'];
PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
'-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
'#', '@', '$', '_', '%'];
type
TDOMNotationEx = class(TDOMNotation);
- TDOMAttrEx = class(TDOMAttr);
-
- TXMLInputSource = class;
+ TDOMDocumentTypeEx = class(TDOMDocumentType);
TDOMElementDef = class;
+ TDOMAttrDef = class;
TDOMEntityEx = class(TDOMEntity)
protected
- FInternal: Boolean;
+ FExternallyDeclared: Boolean;
FResolved: Boolean;
FOnStack: Boolean;
FReplacementText: DOMString;
+ FStartLocation: TLocation;
end;
- // TODO: Do I need PEMap in DocType? Maybe move it to Reader itself?
- // (memory usage - they are not needed after parsing)
- TDOMDocumentTypeEx = class(TDOMDocumentType)
- private
- FHasPERefs: Boolean;
- FPEMap: TDOMNamedNodeMap;
- FElementDefs: TDOMNamedNodeMap;
- function GetPEMap: TDOMNamedNodeMap;
- function GetElementDefs: TDOMNamedNodeMap;
- protected
- property PEMap: TDOMNamedNodeMap read GetPEMap;
- property ElementDefs: TDOMNamedNodeMap read GetElementDefs;
- property HasPERefs: Boolean read FHasPERefs write FHasPERefs;
- public
- destructor Destroy; override;
- end;
-
- TXMLReader = class;
TDecoder = class;
TDecoderRef = class of TDecoder;
- TXMLInputSource = class
+ TXMLCharSource = class(TObject)
private
FBuf: PChar;
FBufEnd: PChar;
FEof: Boolean;
FSurrogate: WideChar;
- FReader: TXMLReader;
- FParent: TXMLInputSource;
+ FReader: TObject; // weak reference
+ FParent: TXMLCharSource;
FEntity: TObject; // weak reference
FCursor: TObject; // weak reference
- FLine: Integer;
- FColumn: Integer;
+ FSavedLocation: TLocation;
FSystemID: WideString;
FPublicID: WideString;
function GetSystemID: WideString;
function GetPublicID: WideString;
- protected
- procedure FetchData; virtual;
public
constructor Create(const AData: WideString);
function NextChar: WideChar; virtual;
procedure Initialize; virtual;
- procedure SetEncoding(const AEncoding: string); virtual;
+ function SetEncoding(const AEncoding: string): Boolean; virtual;
property SystemID: WideString read GetSystemID write FSystemID;
property PublicID: WideString read GetPublicID write FPublicID;
end;
- TXMLDecodingSource = class(TXMLInputSource)
+ TXMLDecodingSource = class(TXMLCharSource)
private
FDecoder: TDecoder;
FSeenCR: Boolean;
function InternalNextChar: WideChar;
- procedure DecodingError(const Msg: string); overload;
- procedure DecodingError(const Msg: string; const Args: array of const); overload;
+ procedure DecodingError(const Msg: string);
+ protected
+ procedure FetchData; virtual;
public
destructor Destroy; override;
function NextChar: WideChar; override;
- procedure SetEncoding(const AEncoding: string); override;
+ function SetEncoding(const AEncoding: string): Boolean; override;
procedure Initialize; override;
end;
@@ -195,51 +246,120 @@ type
MaxLength: Integer;
end;
- TEntityResolveEvent = procedure(const PubID, SysID: WideString; var Source: TXMLInputSource) of object;
+ PWideStrWrapper = ^TWideStrWrapper;
+ TWideStrWrapper = record
+ Value: WideString;
+ end;
+
TDeclType = (dtNone, dtXml, dtText);
+ TCPType = (ctName, ctChoice, ctSeq);
+ TCPQuant = (cqOnce, cqZeroOrOnce, cqZeroOrMore, cqOnceOrMore);
+
+ TContentParticle = class(TObject)
+ private
+ FParent: TContentParticle;
+ FChildren: TList;
+ function InternalMatch(List: TList; var Index: Integer): Boolean;
+ function GetChildCount: Integer;
+ function GetChild(Index: Integer): TContentParticle;
+ public
+ CPType: TCPType;
+ CPQuant: TCPQuant;
+ Name: WideString;
+ constructor Create;
+ destructor Destroy; override;
+ function Add: TContentParticle;
+ function MatchNodeList(List: TList; var Index: Integer): Boolean;
+ property ChildCount: Integer read GetChildCount;
+ property Children[Index: Integer]: TContentParticle read GetChild;
+ end;
+
+ // This class is intended to store context information during parsing
+ // However, right now it's written to validate completely parsed elements
+ TElementValidator = class(TObject)
+ private
+ FParent: TElementValidator;
+ // to be deleted
+ FList: TList;
+ FElementDef: TDOMElementDef;
+ FIndex: Integer;
+ FChildElementCount: Integer;
+ FCurCP: TContentParticle;
+ FAmbiguous: Boolean;
+ public
+ constructor Create(aElDef: TDOMElementDef);
+ destructor Destroy; override;
+ // to be deleted
+ procedure AddElement(aNode: TDOMElement);
+ // to be deleted
+ function Match: Boolean;
+ function IsElementAllowed(const aName: DOMString): Boolean;
+ property Parent: TElementValidator read FParent write FParent;
+ end;
+
TXMLReader = class
private
- FSource: TXMLInputSource;
+ FSource: TXMLCharSource;
+ FCtrl: TDOMParser;
FCurChar: WideChar;
- FWhitespace: Boolean;
FXML11: Boolean;
+ FIntSubset: Boolean;
+ FDtdParsed: Boolean;
+ FInsideRoot: Boolean;
+ FRecognizePE: Boolean;
+ FHavePERefs: Boolean;
+ FDocNotValid: Boolean;
FValue: TWideCharBuf;
FName: TWideCharBuf;
FCopyBuf: PWideCharBuf;
- FIntSubset: Boolean;
FAllowedDecl: TDeclType;
- FDtdParsed: Boolean;
- FRecognizePE: Boolean;
+ FLocation: TLocation;
+ FTokenStart: TLocation;
FStandalone: Boolean; // property of Doc ?
- FInvalid: Boolean;
- // TODO: This array must be stored globally, not per instance
FNamePages: PByteArray;
FForbiddenAscii: TSetOfChar;
FDocType: TDOMDocumentTypeEx; // a shortcut
- FEntityLevel: Integer;
+ FPEMap: TDOMNamedNodeMap;
+ FIDRefs: TList;
+
+ FValidate: Boolean; // parsing options, copy of FCtrl.Options
FPreserveWhitespace: Boolean;
- FCreateEntityRefs: Boolean;
+ FExpandEntities: Boolean;
+ FIgnoreComments: Boolean;
+ FCDSectionsAsText: Boolean;
+
procedure RaiseExpectedQmark;
procedure GetChar;
procedure GetCharRaw;
- procedure Unget(wc: WideChar);
- procedure Initialize(ASource: TXMLInputSource);
- procedure InitializeRoot(ASource: TXMLInputSource);
+ procedure UngetCurChar;
+ procedure Initialize(ASource: TXMLCharSource);
+ procedure InitializeRoot(ASource: TXMLCharSource);
procedure DoParseAttValue(Delim: WideChar);
procedure DoParseFragment;
- procedure DoParseExtSubset(ASource: TXMLInputSource);
+ procedure DoParseExtSubset(ASource: TXMLCharSource);
function ContextPush(AEntity: TDOMEntityEx): Boolean;
function ContextPop: Boolean;
procedure XML11_BuildTables;
function XML11_CheckName: Boolean;
+ procedure ParseQuantity(CP: TContentParticle);
+ procedure MarkTokenStart;
+ function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
+ procedure AddIdRef(Buf: PWideChar; Length: Integer);
+ procedure ClearIdRefs;
+ procedure ValidateIdRefs;
+ procedure StandaloneError;
+ procedure CallErrorHandler(E: EXMLReadError);
protected
FCursor: TDOMNode;
+ // TODO: probably TObjectList
+ FValStack: TList; // validation: keep track of models
- procedure RaiseExc(const descr: String); overload;
- procedure RaiseExc(const descr: string; const args: array of const); overload;
- procedure RaiseExc(Expected: WideChar); overload;
+ procedure FatalError(const descr: String; AtTokenStart: Boolean=False); overload;
+ procedure FatalError(const descr: string; const args: array of const; AtTokenStart: Boolean=False); overload;
+ procedure FatalError(Expected: WideChar); overload;
function SkipWhitespace: Boolean;
+ function SkipWhitespaceRaw: Boolean;
procedure ExpectWhitespace;
procedure ExpectString(const s: String);
procedure ExpectChar(wc: WideChar);
@@ -255,14 +375,13 @@ type
function SkipQuotedLiteral: Boolean;
procedure ExpectAttValue; // [10]
procedure SkipPubidLiteral; // [12]
- procedure SkipSystemLiteral(out Literal: WideString; Required: Boolean);
+ procedure SkipSystemLiteral(out Literal: WideString);
procedure ParseComment; // [15]
procedure ParsePI; // [16]
procedure ParseCDSect; // [18]
procedure ParseXmlOrTextDecl(TextDecl: Boolean);
function ParseEq: Boolean; // [25]
procedure ExpectEq;
- procedure ParseMisc; // [27]
procedure ParseDoctypeDecl; // [28]
procedure ParseMarkupDecl; // [29]
procedure ParseElement; // [39]
@@ -277,57 +396,72 @@ type
SysIdOptional: Boolean): Boolean;
procedure ProcessTextAndRefs;
- procedure AssertPENesting(CurrentLevel: Integer);
+ procedure BadPENesting;
procedure ParseEntityDecl;
procedure ParseEntityDeclValue(Delim: WideChar);
procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq;
- procedure ParseMixedOrChildren;
+ procedure ExpectChoiceOrSeq(CP: TContentParticle);
procedure ParseElementDecl;
procedure ParseNotationDecl;
- function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
+ function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLCharSource): Boolean;
procedure ProcessDefaultAttributes(Element: TDOMElement);
+
+ procedure PushVC(aElDef: TDOMElementDef);
+ procedure PopVC;
+ function CurrentElementDef: TDOMElementDef;
+ procedure ValidateElement(Element: TDOMElement);
+ procedure ValidateDTD;
procedure ValidationError(const Msg: string; const args: array of const);
+ procedure CheckNotation(const Name: WideString);
+ // Some SAX-alike stuff (at a very early stage)
+ procedure DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean=False);
+ procedure DoComment(ch: PWideChar; Count: Integer);
+ procedure DoCDSect(ch: PWideChar; Count: Integer);
+ procedure DoNotationDecl(const aName, aPubID, aSysID: WideString);
public
doc: TDOMDocument;
- constructor Create;
+ constructor Create; overload;
+ constructor Create(AParser: TDOMParser); overload;
destructor Destroy; override;
- procedure ProcessXML(ASource: TXMLInputSource); // [1]
- procedure ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
- procedure ProcessDTD(ASource: TXMLInputSource); // ([29])
- end;
-
- // AttDef/ElementDef support
- TAttrDataType = (
- DT_CDATA,
- DT_ID,
- DT_IDREF,
- DT_IDREFS,
- DT_ENTITY,
- DT_ENTITIES,
- DT_NMTOKEN,
- DT_NMTOKENS,
- DT_NOTATION
- );
+ procedure ProcessXML(ASource: TXMLCharSource); // [1]
+ procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
+ procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
+ end;
+
+ // Attribute/Element declarations
TAttrDefault = (
- AD_IMPLIED,
- AD_DEFAULT,
- AD_REQUIRED,
- AD_FIXED
+ adImplied,
+ adDefault,
+ adRequired,
+ adFixed
+ );
+
+ TElementContentType = (
+ ctEmpty,
+ ctAny,
+ ctMixed,
+ ctChildren
);
TDOMAttrDef = class(TDOMAttr)
protected
- FDataType: TAttrDataType;
+ FExternallyDeclared: Boolean;
FDefault: TAttrDefault;
- // FEnumeration: TWideStringList? array of WideStrings?
+ FEnumeration: array of WideString;
+ function AddEnumToken(const aValue: WideString): Boolean;
+ function HasEnumToken(const aValue: WideString): Boolean;
end;
- TDOMElementDef = class(TDOMElement);
-
-
-{$i names.inc}
+ TDOMElementDef = class(TDOMElement)
+ public
+ FExternallyDeclared: Boolean;
+ ContentType: TElementContentType;
+ HasElementDecl: Boolean;
+ RootCP: TContentParticle;
+ constructor Create(aOwner: TDOMDocument);
+ destructor Destroy; override;
+ end;
// TODO: List of registered/supported decoders
function FindDecoder(const Encoding: string): TDecoderRef;
@@ -343,12 +477,12 @@ procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
begin
ABuffer.MaxLength := ALength;
ABuffer.Length := 0;
- ABuffer.Buffer:=AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
+ ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
end;
procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
var
- OldLength : integer;
+ OldLength: Integer;
begin
if ABuffer.Length >= ABuffer.MaxLength then
begin
@@ -361,67 +495,102 @@ begin
Inc(ABuffer.Length);
end;
-function IsValidEncName(const s: WideString): Boolean;
-var
- I: Integer;
+{ TXMLInputSource }
+
+constructor TXMLInputSource.Create(AStream: TStream);
begin
- Result := False;
- if (s = '') or (s[1] > #255) or not (char(s[1]) in ['A'..'Z', 'a'..'z']) then
- Exit;
- for I := 2 to Length(s) do
- if (s[I] > #255) or not (char(s[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
- Exit;
- Result := True;
+ inherited Create;
+ FStream := AStream;
end;
-{ TDOMDocumentTypeEx }
+constructor TXMLInputSource.Create(const AStringData: string);
+begin
+ inherited Create;
+ FStringData := AStringData;
+end;
-destructor TDOMDocumentTypeEx.Destroy;
+{ TDOMParser }
+
+constructor TDOMParser.Create;
begin
- FPEMap.Free;
- FElementDefs.Free;
+ FOptions := TDOMParseOptions.Create;
+end;
+
+destructor TDOMParser.Destroy;
+begin
+ FOptions.Free;
inherited Destroy;
end;
-function TDOMDocumentTypeEx.GetElementDefs: TDOMNamedNodeMap;
+procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
+var
+ InputSrc: TXMLCharSource;
begin
- if FElementDefs = nil then
- FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE);
- Result := FElementDefs;
+ with TXMLReader.Create(Self) do
+ try
+ InputSrc := nil;
+ if Assigned(Src) then
+ begin
+ if Assigned(Src.FStream) then
+ InputSrc := TXMLStreamInputSource.Create(Src.FStream, False)
+ else if Src.FStringData <> '' then
+ InputSrc := TXMLStreamInputSource.Create(TStringStream.Create(Src.FStringData), True)
+ else if (Src.SystemID <> '') then
+ ResolveEntity(Src.SystemID, Src.PublicID, InputSrc);
+ end;
+ if Assigned(InputSrc) then
+ ProcessXML(InputSrc)
+ else
+ FatalError('No input source specified');
+ finally
+ ADoc := TXMLDocument(doc);
+ Free;
+ end;
end;
-function TDOMDocumentTypeEx.GetPEMap: TDOMNamedNodeMap;
+procedure TDOMParser.ParseUri(const URI: WideString; out ADoc: TXMLDocument);
+var
+ Src: TXMLCharSource;
begin
- if FPEMap = nil then
- FPEMap := TDOMNamedNodeMap.Create(Self, ENTITY_NODE);
- Result := FPEMap;
+ ADoc := nil;
+ with TXMLReader.Create(Self) do
+ try
+ if ResolveEntity(URI, '', Src) then
+ ProcessXML(Src);
+ finally
+ ADoc := TXMLDocument(doc);
+ Free;
+ end;
end;
+function TDOMParser.ParseWithContext(Src: TXMLInputSource;
+ Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
+begin
+ // TODO: implement
+ Result := nil;
+end;
// TODO: These classes still cannot be considered as the final solution...
{ TXMLInputSource }
-constructor TXMLInputSource.Create(const AData: WideString);
+constructor TXMLCharSource.Create(const AData: WideString);
begin
inherited Create;
FBuf := PChar(PWideChar(AData));
FBufEnd := FBuf + Length(AData) * sizeof(WideChar);
end;
-procedure TXMLInputSource.Initialize;
+procedure TXMLCharSource.Initialize;
begin
- FLine := 1;
- FColumn := 0;
end;
-function TXMLInputSource.NextChar: WideChar;
+function TXMLCharSource.NextChar: WideChar;
begin
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
- end
+ Result := FSurrogate;
+ FSurrogate := #0;
+ if Result <> #0 then
+ Exit
else if FBufEnd <= FBuf then
begin
Result := #0;
@@ -432,38 +601,14 @@ begin
Result := PWideChar(FBuf)^;
Inc(FBuf, sizeof(WideChar));
end;
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
-end;
-
-procedure TXMLDecodingSource.DecodingError(const Msg: string);
-begin
- FReader.RaiseExc(Msg);
end;
-procedure TXMLDecodingSource.DecodingError(const Msg: string;
- const Args: array of const);
+function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
begin
- FReader.RaiseExc(Msg, Args);
+ Result := True; // always succeed
end;
-procedure TXMLInputSource.FetchData;
-begin
- FEof := True;
-end;
-
-procedure TXMLInputSource.SetEncoding(const AEncoding: string);
-begin
- // do nothing
-end;
-
-function TXMLInputSource.GetPublicID: WideString;
+function TXMLCharSource.GetPublicID: WideString;
begin
if FPublicID <> '' then
Result := FPublicID
@@ -473,7 +618,7 @@ begin
Result := '';
end;
-function TXMLInputSource.GetSystemID: WideString;
+function TXMLCharSource.GetSystemID: WideString;
begin
if FSystemID <> '' then
Result := FSystemID
@@ -491,21 +636,27 @@ begin
inherited Destroy;
end;
+procedure TXMLDecodingSource.DecodingError(const Msg: string);
+begin
+ TXMLReader(FReader).FatalError(Msg);
+end;
+
+procedure TXMLDecodingSource.FetchData;
+begin
+ FEof := True;
+end;
+
function TXMLDecodingSource.InternalNextChar: WideChar;
begin
// TODO: find a place for it, finally...
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
+ Result := FSurrogate;
+ FSurrogate := #0;
+ if Result <> #0 then
Exit;
- end;
if FBufEnd <= FBuf then
FetchData;
if not FEof then
- Result := FDecoder.DecodeNext
- else
- Result := #0;
+ Result := FDecoder.DecodeNext;
end;
function TXMLDecodingSource.NextChar: WideChar;
@@ -513,7 +664,7 @@ begin
Result := InternalNextChar;
if FSeenCR then
begin
- if (Result = #10) or ((Result = #$85) and FReader.FXML11) then
+ if (Result = #10) or ((Result = #$85) and TXMLReader(FReader).FXML11) then
Result := InternalNextChar;
FSeenCR := False;
end;
@@ -524,21 +675,12 @@ begin
end;
#$85, #$2028:
- if FReader.FXML11 then
+ if TXMLReader(FReader).FXML11 then
Result := #10;
end;
- if (Result < #256) and (char(Result) in FReader.FForbiddenAscii) or
- ((ord(Result) or 1) = $FFFF) then
+ if (Result < #256) and (char(Result) in TXMLReader(FReader).FForbiddenAscii) or
+ (Result >= #$FFFE) then
DecodingError('Invalid character');
-
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
end;
procedure TXMLDecodingSource.Initialize;
@@ -570,20 +712,19 @@ begin
FDecoder := TUTF8Decoder.Create(Self);
end;
-procedure TXMLDecodingSource.SetEncoding(const AEncoding: string);
+function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
var
NewDecoder: TDecoderRef;
begin
+ Result := True;
if FDecoder.Supports(AEncoding) then // no change needed
Exit;
// hardcoded stuff - special case of UCS2
if FDecoder is TUCS2Decoder then
begin
// check for 'UTF-16LE' or 'UTF-16BE'
- if SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding) then
- Exit
- else
- DecodingError('Current encoding cannot be switched to ''%s''', [AEncoding]);
+ Result := SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding);
+ Exit;
end;
NewDecoder := FindDecoder(AEncoding);
if Assigned(NewDecoder) then
@@ -592,7 +733,7 @@ begin
FDecoder := NewDecoder.Create(Self);
end
else
- DecodingError('Encoding ''%s'' is not supported', [AEncoding]);
+ Result := False;
end;
@@ -725,16 +866,16 @@ begin
Inc(FBuf);
if Result < #$80 then
Exit;
- if Byte(Result) and $40 = 0 then
- DecodingError('Invalid UTF8 sequence start byte');
+ if Word(Result) and $40 = 0 then
+ DecodingError('Invalid UTF-8 sequence start byte');
bc := 1;
- if Byte(Result) and $20 <> 0 then
+ if Word(Result) and $20 <> 0 then
begin
Inc(bc);
- if Byte(Result) and $10 <> 0 then
+ if Word(Result) and $10 <> 0 then
begin
Inc(bc);
- if Byte(Result) and $8 <> 0 then
+ if Word(Result) and $8 <> 0 then
DecodingError('UCS4 character out of supported range');
end;
end;
@@ -747,7 +888,7 @@ begin
while bc > 0 do
begin
if ord(FBuf[0]) and $C0 <> $80 then
- DecodingError('Invalid byte in UTF8 sequence');
+ DecodingError('Invalid byte in UTF-8 sequence');
Value := (Value shl 6) or (Cardinal(FBuf[0]) and $3F);
Inc(FBuf);
Dec(bc);
@@ -755,7 +896,7 @@ begin
Value := Value and MaxCode[I];
// RFC2279 check
if Value <= MaxCode[I-1] then
- DecodingError('Invalid UTF8 sequence');
+ DecodingError('Invalid UTF-8 sequence');
case Value of
0..$D7FF, $E000..$FFFF:
begin
@@ -780,17 +921,27 @@ end;
{ TXMLReader }
-function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
+procedure TXMLReader.MarkTokenStart;
+begin
+ FTokenStart := FLocation;
+end;
+
+function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLCharSource): Boolean;
var
AbsSysID: WideString;
Filename: string;
Stream: TStream;
begin
- Result := False;
+ Result := True;
+ if Assigned(FSource) then
+ Result := ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID)
+ else
+ AbsSysID := SystemID;
- if ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID) then
+ if Result then
begin
Source := nil;
+ Result := False;
// TODO: alternative resolvers
if URIToFilename(AbsSysID, Filename) then
begin
@@ -808,102 +959,130 @@ begin
end;
end;
-procedure TXMLReader.InitializeRoot(ASource: TXMLInputSource);
+procedure TXMLReader.InitializeRoot(ASource: TXMLCharSource);
begin
Initialize(ASource);
GetChar;
// TODO: presence of BOM must prevent UTF-8 encoding from being changed
- CheckForChar(#$FEFF); // skip BOM, if one is present
+ if CheckForChar(#$FEFF) then // skip BOM, if one is present
+ Dec(FLocation.LinePos);
end;
-procedure TXMLReader.Initialize(ASource: TXMLInputSource);
+procedure TXMLReader.Initialize(ASource: TXMLCharSource);
begin
FSource := ASource;
FSource.FReader := Self;
+ FLocation.Line := 1;
+ FLocation.LinePos := 0; // TODO: or 1?
FSource.Initialize;
end;
procedure TXMLReader.GetCharRaw;
begin
FCurChar := FSource.NextChar;
- FWhitespace := (FCurChar = #32) or (FCurChar = #10) or
- (FCurChar = #9) or (FCurChar = #13);
- // Used for handling the internal DTD subset
- if Assigned(FCopyBuf) and (FSource.FParent = nil) then
- BufAppend(FCopyBuf^, FCurChar);
+ if FCurChar = #10 then
+ begin
+ Inc(FLocation.Line);
+ FLocation.LinePos := 0;
+ end
+ else
+ Inc(FLocation.LinePos);
end;
procedure TXMLReader.GetChar;
begin
GetCharRaw;
+ // Used for handling the internal DTD subset
+ if Assigned(FCopyBuf) and (FSource.FParent = nil) then
+ BufAppend(FCopyBuf^, FCurChar);
if not FRecognizePE then
Exit;
if (FCurChar = #0) and ContextPop then
begin
- Unget(FCurChar);
+ UngetCurChar;
FCurChar := #32;
- FWhitespace := True;
end
else if FCurChar = '%' then
begin
- FCurChar := FSource.NextChar;
+ GetCharRaw;
if not CheckName then
begin
- Unget(FCurChar);
+ UngetCurChar;
FCurChar := '%';
Exit;
end;
if FCurChar = ';' then // "%pe1;%pe2" - must not recognize pe2 immediately!
GetCharRaw
else
- RaiseExc(WideChar(';'));
+ FatalError(WideChar(';'));
StartPE;
FCurChar := #32;
- FWhitespace := True;
end;
end;
-procedure TXMLReader.Unget(wc: WideChar);
+procedure TXMLReader.UngetCurChar;
begin
- FSource.FSurrogate := wc;
+ FSource.FSurrogate := FCurChar;
end;
procedure TXMLReader.RaiseExpectedQmark;
begin
- RaiseExc('Expected single or double quote');
+ FatalError('Expected single or double quote');
end;
-procedure TXMLReader.RaiseExc(Expected: WideChar);
+procedure TXMLReader.FatalError(Expected: WideChar);
begin
// FIX: don't output what is found - anything may be found, including exploits...
- RaiseExc('Expected "%1s"', [string(Expected)]);
+ FatalError('Expected "%1s"', [string(Expected)]);
end;
-procedure TXMLReader.RaiseExc(const descr: String);
+procedure TXMLReader.FatalError(const descr: String; AtTokenStart: Boolean);
+var
+ RealLocation: ^TLocation;
+ E: EXMLReadError;
begin
- raise EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, FSource.FLine, FSource.FColumn, descr]);
+ if AtTokenStart then
+ RealLocation := @FTokenStart
+ else
+ RealLocation := @FLocation;
+ E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, RealLocation^.Line, RealLocation^.LinePos, descr]);
+ E.FSeverity := esFatal;
+ E.FErrorMessage := descr;
+ E.FLine := RealLocation^.Line;
+ E.FLinePos := RealLocation^.LinePos;
+ CallErrorHandler(E);
end;
-procedure TXMLReader.RaiseExc(const descr: string; const args: array of const);
+procedure TXMLReader.FatalError(const descr: string; const args: array of const; AtTokenStart: Boolean);
begin
- RaiseExc(Format(descr, args));
+ FatalError(Format(descr, args), AtTokenStart);
end;
function TXMLReader.SkipWhitespace: Boolean;
begin
Result := False;
- while FWhitespace do
+ while (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) do
begin
GetChar;
Result := True;
end;
end;
+function TXMLReader.SkipWhitespaceRaw: Boolean;
+begin
+ Result := False;
+ while (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) do
+ begin
+ GetCharRaw;
+ Result := True;
+ end;
+end;
+
procedure TXMLReader.ExpectWhitespace;
begin
if not SkipWhitespace then
- RaiseExc('Expected whitespace');
+ FatalError('Expected whitespace');
end;
procedure TXMLReader.ExpectChar(wc: WideChar);
@@ -911,7 +1090,7 @@ begin
if FCurChar = wc then
GetChar
else
- RaiseExc(wc);
+ FatalError(wc);
end;
procedure TXMLReader.ExpectString(const s: String);
@@ -921,7 +1100,7 @@ begin
for I := 1 to Length(s) do
begin
if FCurChar <> WideChar(s[i]) then
- RaiseExc('Expected "%s"', [s]);
+ FatalError('Expected "%s"', [s]);
GetChar;
end;
end;
@@ -952,83 +1131,83 @@ end;
constructor TXMLReader.Create;
begin
inherited Create;
- // Naming bitmap: Point to static data for XML 1.0,
- // and allocate buffer in XML11_BuildTables when necessary.
- FNamePages := @NamePages;
BufAllocate(FName, 128);
BufAllocate(FValue, 512);
+ FIDRefs := TList.Create;
+ FValStack := TList.Create;
+
+ // Set char rules to XML 1.0
+ FNamePages := @NamePages;
FForbiddenAscii := [#1..#8, #11..#12, #14..#31];
- // TODO: put under user control
- FPreserveWhitespace := True;
- FCreateEntityRefs := True;
+end;
+
+constructor TXMLReader.Create(AParser: TDOMParser);
+begin
+ Create;
+ FCtrl := AParser;
+ FValidate := FCtrl.Options.Validate;
+ FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
+ FExpandEntities := FCtrl.Options.ExpandEntities;
+ FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
+ FIgnoreComments := FCtrl.Options.IgnoreComments;
end;
destructor TXMLReader.Destroy;
+var
+ I: Integer;
begin
- if FXML11 then
- FreeMem(FNamePages);
FreeMem(FName.Buffer);
FreeMem(FValue.Buffer);
while ContextPop do; // clean input stack
FSource.Free;
+ FPEMap.Free;
+ for I := FValStack.Count-1 downto 0 do
+ TObject(FValStack[I]).Free;
+ FValStack.Free;
+ ClearIDRefs;
+ FIDRefs.Free;
inherited Destroy;
end;
procedure TXMLReader.XML11_BuildTables;
-var
- I: Integer;
begin
- if not FXML11 then
- GetMem(FNamePages, 512);
+ FNamePages := Xml11NamePages;
FXML11 := True;
- for I := 0 to 255 do
- FNamePages^[I] := ord(Byte(I) in Xml11HighPages);
- FNamePages^[0] := 2;
- FNamePages^[3] := $2c;
- FNamePages^[$20] := $2a;
- FNamePages^[$21] := $2b;
- FNamePages^[$2f] := $29;
- FNamePages^[$30] := $2d;
- FNamePages^[$fd] := $28;
-
- Move(FNamePages^, FNamePages^[256], 256);
- FNamePages^[$100] := $19;
- FNamePages^[$103] := $2E;
- FNamePages^[$120] := $2F;
FForbiddenAscii := [#1..#8, #11..#12, #14..#31, #$7F..#$84, #$86..#$9F];
end;
-
-procedure TXMLReader.ProcessXML(ASource: TXMLInputSource);
+procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
begin
doc := TXMLDocument.Create;
FCursor := doc;
InitializeRoot(ASource);
-
FAllowedDecl := dtXml;
- ParseMisc;
- FDtdParsed := True;
- if FDocType = nil then
- ValidationError('Missing DTD', []);
- if CheckName then
- ParseElement
- else
- RaiseExc('Expected element');
- ParseMisc;
- if Assigned(FDocType) and (doc.DocumentElement.TagName <> FDocType.Name) then
- ValidationError('DTD name does not match root element', []);
+ DoParseFragment; // case FCurChar <> #0 is handled
- if FCurChar <> #0 then
- RaiseExc('Text after end of document element found');
+ if doc.DocumentElement = nil then
+ FatalError('Root element is missing');
+
+ if FValidate then
+ begin
+ if Assigned(FDocType) then
+ begin
+ if doc.DocumentElement.TagName <> FDocType.Name then
+ ValidationError('DTD name does not match root element', []);
+ ValidateIdRefs;
+ end
+ else
+ ValidationError('Missing DTD', []);
+ end;
end;
-procedure TXMLReader.ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
+procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
begin
doc := AOwner.OwnerDocument;
FCursor := AOwner;
InitializeRoot(ASource);
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
FAllowedDecl := dtText;
+ FInsideRoot := True;
DoParseFragment;
end;
@@ -1038,7 +1217,8 @@ begin
if (FCurChar >= #$D800) and (FCurChar <= #$DB7F) then
begin
BufAppend(FName, FCurChar);
- GetCharRaw;
+ // TODO: do I need to update Location here???
+ FCurChar := FSource.NextChar;
Result := (FCurChar >= #$DC00) and (FCurChar <= #$DFFF);
end
else
@@ -1073,7 +1253,11 @@ end;
procedure TXMLReader.RaiseNameNotFound;
begin
- RaiseExc('Name starts with invalid character');
+ // Coming at no cost, this allows more user-friendly error messages
+ if (FCurChar = #32) or (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) then
+ FatalError('Whitespace is not allowed here')
+ else
+ FatalError('Name starts with invalid character');
end;
function TXMLReader.ExpectName: WideString;
@@ -1110,6 +1294,7 @@ function TXMLReader.ParseCharRef: Boolean; // [66]
var
Value: Integer;
begin
+ GetCharRaw; // skip '&'
Result := FCurChar = '#';
if Result then
begin
@@ -1143,7 +1328,7 @@ begin
if FXML11 then
BufAppend(FValue, WideChar(Value))
else
- RaiseExc('Invalid character reference');
+ FatalError('Invalid character reference');
$09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
BufAppend(FValue, WideChar(Value));
$10000..$10FFFF:
@@ -1152,7 +1337,7 @@ begin
BufAppend(FValue, WideChar($DC00 xor (Value and $3FF)));
end;
else
- RaiseExc('Invalid character reference');
+ FatalError('Invalid character reference');
end;
end;
end;
@@ -1165,17 +1350,16 @@ begin
while (FCurChar <> Delim) and (FCurChar <> #0) do
begin
if FCurChar = '<' then
- RaiseExc('Literal "<" in attribute value')
+ FatalError('Character ''<'' is not allowed in attribute value')
else if FCurChar <> '&' then
begin
- if FWhitespace then
+ if (FCurChar = #10) or (FCurChar = #9) or (FCurChar = #13) then
FCurChar := #32;
BufAppend(FValue, FCurChar);
GetCharRaw;
end
else
begin
- GetCharRaw; // skip '&'
if ParseCharRef then
Continue;
@@ -1184,12 +1368,12 @@ begin
begin
if FValue.Length > 0 then
begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
+ DoText(FValue.Buffer, FValue.Length);
FValue.Length := 0;
end;
if RefNode.SystemID <> '' then
- RaiseExc('External entity reference is not allowed in attribute value');
+ FatalError('External entity reference is not allowed in attribute value', True);
IncludeEntity(RefNode, True);
end;
@@ -1197,22 +1381,24 @@ begin
end; // while
if FValue.Length > 0 then
begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
+ DoText(FValue.Buffer, FValue.Length);
FValue.Length := 0;
end;
end;
procedure TXMLReader.DoParseFragment;
begin
+ // SAX: ContentHandler.StartDocument() - here?
ParseContent;
if FCurChar <> #0 then
- RaiseExc('Closing tag not allowed here');
+ FatalError('Closing tag is not allowed here');
+ // SAX: ContentHandler.EndDocument() - here? or somewhere in destructor?
end;
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
var
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
if AEntity.SystemID <> '' then
begin
@@ -1227,35 +1413,37 @@ begin
FAllowedDecl := dtText;
end
else
- Src := TXMLInputSource.Create(AEntity.FReplacementText);
+ Src := TXMLCharSource.Create(AEntity.FReplacementText);
AEntity.FOnStack := True;
Src.FEntity := AEntity;
Src.FParent := FSource;
Src.FCursor := FCursor;
- Unget(FCurChar); // remember FCurChar in previous context
+ UngetCurChar; // remember FCurChar and current location in previous context
+ Src.FSavedLocation := FLocation;
- Inc(FEntityLevel);
Initialize(Src);
Result := True;
end;
function TXMLReader.ContextPop: Boolean;
var
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
+ TmpLocation: TLocation;
begin
Result := Assigned(FSource.FParent);
if Result then
begin
Src := FSource.FParent;
+ TmpLocation := FSource.FSavedLocation;
if Assigned(FSource.FEntity) then
TDOMEntityEx(FSource.FEntity).FOnStack := False;
FCursor := TDOMNode(FSource.FCursor);
FSource.Free;
FSource := Src;
- Dec(FEntityLevel);
GetChar; // re-classify - case of "%pe1;%pe2;"
+ FLocation := TmpLocation;
end;
end;
@@ -1266,12 +1454,13 @@ begin
if not AEntity.FResolved then
begin
if AEntity.FOnStack then
- RaiseExc('Entity ''%s'' recursively references itself', [AEntity.NodeName]);
+ FatalError('Entity ''%s'' recursively references itself', [AEntity.NodeName]);
if ContextPush(AEntity) then
begin
GetCharRaw;
- CheckForChar(#$FEFF);
+ if CheckForChar(#$FEFF) then
+ Dec(FLocation.LinePos);
FCursor := AEntity; // build child node tree for the entity
try
@@ -1287,7 +1476,7 @@ begin
end;
end;
Node := FCursor;
- if FCreateEntityRefs or (not AEntity.FResolved) then
+ if (not FExpandEntities) or (not AEntity.FResolved) then
begin
Node := doc.CreateEntityReference(AEntity.NodeName);
FCursor.AppendChild(Node);
@@ -1307,7 +1496,9 @@ var
PEnt: TDOMEntityEx;
begin
SetString(PEName, FName.Buffer, FName.Length);
- PEnt := FDocType.PEMap.GetNamedItem(PEName) as TDOMEntityEx;
+ PEnt := nil;
+ if Assigned(FPEMap) then
+ PEnt := FPEMap.GetNamedItem(PEName) as TDOMEntityEx;
if PEnt = nil then // TODO -cVC: Referencing undefined PE
begin // (These are classified as 'optional errors'...)
// ValidationError('Undefined parameter entity referenced: %s', [PEName]);
@@ -1315,7 +1506,7 @@ begin
end;
if PEnt.FOnStack then
- RaiseExc('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
+ FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
ContextPush(PEnt);
end;
@@ -1326,6 +1517,7 @@ var
Predef: WideChar;
begin
Result := nil;
+ MarkTokenStart;
RefName := ExpectName;
ExpectChar(';');
Predef := ResolvePredefined(RefName);
@@ -1338,17 +1530,17 @@ begin
if Result = nil then
begin
- if FStandalone or (FDocType = nil) or not (FDocType.HasPERefs or (FDocType.SystemID <> '')) then
- RaiseExc('Undefined entity ''%s'' referenced', [RefName])
+ if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then
+ FatalError('Reference to undefined entity ''%s''', [RefName], True)
else
ValidationError('Undefined entity ''%s'' referenced', [RefName]);
end
else
begin
- if FStandalone and (not Result.FInternal) then
- RaiseExc('Standalone constraint violation');
+ if FStandalone and Result.FExternallyDeclared then
+ FatalError('Standalone constraint violation', True);
if Result.NotationName <> '' then
- RaiseExc('Reference to unparsed entity ''%s''', [RefName]);
+ FatalError('Reference to unparsed entity ''%s''', [RefName], True);
end;
end;
end;
@@ -1356,43 +1548,43 @@ end;
procedure TXMLReader.ProcessTextAndRefs;
var
nonWs: Boolean;
- last: WideChar;
RefNode: TDOMEntityEx;
begin
FValue.Length := 0;
nonWs := False;
- FAllowedDecl := dtNone;
+ MarkTokenStart;
while (FCurChar <> '<') and (FCurChar <> #0) do
begin
if FCurChar <> '&' then
begin
- if not FWhitespace then
+ if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
nonWs := True;
BufAppend(FValue, FCurChar);
if FCurChar = '>' then
with FValue do
- if (Length >= 3) and
- (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
- RaiseExc('Literal '']]>'' is not allowed in text');
+ if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
+ begin
+ Dec(FLocation.LinePos, 2);
+ FatalError('Literal '']]>'' is not allowed in text');
+ end;
GetCharRaw;
end
else
begin
- GetCharRaw; // skip '&'
+ if not FInsideRoot then
+ FatalError('Illegal at document level');
if ParseCharRef then
begin
- last := FValue.Buffer[FValue.Length-1];
- if (last <> #9) and (last <> #10) and (last <> #13) and (last <> #32) then
- nonWs := True;
+ nonWs := True; // CharRef to whitespace is not considered whitespace
Continue;
end;
- nonWs := True;
RefNode := ParseReference;
if Assigned(RefNode) then
begin
if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
+ // 'Reference illegal at root' is checked above, no need to check here
+ DoText(FValue.Buffer, FValue.Length, not nonWs);
FValue.Length := 0;
nonWs := False;
end;
@@ -1400,11 +1592,16 @@ begin
end;
end;
end; // while
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
+ if FInsideRoot then
begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
+ if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
+ begin
+ DoText(FValue.Buffer, FValue.Length, not nonWs);
+ FValue.Length := 0;
+ end;
+ end
+ else if nonWs then
+ FatalError('Illegal at document level', True);
end;
procedure TXMLReader.ExpectAttValue; // [10]
@@ -1429,6 +1626,7 @@ begin
begin
Delim := FCurChar;
GetCharRaw; // skip quote
+ MarkTokenStart;
FValue.Length := 0;
while (FCurChar <> Delim) and (FCurChar <> #0) do
begin
@@ -1442,28 +1640,35 @@ end;
procedure TXMLReader.SkipPubidLiteral; // [12]
var
I: Integer;
+ wc: WideChar;
begin
if SkipQuotedLiteral then
begin
for I := 0 to FValue.Length-1 do
- if (FValue.Buffer[I] > #255) or not (Char(FValue.Buffer[I]) in PubidChars) then
- RaiseExc('Illegal Public ID literal')
+ begin
+ wc := FValue.Buffer[I];
+ if (wc > #255) or not (Char(wc) in PubidChars) then
+ FatalError('Illegal Public ID literal', True);
+ if (wc = #10) or (wc = #13) then
+ FValue.Buffer[I] := #32;
+ end;
end
else
RaiseExpectedQMark;
end;
-procedure TXMLReader.SkipSystemLiteral(out Literal: WideString; Required: Boolean);
+procedure TXMLReader.SkipSystemLiteral(out Literal: WideString);
begin
if SkipQuotedLiteral then
SetString(Literal, FValue.Buffer, FValue.Length)
- else if Required then
+ else
RaiseExpectedQMark;
end;
procedure TXMLReader.ParseComment; // [15]
begin
ExpectString('--');
+ MarkTokenStart;
FValue.Length := 0;
repeat
BufAppend(FValue, FCurChar);
@@ -1472,14 +1677,13 @@ begin
if (Length >= 2) and (Buffer[Length-1] = '-') and
(Buffer[Length-2] = '-') then
begin
- Dec(Length, 2);
- if Assigned(FCursor) then
- FCursor.AppendChild(doc.CreateCommentBuf(Buffer, Length));
ExpectChar('>');
+ Dec(Length, 2);
+ DoComment(Buffer, Length);
Exit;
end;
until FCurChar = #0;
- RaiseExc('Unterminated comment');
+ FatalError('Unterminated comment', True);
end;
procedure TXMLReader.ParsePI; // [16]
@@ -1487,6 +1691,7 @@ var
Name, Value: WideString;
begin
GetCharRaw; // skip '?'
+ MarkTokenStart;
Name := ExpectName;
with FName do
@@ -1496,21 +1701,21 @@ begin
((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
begin
if Name <> 'xml' then
- RaiseExc('''xml'' is a reserved word; it must be lowercase');
- if FAllowedDecl <> dtNone then
+ FatalError('''xml'' is a reserved word; it must be lowercase', True);
+
+ // Declaration is allowed only at the very beginning of the _external_ entity
+ if (FTokenStart.Line = 1) and (FTokenStart.LinePos = 3) and (FSource.FSystemID <> '') then
begin
ParseXmlOrTextDecl(FAllowedDecl = dtText);
- FAllowedDecl := dtNone;
Exit;
end
else
- RaiseExc('XML declaration not allowed here');
+ FatalError('XML declaration is not allowed here', True);
end;
if FCurChar <> '?' then
ExpectWhitespace;
- FAllowedDecl := dtNone;
FValue.Length := 0;
repeat
BufAppend(FValue, FCurChar);
@@ -1521,12 +1726,13 @@ begin
begin
Dec(Length, 2);
SetString(Value, Buffer, Length);
+ // SAX: ContentHandler.ProcessingInstruction(Name, Value);
if Assigned(FCursor) then
FCursor.AppendChild(Doc.CreateProcessingInstruction(Name, Value));
Exit;
end;
until FCurChar = #0;
- RaiseExc('Unterminated processing instruction');
+ FatalError('Unterminated processing instruction', True);
end;
@@ -1542,12 +1748,12 @@ begin
begin
ExpectString('version'); // [24]
ExpectEq;
- SkipSystemLiteral(TmpStr, True);
+ SkipSystemLiteral(TmpStr);
IsXML11 := False;
if TmpStr = '1.1' then // Checking for bad chars is implied
IsXML11 := True
else if TmpStr <> '1.0' then
- RaiseExc('Illegal version number');
+ FatalError('Illegal version number', True);
if not TextDecl then
begin
@@ -1558,7 +1764,7 @@ begin
end
else // parsing external entity
if IsXML11 and not FXML11 then
- RaiseExc('XML 1.0 document cannot invoke XML 1.1 entities');
+ FatalError('XML 1.0 document cannot invoke XML 1.1 entities', True);
if FCurChar <> '?' then
ExpectWhitespace;
@@ -1569,12 +1775,13 @@ begin
begin
ExpectString('encoding');
ExpectEq;
- SkipSystemLiteral(TmpStr, True);
+ SkipSystemLiteral(TmpStr);
- if not IsValidEncName(TmpStr) then
- RaiseExc('Illegal encoding name');
+ if not IsValidXmlEncoding(TmpStr) then
+ FatalError('Illegal encoding name', True);
- FSource.SetEncoding(TmpStr); // <-- Wide2Ansi conversion here
+ if not FSource.SetEncoding(TmpStr) then // <-- Wide2Ansi conversion here
+ FatalError('Encoding ''%s'' is not supported', [TmpStr], True);
// getting here means that specified encoding is supported
// TODO: maybe assign the 'preferred' encoding name?
if not TextDecl and doc.InheritsFrom(TXMLDocument) then
@@ -1589,12 +1796,12 @@ begin
begin
ExpectString('standalone');
ExpectEq;
- SkipSystemLiteral(TmpStr, True);
+ SkipSystemLiteral(TmpStr);
if TmpStr = 'yes' then
FStandalone := True
else if TmpStr <> 'no' then
- RaiseExc('Only "yes" or "no" are permitted as values of "standalone"');
- SkipWhitespace;
+ FatalError('Only "yes" or "no" are permitted as values of "standalone"', True);
+ SkipWhitespaceRaw;
end;
ExpectString('?>');
@@ -1603,17 +1810,15 @@ end;
procedure TXMLReader.ParseDoctypeDecl; // [28]
var
IntSubset: TWideCharBuf;
- Src, OldSrc: TXMLInputSource;
+ Src, OldSrc: TXMLCharSource;
begin
- FAllowedDecl := dtNone;
-
if FDtdParsed then
- RaiseExc('Markup declaration not allowed here');
+ FatalError('Markup declaration is not allowed here');
- ExpectString('DOCTYPE'); // gives possibly incorrect error message
+ ExpectString('DOCTYPE');
ExpectWhitespace;
- FDocType := TDOMDocumentTypeEx.Create(doc);
+ FDocType := TDOMDocumentTypeEx(TDOMDocumentType.Create(doc));
FDtdParsed := True;
{ To comply with certain output tests, we must insert PIs coming from internal
subset before DocType node. This looks very synthetic, but let it be...
@@ -1622,7 +1827,7 @@ begin
FDocType.FName := ExpectName;
ExpectWhitespace;
ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
- SkipWhitespace;
+ SkipWhitespaceRaw;
if FCurChar = '[' then
begin
@@ -1640,132 +1845,102 @@ begin
FCopyBuf := nil;
FreeMem(IntSubset.Buffer);
end;
- SkipWhitespace;
+ SkipWhitespaceRaw;
end;
ExpectChar('>');
- if FDocType.SystemID <> '' then
+ if (FDocType.SystemID <> '') and ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then
begin
- if ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then
- begin
- OldSrc := FSource;
- Unget(FCurChar);
- FCursor := nil;
- try
- DoParseExtSubset(Src);
- finally
- while ContextPop do; // Cleanup after possible exceptions
- FSource.Free;
- FSource := OldSrc;
- GetChar;
- FCursor := Doc;
- end;
+ // DTD parsing code assumes that FSource is RootSource,
+ // therefore we cannot use ContextPush here...
+ OldSrc := FSource;
+ UngetCurChar;
+ FCursor := nil;
+ try
+ DoParseExtSubset(Src);
+ finally
+ while ContextPop do; // Cleanup after possible exceptions
+ FSource.Free;
+ FSource := OldSrc;
+ GetChar;
+ FCursor := Doc;
end;
end;
finally
doc.AppendChild(FDocType);
end;
-end;
-
-procedure TXMLReader.ParseMisc;
-begin
- repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
- if not CheckForChar('<') then
- Break;
- if CheckForChar('!') then
- begin
- FAllowedDecl := dtNone;
- if FCurChar = '-' then
- ParseComment
- else
- ParseDoctypeDecl;
- end
- else
- if FCurChar = '?' then
- ParsePI
- else
- Break;
- until FCurChar = #0;
- FAllowedDecl := dtNone;
+ ValidateDTD;
end;
function TXMLReader.ParseEq: Boolean; // [25]
begin
- while FWhitespace do GetCharRaw;
+ SkipWhitespaceRaw;
Result := FCurChar = '=';
if Result then
begin
GetCharRaw;
- while FWhitespace do GetCharRaw;
+ SkipWhitespaceRaw;
end;
end;
procedure TXMLReader.ExpectEq;
begin
if not ParseEq then
- RaiseExc('Expected "="');
+ FatalError('Expected "="');
end;
{ DTD stuff }
-procedure TXMLReader.AssertPENesting(CurrentLevel: Integer);
+procedure TXMLReader.BadPENesting;
begin
- if CurrentLevel <> FEntityLevel then
- ValidationError('Parameter entities must be properly nested', []);
+ ValidationError('Parameter entities must be properly nested', []);
end;
-// content model
-
-type
- TElementContentType = (
- ctEmpty,
- ctAny,
- ctMixed,
- ctName,
- ctChoice,
- ctSeq
- );
+procedure TXMLReader.StandaloneError;
+begin
+ ValidationError('Standalone constriant violation', []);
+end;
- TElementContentQuant = (
- cqNone,
- cqOpt,
- cqReq,
- cqPlus
- );
+procedure TXMLReader.CheckNotation(const Name: WideString);
+begin
+ if FDocType.Notations.GetNamedItem(Name) = nil then
+ ValidationError('Notation ''%s'' is not declared', [Name]);
+end;
-{
- TElementContent = record
- ContentType: TElementContentType;
- ContentQuant: TElementContentQuant;
- Name: WideString;
- Children: array of TElementContent;
- end;
-}
+procedure TXMLReader.ParseQuantity(CP: TContentParticle);
+begin
+ if CheckForChar('?') then
+ CP.CPQuant := cqZeroOrOnce
+ else if CheckForChar('*') then
+ CP.CPQuant := cqZeroOrMore
+ else if CheckForChar('+') then
+ CP.CPQuant := cqOnceOrMore;
+end;
-procedure TXMLReader.ExpectChoiceOrSeq(); // [49], [50]
+procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle); // [49], [50]
var
Delim: WideChar;
- PELevel: Integer;
+ CurrentEntity: TObject;
+ CurrentCP: TContentParticle;
begin
Delim := #0;
repeat
+ CurrentCP := CP.Add;
SkipWhitespace;
if FCurChar = '(' then
begin
- PELevel := FEntityLevel;
+ CurrentEntity := FSource.FEntity;
GetChar;
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
+ ExpectChoiceOrSeq(CurrentCP);
+ if CurrentEntity <> FSource.FEntity then
+ BadPENesting;
GetChar;
end
else
- SkipName;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
+ CurrentCP.Name := ExpectName;
+
+ ParseQuantity(CurrentCP);
SkipWhitespace;
if FCurChar = ')' then
@@ -1775,93 +1950,115 @@ begin
if (FCurChar = '|') or (FCurChar = ',') then
Delim := FCurChar
else
- RaiseExc('Expected "|" or ","');
+ FatalError('Expected "|" or ","');
end
else
if FCurChar <> Delim then
- RaiseExc(Delim);
+ FatalError(Delim);
GetChar; // skip delimiter
until False;
+ if Delim = '|' then
+ CP.CPType := ctChoice
+ else
+ CP.CPType := ctSeq; // '(foo)' is a sequence!
end;
-procedure TXMLReader.ParseMixedOrChildren;
+procedure TXMLReader.ParseElementDecl; // [45]
var
- PELevel: Integer;
+ ElName: WideString;
+ ElDef: TDOMElementDef;
NeedAsterisk: Boolean;
+ CurrentCP: TContentParticle;
+ CurrentEntity: TObject;
+ I: Integer;
begin
- PELevel := FEntityLevel;
- GetChar; // starting bracket
- SkipWhitespace;
- if CheckForChar('#') then // Mixed section [51]
- begin
- ExpectString('PCDATA');
- SkipWhitespace;
- NeedAsterisk := False;
- while FCurChar <> ')' do
- begin
- ExpectChar('|');
- NeedAsterisk := True;
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- end;
- AssertPENesting(PELevel);
- GetChar;
- if NeedAsterisk then
- ExpectChar('*')
- else
- CheckForChar('*');
- end
- else // Parse Children section [47]
+ ElName := ExpectName;
+ ExpectWhitespace;
+ ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(ElName));
+ if Assigned(ElDef) and ElDef.HasElementDecl then
+ ValidationError('Duplicate declaration of element ''%s''', [ElName]);
+ if ElDef = nil then
begin
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
- GetChar;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
+ ElDef := TDOMElementDef.Create(doc);
+ ElDef.FNodeName := ElName;
+ FDocType.ElementDefs.SetNamedItem(ElDef);
end;
-end;
-
-procedure TXMLReader.ParseElementDecl; // [45]
-begin
- SkipName;
- ExpectWhitespace;
+ ElDef.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil));
+ ElDef.HasElementDecl := True;
- // Get contentspec [46]
if FCurChar = 'E' then
- ExpectString('EMPTY')
+ begin
+ ExpectString('EMPTY');
+ ElDef.ContentType := ctEmpty;
+ end
else if FCurChar = 'A' then
- ExpectString('ANY')
+ begin
+ ExpectString('ANY');
+ ElDef.ContentType := ctAny;
+ end
else if FCurChar = '(' then
- ParseMixedOrChildren
+ begin
+ CurrentEntity := FSource.FEntity;
+ GetChar; // starting bracket
+ SkipWhitespace;
+ if FCurChar = '#' then // Mixed section [51]
+ begin
+ ExpectString('#PCDATA');
+ SkipWhitespace;
+ ElDef.ContentType := ctMixed;
+ NeedAsterisk := False;
+ while FCurChar <> ')' do
+ begin
+ ExpectChar('|');
+ NeedAsterisk := True;
+ SkipWhitespace;
+
+ CurrentCP := ElDef.RootCP.Add;
+ CurrentCP.Name := ExpectName;
+ // TODO: rethink this
+ for I := ElDef.RootCP.ChildCount-2 downto 0 do
+ if CurrentCP.Name = ElDef.RootCP.Children[I].Name then
+ ValidationError('Duplicate token in mixed section', []);
+ SkipWhitespace;
+ end;
+ if CurrentEntity <> FSource.FEntity then
+ BadPENesting;
+ GetChar;
+ // TODO: does this asterisk have any real meaning?
+ if NeedAsterisk then
+ begin
+ ExpectChar('*');
+ ElDef.RootCP.CPQuant := cqZeroOrMore;
+ end
+ else
+ if CheckForChar('*') then
+ ElDef.RootCP.CPQuant := cqZeroOrMore;
+ end
+ else // Children section [47]
+ begin
+ ElDef.ContentType := ctChildren;
+ ExpectChoiceOrSeq(ElDef.RootCP);
+ if CurrentEntity <> FSource.FEntity then
+ BadPENesting;
+ GetChar;
+ ParseQuantity(ElDef.RootCP);
+ end;
+ end
else
- RaiseExc('Invalid content specification');
+ FatalError('Invalid content specification');
+ // SAX: DeclHandler.ElementDecl(name, model);
end;
procedure TXMLReader.ParseNotationDecl; // [82]
var
- Notation: TDOMNotationEx;
+ Name, SysID, PubID: WideString;
begin
- Notation := TDOMNotationEx(TDOMNotation.Create(Doc));
- try
- Notation.FName := ExpectName;
- ExpectWhitespace;
- if not ParseExternalID(Notation.FSystemID, Notation.FPublicID, True) then
- RaiseExc('Expected external or public ID');
- except
- Notation.Free;
- raise;
- end;
-
- if FDocType.Notations.GetNamedItem(Notation.FName) = nil then
- FDocType.Notations.SetNamedItem(Notation)
- else
- begin
- ValidationError('Duplicate notation declaration: %s', [Notation.FName]);
- Notation.Free;
- end;
+ Name := ExpectName;
+ ExpectWhitespace;
+ if not ParseExternalID(SysID, PubID, True) then
+ FatalError('Expected external or public ID');
+ DoNotationDecl(Name, PubID, SysID);
end;
procedure TXMLReader.ParseAttlistDecl; // [52]
@@ -1876,7 +2073,7 @@ begin
ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Token));
if ElDef = nil then
begin
- // TODO -cVC: must distinguish ElementDef created here from one explicitly declared
+ // DONE -cVC: must distinguish ElementDef created here from one explicitly declared
ElDef := TDOMElementDef.Create(doc);
ElDef.FNodeName := Token;
FDocType.ElementDefs.SetNamedItem(ElDef);
@@ -1884,36 +2081,38 @@ begin
SkipWhitespace;
while FCurChar <> '>' do
begin
- SkipWhitespace; { !!! }
AttDef := TDOMAttrDef.Create(doc);
+ AttDef.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil));
try
AttDef.FName := ExpectName;
ExpectWhitespace;
Token := GetString(['A'..'Z']); // Get AttType [54], [55], [56]
if Token = 'CDATA' then
- AttDef.FDataType := DT_CDATA
+ AttDef.FDataType := dtCdata
else if Token = 'ID' then
- AttDef.FDataType := DT_ID
+ AttDef.FDataType := dtId
else if Token = 'IDREF' then
- AttDef.FDataType := DT_IDREF
+ AttDef.FDataType := dtIdRef
else if Token = 'IDREFS' then
- AttDef.FDataType := DT_IDREFS
+ AttDef.FDataType := dtIdRefs
else if Token = 'ENTITY' then
- AttDef.FDataType := DT_ENTITY
+ AttDef.FDataType := dtEntity
else if Token = 'ENTITIES' then
- AttDef.FDataType := DT_ENTITIES
+ AttDef.FDataType := dtEntities
else if Token = 'NMTOKEN' then
- AttDef.FDataType := DT_NMTOKEN
+ AttDef.FDataType := dtNmToken
else if Token = 'NMTOKENS' then
- AttDef.FDataType := DT_NMTOKENS
+ AttDef.FDataType := dtNmTokens
else if Token = 'NOTATION' then // [57], [58]
begin
- AttDef.FDataType := DT_NOTATION;
+ AttDef.FDataType := dtNotation;
ExpectWhitespace;
ExpectChar('(');
repeat
SkipWhitespace;
- SkipName;
+ MarkTokenStart;
+ if not AttDef.AddEnumToken(ExpectName) then
+ ValidationError('Duplicate token in NOTATION attribute declaration',[]);
SkipWhitespace;
until not CheckForChar('|');
ExpectChar(')');
@@ -1921,58 +2120,60 @@ begin
else
if CheckForChar('(') then // [59]
begin
- AttDef.FDataType := DT_NMTOKEN;
+ AttDef.FDataType := dtNmToken;
repeat
SkipWhitespace;
+ MarkTokenStart;
if not CheckNmToken then
RaiseNameNotFound; // not completely correct error message
+ SetString(Token, FName.Buffer, FName.Length);
+ if not AttDef.AddEnumToken(Token) then
+ ValidationError('Duplicate token in enumerated attibute declaration', []);
SkipWhitespace;
until not CheckForChar('|');
ExpectChar(')');
end else
- RaiseExc('Invalid tokenized type');
+ FatalError('Illegal attribute type', True);
ExpectWhitespace;
- // Get DefaultDecl [60]
ValueRequired := False;
+ MarkTokenStart;
if CheckForChar('#') then
begin
Token := GetString(['A'..'Z']);
if Token = 'REQUIRED' then
- AttDef.FDefault := AD_REQUIRED
+ AttDef.FDefault := adRequired
else if Token = 'IMPLIED' then
- AttDef.FDefault := AD_IMPLIED
+ AttDef.FDefault := adImplied
else if Token = 'FIXED' then
begin
- AttDef.FDefault := AD_FIXED;
+ AttDef.FDefault := adFixed;
ExpectWhitespace;
ValueRequired := True;
end
else
- RaiseExc('Illegal attribute default');
+ FatalError('Illegal attribute default', True);
end
else
begin
- AttDef.FDefault := AD_DEFAULT;
+ AttDef.FDefault := adDefault;
ValueRequired := True;
end;
-
+
if ValueRequired then
begin
SaveCurNode := FCursor;
FCursor := AttDef;
-// tricky moment, no tests for that
-{ FRecognizePE := False; } // TODO: shall it really be disabled?
+// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
+// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
try
ExpectAttValue;
finally
FCursor := SaveCurNode;
-{ FRecognizePE := not FIntSubset; }
end;
- if AttDef.FDataType = DT_ID then
- ValidationError('Attributes of type ID must not have a default value',[]);
end;
+ // SAX: DeclHandler.AttributeDecl(...)
// First declaration is binding, subsequent should be ignored
if Assigned(ElDef.GetAttributeNode(AttDef.Name)) then
@@ -1990,7 +2191,7 @@ end;
procedure TXMLReader.ParseEntityDeclValue(Delim: WideChar); // [9]
var
I: Integer;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
Src := FSource;
// "Included in literal": process until delimiter hit IN SAME context
@@ -1998,13 +2199,12 @@ begin
if ParsePEReference then
begin
if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('PE references in internal subset not allowed inside declarations');
+ FatalError('PE references in internal subset are not allowed inside declarations', True);
StartPE;
GetCharRaw;
end
else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass
begin
- GetCharRaw;
if not ParseCharRef then
begin
BufAppend(FValue, '&');
@@ -2037,12 +2237,14 @@ begin
begin
ExpectWhitespace;
NDataAllowed := False;
- Map := FDocType.PEMap;
+ if FPEMap = nil then
+ FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
+ Map := FPEMap;
end;
Entity := TDOMEntityEx.Create(Doc);
try
- Entity.FInternal := FIntSubset and (FSource.FParent = nil);
+ Entity.FExternallyDeclared := not (FIntSubset and (FSource.FParent = nil));
Entity.FName := ExpectName;
ExpectWhitespace;
@@ -2050,6 +2252,7 @@ begin
begin
NDataAllowed := False;
Delim := FCurChar;
+ Entity.FStartLocation := FLocation;
FRecognizePE := False; // PERef right after delimiter should not be recognized
GetCharRaw; // at char level - we process it 'manually'
FValue.Length := 0;
@@ -2059,7 +2262,7 @@ begin
end
else
if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
- RaiseExc('Expected entity value or external ID');
+ FatalError('Expected entity value or external ID');
if NDataAllowed then // [76]
begin
@@ -2070,10 +2273,8 @@ begin
ExpectString('NDATA');
ExpectWhitespace;
SkipName;
- // TODO -cVC: Notation declared. Here or after all has been read?
SetString(Entity.FNotationName, FName.Buffer, FName.Length);
- if FDocType.Notations.GetNamedItem(Entity.NotationName) = nil then
- ValidationError('Reference to undeclared notation ''%s''', [Entity.NotationName]);
+ // SAX: DTDHandler.UnparsedEntityDecl(...);
end;
end;
except
@@ -2094,19 +2295,16 @@ var
Token: WideString;
IncludeLevel: Integer;
IgnoreLevel: Integer;
- PELevel: Integer;
+ CurrentEntity: TObject;
begin
IncludeLevel := 0;
IgnoreLevel := 0;
repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
+ SkipWhitespace;
if ParsePEReference then // PERef between declarations should always be recognized
begin
- FAllowedDecl := dtNone;
- if Assigned(FDocType) then
- FDocType.HasPERefs := True;
+ FHavePERefs := True;
StartPE;
GetChar;
Continue;
@@ -2125,18 +2323,17 @@ begin
if FCurChar <> '<' then
Break;
- PELevel := FEntityLevel;
- GetCharRaw;
+ CurrentEntity := FSource.FEntity;
+ GetChar;
if CheckForChar('!') then
begin
- FAllowedDecl := dtNone;
if FCurChar = '-' then
ParseComment
else if FCurChar = '[' then
begin
if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('Conditional sections not allowed in internal subset');
+ FatalError('Conditional sections are not allowed in internal subset');
FRecognizePE := not FIntSubset;
GetChar; // skip '['
@@ -2149,8 +2346,9 @@ begin
else if Token = 'IGNORE' then
IgnoreLevel := 1
else
- RaiseExc('Expected "INCLUDE" or "IGNORE"');
- AssertPENesting(PELevel);
+ FatalError('Expected "INCLUDE" or "IGNORE"');
+ if CurrentEntity <> FSource.FEntity then
+ BadPENesting;
ExpectChar('[');
if IgnoreLevel > 0 then
repeat
@@ -2165,6 +2363,7 @@ begin
else
begin
FRecognizePE := not FIntSubset;
+ MarkTokenStart;
Token := GetString(['A'..'Z']);
ExpectWhitespace;
if Token = 'ELEMENT' then
@@ -2176,7 +2375,7 @@ begin
else if Token = 'NOTATION' then
ParseNotationDecl
else
- RaiseExc('Illegal markup declaration');
+ FatalError('Illegal markup declaration', True);
SkipWhitespace;
FRecognizePE := False; // ! Don't auto-pop context on last markup delimiter
@@ -2186,29 +2385,31 @@ begin
MarkupDecl starting in PE and ending in root is a WFC [28a]
MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
}
- if PELevel > FEntityLevel then
- RaiseExc('Parameter entities must be properly nested')
- else
- AssertPENesting(PELevel);
+ // TODO: what if statrs in PE1 and ends in PE2, and other cases?
+ if CurrentEntity <> FSource.FEntity then
+ if Assigned(FSource.FEntity) then { ends in PE }
+ BadPENesting
+ else
+ FatalError('Parameter entities must be properly nested');
end
else if FCurChar = '?' then
ParsePI;
until False;
FRecognizePE := False;
if (IncludeLevel > 0) or (IgnoreLevel > 0) then
- RaiseExc('Conditional section not closed');
+ FatalError('Conditional section is not closed');
end;
-procedure TXMLReader.DoParseExtSubset(ASource: TXMLInputSource);
+procedure TXMLReader.DoParseExtSubset(ASource: TXMLCharSource);
begin
InitializeRoot(ASource);
FAllowedDecl := dtText;
ParseMarkupDecl;
if FCurChar <> #0 then
- RaiseExc('Illegal character in DTD');
+ FatalError('Illegal character in DTD');
end;
-procedure TXMLReader.ProcessDTD(ASource: TXMLInputSource);
+procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
begin
doc := TXMLDocument.Create;
FDocType := TDOMDocumentTypeEx.Create(doc);
@@ -2219,10 +2420,11 @@ begin
end;
procedure TXMLReader.ParseCDSect; // [18]
-var
- name: WideString;
begin
ExpectString('[CDATA[');
+ MarkTokenStart;
+ if not FInsideRoot then
+ FatalError('Illegal at document level');
FValue.Length := 0;
repeat
BufAppend(FValue, FCurChar);
@@ -2231,13 +2433,11 @@ begin
if (Length >= 3) and (Buffer[Length-1] = '>') and
(Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
begin
- Dec(Length, 3);
- SetString(name, Buffer, Length);
- FCursor.AppendChild(doc.CreateCDATASection(name));
+ DoCDSect(Buffer, Length-3);
Exit;
end;
until FCurChar = #0;
- RaiseExc('Unterminated CDATA section');
+ FatalError('Unterminated CDATA section', True);
end;
procedure TXMLReader.ParseContent;
@@ -2246,18 +2446,18 @@ begin
if FCurChar = '<' then
begin
GetCharRaw;
+ MarkTokenStart;
if CheckName then
ParseElement
else if FCurChar = '!' then
begin
GetCharRaw;
- FAllowedDecl := dtNone;
if FCurChar = '[' then
ParseCDSect
else if FCurChar = '-' then
ParseComment
else
- ParseDoctypeDecl; // actually will raise error
+ ParseDoctypeDecl;
end
else if FCurChar = '?' then
ParsePI
@@ -2273,104 +2473,225 @@ end;
procedure TXMLReader.ParseElement; // [39] [40] [44]
var
NewElem: TDOMElement;
+ ElDef: TDOMElementDef;
+ ElVal: TElementValidator;
IsEmpty: Boolean;
attr, OldAttr: TDOMNode;
begin
+ if (FCursor = doc) and Assigned(doc.DocumentElement) then
+ FatalError('Only one top-level element allowed', True)
+ else
+ FInsideRoot := True;
+
NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
+ // First check if NewElem is allowed in this context
+ if FValStack.Count > 0 then
+ ElVal := TElementValidator(FValStack.Last)
+ else
+ ElVal := nil;
+ if FValidate and Assigned(ElVal) and not ElVal.IsElementAllowed(NewElem.TagName) then
+ ValidationError('Element ''%s'' is not allowed in this context',[NewElem.TagName]);
+
FCursor.AppendChild(NewElem);
- Assert(NewElem.ParentNode = FCursor, 'AppendChild did not set ParentNode');
- FCursor := NewElem;
+
+ // Then update ElementDef - it is needed to process attributes
+ ElDef := nil;
+ if Assigned(FDocType) then
+ ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(NewElem.TagName));
IsEmpty := False;
- while FCurChar <> '>' do
+ if SkipWhitespaceRaw then
begin
- if FCurChar = '/' then
+ while (FCurChar <> '>') and (FCurChar <> '/') do
begin
- GetCharRaw;
- IsEmpty := True;
- FCursor := FCursor.ParentNode;
- Break;
- end;
-
- // Get Attribute [41]
- ExpectWhitespace;
- if not CheckName then // allow stuff like <element >, <element />
- Continue;
-
- attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
+ MarkTokenStart;
+ ExpectName;
+ attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
- // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
- OldAttr := NewElem.Attributes.SetNamedItem(Attr);
- if Assigned(OldAttr) then
- begin
- OldAttr.Free;
- RaiseExc('Duplicate attribute');
- end;
- ExpectEq;
- Assert(TDOMAttr(attr).OwnerElement = NewElem, 'DOMAttr.OwnerElement not set correctly');
- FCursor := attr;
- ExpectAttValue;
- FCursor := NewElem;
+ // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
+ OldAttr := NewElem.Attributes.SetNamedItem(Attr);
+ if Assigned(OldAttr) then
+ begin
+ OldAttr.Free;
+ FatalError('Duplicate attribute', True);
+ end;
+ ExpectEq;
+ FCursor := attr;
+ ExpectAttValue;
+ if (FCurChar <> '>') and (FCurChar <> '/') then
+ ExpectWhitespace;
+ end; // while
+ end;
+ if FCurChar = '/' then
+ begin
+ IsEmpty := True;
+ GetCharRaw;
end;
ExpectChar('>');
- ProcessDefaultAttributes(NewElem);
+
+ PushVC(ElDef);
+ // SAX: ContentHandler.StartElement(...)
+ // SAX: ContentHandler.StartPrefixMapping(...)
if not IsEmpty then
begin
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipWhitespace;
+ FCursor := NewElem;
+ if not FPreserveWhitespace then // critical for testsuite compliance
+ SkipWhitespaceRaw;
ParseContent;
if FCurChar = '/' then // Get ETag [42]
begin
GetCharRaw;
+ MarkTokenStart;
if ExpectName <> NewElem.TagName then
- RaiseExc('Unmatching element end tag (expected "</%s>")', [NewElem.TagName]);
- SkipWhitespace;
+ FatalError('Unmatching element end tag (expected "</%s>")', [NewElem.TagName], True);
+ SkipWhitespaceRaw;
ExpectChar('>');
- FCursor := FCursor.ParentNode;
end
else if FCurChar <> #0 then
RaiseNameNotFound
else // End of stream in content
- RaiseExc('Document element not closed');
+ FatalError('Closing tag is missing for ''%s''', [NewElem.TagName]);
end;
+ // SAX: ContentHandler.EndElement(...)
+ // SAX: ContentHandler.EndPrefixMapping(...)
+ FCursor := NewElem.ParentNode;
+ if FCursor = doc then
+ FInsideRoot := False;
+ ProcessDefaultAttributes(NewElem);
+ if FValidate then
+ ValidateElement(NewElem);
+ PopVC;
end;
-procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement);
+procedure TXMLReader.AddIdRef(Buf: PWideChar; Length: Integer);
+var
+ w: PWideStrWrapper;
+begin
+ New(w);
+ SetString(w^.Value, Buf, Length);
+ FIDRefs.Add(w);
+end;
+
+procedure TXMLReader.ClearIdRefs;
+var
+ I: Integer;
+begin
+ for I := 0 to FIDRefs.Count-1 do
+ Dispose(PWideStrWrapper(FIDRefs.List^[I]));
+ FIDRefs.Clear;
+end;
+
+procedure TXMLReader.ValidateIdRefs;
var
I: Integer;
+begin
+ for I := 0 to FIDRefs.Count-1 do
+ if Doc.GetElementById(PWideStrWrapper(FIDRefs.List^[I])^.Value) = nil then
+ ValidationError('The ID ''%s'' does not match any element', [PWideStrWrapper(FIDRefs.List^[I])^.Value]);
+ ClearIDRefs;
+end;
+
+procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement);
+var
+ I, L, StartPos, EndPos: Integer;
ElDef: TDOMElementDef;
- AttDefs: TDOMNamedNodeMap;
+ Map: TDOMNamedNodeMap;
AttDef: TDOMAttrDef;
- Attr: TDOMAttrEx;
- Spec: Boolean;
+ Attr: TDOMAttr;
+ AttValue: WideString;
+ Entity: TDOMEntity;
begin
- if Assigned(FDocType) then
+ ElDef := CurrentElementDef;
+ if Assigned(ElDef) and ElDef.HasAttributes then
begin
- ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Element.TagName));
- if Assigned(ElDef) and ElDef.HasAttributes then
+ Map := ElDef.Attributes;
+
+ for I := 0 to Map.Length-1 do
begin
- AttDefs := ElDef.Attributes;
- for I := 0 to AttDefs.Length-1 do
+ AttDef := Map[I] as TDOMAttrDef;
+
+ Attr := Element.GetAttributeNode(AttDef.Name);
+ if Attr = nil then
begin
- AttDef := AttDefs[I] as TDOMAttrDef;
- Spec := True;
- // no validity checking yet; just append default values
- Attr := TDOMAttrEx(Element.GetAttributeNode(AttDef.Name));
- if (AttDef.FDefault in [AD_DEFAULT, AD_FIXED]) and (Attr = nil) then
- begin
- Attr := TDOMAttrEx(AttDef.CloneNode(True));
- Element.SetAttributeNode(Attr);
- Spec := False;
+ // attribute needs defaulting
+ case AttDef.FDefault of
+ adDefault, adFixed: begin
+ if FStandalone and AttDef.FExternallyDeclared then
+ StandaloneError;
+ // Cloning TDOMAttrDef creates TDOMAttr. DataType is copied.
+ Attr := TDOMAttr(AttDef.CloneNode(True));
+ TDOMAttrDef(Attr).FSpecified := False; // Dirty hack...
+ TDOMAttrDef(Attr).FDeclared := True;
+ Element.SetAttributeNode(Attr);
+ end;
+ adRequired: ValidationError('Required attribute ''%s'' of element ''%s'' is missing',[AttDef.Name, Element.TagName])
end;
- if Assigned(Attr) then
- begin
- Attr.FSpecified := Spec;
- Attr.FNormalize := (AttDef.FDataType <> DT_CDATA);
+ end
+ else
+ begin
+ TDOMAttrDef(Attr).FDeclared := True;
+ AttValue := Attr.Value; // unnormalized
+ // now assign DataType so that value is correctly normalized
+ TDOMAttrDef(Attr).FDataType := AttDef.FDataType;
+ if FStandalone and AttDef.FExternallyDeclared and (Attr.Value <> AttValue) then
+ StandaloneError;
+ AttValue := Attr.Value; // recalculate
+ // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
+ if (AttDef.FDefault = adFixed) and (AttDef.Value <> AttValue) then
+ ValidationError('Value of attribute ''%s'' does not match its fixed default',[AttDef.Name]);
+
+ if not ValidateAttrSyntax(AttDef, AttValue) then
+ ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name]);
+ end;
+
+ if Attr = nil then
+ Continue;
+ L := Length(AttValue);
+ case Attr.DataType of
+ dtId: if not Doc.AddID(Attr) then
+ ValidationError('The ID ''%s'' is not unique', [AttValue]);
+
+ dtIdRef, dtIdRefs: begin
+ StartPos := 1;
+ while StartPos <= L do
+ begin
+ EndPos := StartPos;
+ while (EndPos <= L) and (AttValue[EndPos] <> #32) do
+ Inc(EndPos);
+
+ AddIdRef(@AttValue[StartPos], EndPos-StartPos);
+ StartPos := EndPos + 1;
+ end;
+ end;
+
+ dtEntity, dtEntities: begin
+ StartPos := 1;
+ while StartPos <= L do
+ begin
+ EndPos := StartPos;
+ while (EndPos <= L) and (AttValue[EndPos] <> #32) do
+ Inc(EndPos);
+ Entity := TDOMEntity(FDocType.Entities.GetNamedItem(Copy(AttValue, StartPos, EndPos-StartPos)));
+ if (Entity = nil) or (Entity.NotationName = '') then
+ ValidationError('Attribute ''%s'' type mismatch', [Attr.Name]);
+ StartPos := EndPos + 1;
+ end;
end;
end;
end;
end;
+ // Now report undeclared attributes
+ if Assigned(FDocType) and Element.HasAttributes then
+ begin
+ Map := Element.Attributes;
+ for I := 0 to Map.Length-1 do
+ begin
+ Attr := TDOMAttr(Map[I]);
+ if not TDOMAttrDef(Attr).FDeclared then
+ ValidationError('Using undeclared attribute ''%s'' on element ''%s''',[Attr.Name, Element.TagName]);
+ end;
+ end;
end;
function TXMLReader.ParsePEReference: Boolean; // [69]
@@ -2378,6 +2699,7 @@ begin
Result := CheckForChar('%');
if Result then
begin
+ MarkTokenStart;
SkipName;
ExpectChar(';');
end;
@@ -2390,7 +2712,7 @@ begin
begin
ExpectString('SYSTEM');
ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
+ SkipSystemLiteral(SysID);
Result := True;
end
else if FCurChar = 'P' then
@@ -2399,35 +2721,513 @@ begin
ExpectWhitespace;
SkipPubidLiteral;
SetString(PubID, FValue.Buffer, FValue.Length);
+ NormalizeSpaces(PubID);
if SysIdOptional then
begin
SkipWhitespace;
- SkipSystemLiteral(SysID, False);
+ if SkipQuotedLiteral then
+ SetString(SysID, FValue.Buffer, FValue.Length);
end
else
begin
ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
+ SkipSystemLiteral(SysID);
end;
Result := True;
end else
Result := False;
end;
-procedure TXMLReader.ValidationError(const Msg: string;
- const args: array of const);
+procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const);
+var
+ E: EXMLReadError;
+begin
+ if not FValidate then
+ Exit;
+ FDocNotValid := True;
+ E := EXMLReadError.CreateFmt(Msg, Args);
+ // TODO -cErrorReporting: No location for validity errors is reported yet
+ E.FErrorMessage := E.Message;
+ E.FSeverity := esError;
+ CallErrorHandler(E);
+ // if user handler raises exception, control won't get here
+ E.Free;
+end;
+
+procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
+begin
+ try
+ if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
+ FCtrl.FOnError(E);
+ if E.Severity = esFatal then
+ raise E;
+ except
+ if ExceptObject <> E then
+ E.Free;
+ raise;
+ end;
+end;
+
+function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
+begin
+ case AttrDef.FDataType of
+ dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11);
+ dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11);
+ dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
+ dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
+ // IsXmlName() not necessary - enum is never empty and contains valid names
+ dtNotation: Result := AttrDef.HasEnumToken(aValue);
+ else
+ Result := True;
+ end;
+end;
+
+procedure TXMLReader.ValidateElement(Element: TDOMElement);
+var
+ ElDef: TDOMElementDef;
+ elv: TElementValidator;
+
+ procedure Traverse(node: TDOMNode);
+ var
+ cur: TDOMNode;
+ begin
+ cur := node.FirstChild;
+ while Assigned(cur) do
+ begin
+ case cur.NodeType of
+ ELEMENT_NODE:
+ elv.AddElement(TDOMElement(cur));
+ ENTITY_REFERENCE_NODE:
+ Traverse(cur);
+ TEXT_NODE:
+ begin
+ if not TDOMText(cur).MayBeIgnorable then
+ ValidationError('Character data is not allowed in element-only content',[])
+ else
+ if FStandalone and ElDef.FExternallyDeclared then
+ StandaloneError;
+ end;
+ end;
+ cur := cur.NextSibling;
+ end;
+ end;
+
+begin
+ ElDef := CurrentElementDef;
+ if Assigned(ElDef) and ElDef.HasElementDecl then
+ begin
+ case ElDef.ContentType of
+ ctEmpty: begin
+ if Element.HasChildNodes then
+ ValidationError('Element ''%s'' was declared empty but has content', [Element.TagName]);
+ end;
+ ctChildren: begin
+ elv := TElementValidator(FValStack.Last);
+ try
+ Traverse(Element);
+ if not elv.Match then
+ ValidationError('Content of element ''%s'' does not match its declaration',[Element.TagName]);
+ finally
+ elv.FList.Clear;
+ end;
+ end;
+ end;
+ end
+ else // if no DocType, a corresponding error will be reported.
+ if Assigned(FDocType) then
+ ValidationError('Using undeclared element ''%s''',[Element.TagName]);
+end;
+
+// TODO: this should be method of TDOMDocumentTypeEx, but we must pass ErrorHandler in...
+procedure TXMLReader.ValidateDTD;
+var
+ I, J, K: Integer;
+ Entity: TDOMEntity;
+ ElDef: TDOMElementDef;
+ AttDef: TDOMAttrDef;
+ IdFound, NotationFound, HasDefault: Boolean;
+begin
+ for I := 0 to FDocType.Entities.Length-1 do
+ begin
+ Entity := TDOMEntity(FDocType.Entities[I]);
+ if (Entity.NotationName <> '') then
+ CheckNotation(Entity.NotationName);
+ end;
+
+ if Assigned(FDocType.FElementDefs) then
+ begin
+ for I := 0 to FDocType.FElementDefs.Length-1 do
+ begin
+ ElDef := TDOMElementDef(FDocType.FElementDefs[I]);
+ // XML spec permits ATTLIST declarations without corresponding ELEMENT.
+ // Such ATTLISTs are useless for validation, so here we might skip or even
+ // delete all ElDefs that have HasElementDecl=False. However, doing so
+ // breaks some stupid tests, namely sun/id04.
+{
+ if not ElDef.HasElementDecl then
+ Continue;
+}
+ if not ElDef.HasAttributes then
+ Continue;
+ IdFound := False;
+ NotationFound := False;
+ for J := 0 to ElDef.Attributes.Length-1 do
+ begin
+ AttDef := TDOMAttrDef(ElDef.Attributes[J]);
+ HasDefault := AttDef.FDefault in [adDefault, adFixed];
+ case AttDef.FDataType of
+ dtId: begin
+ if IdFound then
+ ValidationError('Only one attribute of type ID is allowed per element',[]);
+ IdFound := True;
+ if HasDefault then
+ ValidationError('An attribute of type ID cannot have a default value',[]);
+ end;
+ dtNotation: begin
+ for K := 0 to Length(AttDef.FEnumeration)-1 do
+ CheckNotation(AttDef.FEnumeration[K]);
+ if NotationFound then
+ ValidationError('Only one attribute of type NOTATION is allowed per element',[]);
+ NotationFound := True;
+ if ElDef.ContentType = ctEmpty then
+ ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
+ end;
+ end; // case
+ if HasDefault and not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
+ ValidationError('Illegal attribute default', []);
+ end;
+ end;
+ end;
+end;
+
+procedure TXMLReader.DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean);
+var
+ TextNode: TDOMText;
+begin
+ // Validating filter part (disabled for the following two reasons):
+ // TODO: per SAX, attribute text should not go here.
+ // ElDefStack is invalid in this case, and we fail...
+ // TODO: for testing whitespace CharRefs, they are contained in internal entities.
+ // Parsing first reports them to Entity, and then they are cloned to real parent
+ // so this method isn't called :(
+{
+ if FCursor.NodeType in [ELEMENT_NODE, ENTITY_REFERENCE_NODE] then
+ begin
+ ElDef := CurrentElementDef;
+ if Assigned(ElDef) and (ElDef.ContentType = ctChildren) then
+ begin
+ if not Whitespace then
+ ValidationError('Character data is not allowed in element-only content',[])
+ else
+ if FStandalone and ElDef.FExternallyDeclared then
+ StandaloneError;
+ end;
+ end;
+}
+ // Document builder part
+ TextNode := Doc.CreateTextNodeBuf(ch, Count);
+ TextNode.MayBeIgnorable := Whitespace;
+ FCursor.AppendChild(TextNode);
+end;
+
+procedure TXMLReader.DoComment(ch: PWideChar; Count: Integer);
+var
+ ElDef: TDOMElementDef;
+begin
+ // validation filter part
+ if FValidate then
+ begin
+ ElDef := CurrentElementDef;
+ if Assigned(ElDef) and (ElDef.ContentType = ctEmpty) then
+ ValidationError('Comments are not allowed within EMPTY elements', []);
+ end;
+
+ // DOM builder part
+ if (not FIgnoreComments) and Assigned(FCursor) then
+ FCursor.AppendChild(Doc.CreateCommentBuf(ch, Count));
+end;
+
+procedure TXMLReader.DoCDSect(ch: PWideChar; Count: Integer);
+var
+ s: WideString;
+ ElDef: TDOMElementDef;
+begin
+ if FValidate then
+ begin
+ ElDef := CurrentElementDef;
+ if Assigned(ElDef) and (ElDef.ContentType = ctChildren) then
+ ValidationError('CDATA sections are not allowed in element-only content',[]);
+ end;
+ if not FCDSectionsAsText then
+ begin
+ SetString(s, ch, Count);
+ // SAX: LexicalHandler.StartCDATA;
+ // SAX: ContentHandler.Characters(...);
+ FCursor.AppendChild(doc.CreateCDATASection(s));
+ // SAX: LexicalHandler.EndCDATA;
+ end
+ else
+ FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count));
+end;
+
+procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString);
+var
+ Notation: TDOMNotationEx;
+begin
+ if FDocType.Notations.GetNamedItem(aName) = nil then
+ begin
+ Notation := TDOMNotationEx(TDOMNotation.Create(doc));
+ Notation.FName := aName;
+ Notation.FPublicID := aPubID;
+ Notation.FSystemID := aSysID;
+ FDocType.Notations.SetNamedItem(Notation);
+ end
+ else
+ ValidationError('Duplicate notation declaration: ''%s''', [aName]);
+end;
+
+procedure TXMLReader.PushVC(aElDef: TDOMElementDef);
+begin
+ FValStack.Add(TElementValidator.Create(aElDef));
+end;
+
+procedure TXMLReader.PopVC;
+var
+ Validator: TObject;
+begin
+ with FValStack do
+ begin
+ Validator := TObject(Last);
+ Delete(Count-1);
+ Validator.Free;
+ end;
+end;
+
+function TXMLReader.CurrentElementDef: TDOMElementDef;
+begin
+ if FValStack.Count > 0 then
+ Result := TElementValidator(FValStack.Last).FElementDef
+ else
+ Result := nil;
+end;
+
+{ TDOMAttrDef }
+
+function TDOMAttrDef.AddEnumToken(const aValue: WideString): Boolean;
+var
+ I, L: Integer;
+begin
+ // TODO: this implementaion is the slowest possible...
+ Result := False;
+ L := Length(FEnumeration);
+ for I := 0 to L-1 do
+ begin
+ if aValue = FEnumeration[I] then
+ Exit;
+ end;
+ SetLength(FEnumeration, L+1);
+ FEnumeration[L] := aValue;
+ Result := True;
+end;
+
+function TDOMAttrDef.HasEnumToken(const aValue: WideString): Boolean;
+var
+ I: Integer;
+begin
+ Result := True;
+ if Length(FEnumeration) = 0 then
+ Exit;
+ for I := 0 to Length(FEnumeration)-1 do
+ begin
+ if FEnumeration[I] = aValue then
+ Exit;
+ end;
+ Result := False;
+end;
+
+{ TElementValidator }
+
+procedure TElementValidator.AddElement(aNode: TDOMElement);
+begin
+ FList.Add(aNode);
+end;
+
+constructor TElementValidator.Create(aElDef: TDOMElementDef);
+begin
+ inherited Create;
+ FElementDef := aElDef;
+ if Assigned(FElementDef) then
+ FCurCP := FElementDef.RootCP;
+ FList := TList.Create;
+end;
+
+destructor TElementValidator.Destroy;
+begin
+ FList.Free;
+ inherited Destroy;
+end;
+
+function TElementValidator.IsElementAllowed(const aName: DOMString): Boolean;
+var
+ I: Integer;
+begin
+ Inc(FChildElementCount);
+ Result := True;
+ // if element is not declared, non-validity has been already reported, no need to report again...
+ if FElementDef = nil then
+ Exit;
+ { for mixed content type it is easy }
+ if FElementDef.ContentType = ctMixed then
+ begin
+ for I := 0 to FElementDef.RootCP.ChildCount-1 do
+ begin
+ if aName = FElementDef.RootCP.Children[I].Name then
+ Exit;
+ end;
+ Result := False;
+ Exit;
+ end;
+ { for empty, even more easier }
+ if FElementDef.ContentType = ctEmpty then
+ begin
+ Result := False;
+ Exit;
+ end;
+
+
+end;
+
+function TElementValidator.Match: Boolean;
begin
- // TODO: just a stub now
- FInvalid := True;
+ FIndex := 0;
+ Result := (FElementDef.RootCP.MatchNodeList(FList, FIndex)) and (FIndex = FList.Count);
end;
+{ TContentParticle }
+function TContentParticle.Add: TContentParticle;
+begin
+ Result := TContentParticle.Create;
+ Result.FParent := Self;
+ FChildren.Add(Result);
+end;
+constructor TContentParticle.Create;
+begin
+ inherited Create;
+ FChildren := TList.Create;
+end;
+
+destructor TContentParticle.Destroy;
+var
+ I: Integer;
+begin
+ for I := FChildren.Count-1 downto 0 do
+ TObject(FChildren[I]).Free;
+ FChildren.Free;
+ inherited Destroy;
+end;
+
+function TContentParticle.GetChild(Index: Integer): TContentParticle;
+begin
+ Result := TContentParticle(FChildren[Index]);
+end;
+
+function TContentParticle.GetChildCount: Integer;
+begin
+ Result := FChildren.Count;
+end;
+
+function TContentParticle.InternalMatch(List: TList; var Index: Integer): Boolean;
+var
+ I: Integer;
+ TempIndex, RestIndex, MatchNumber: Integer;
+begin
+ if CPType = ctName then
+ begin
+ Result := (Index < List.Count) and (TDOMElement(List[Index]).TagName = Name);
+ if Result then
+ Inc(Index);
+ end
+ else if CPType = ctChoice then
+ begin
+ RestIndex := Index;
+ Result := False;
+ MatchNumber := 0;
+ for I := 0 to ChildCount-1 do
+ begin
+ TempIndex := Index;
+ if Children[I].MatchNodeList(List, TempIndex) then
+ begin
+ Result := True;
+ if Index <> TempIndex then // Do not count matching empty expressions
+ begin
+ Inc(MatchNumber);
+ if MatchNumber > 1 then
+ Break;
+ RestIndex := TempIndex;
+ end;
+ end else if MatchNumber > 1 then Break;
+ end;
+
+ if Result then
+ Index := RestIndex;
+ end
+ else // ctSeq
+ begin
+ MatchNumber := 0;
+ TempIndex := Index;
+ Result := False;
+ for I := 0 to ChildCount-1 do
+ begin
+ Result := Children[I].MatchNodeList(List, TempIndex);
+ if not Result then Break;
+ end;
+
+ if Result then
+ Index := TempIndex;
+ if MatchNumber > 1 then
+ Result := False;
+ end;
+end;
+
+function TContentParticle.MatchNodeList(List: TList; var Index: Integer): Boolean;
+var
+ Saved: Integer;
+begin
+ Result := InternalMatch(List, Index) or not (CPQuant in [cqOnce, cqOnceOrMore]);
+ if Result and (CPQuant in [cqZeroOrMore, cqOnceOrMore]) then
+ begin
+ Saved := Index;
+ while Index < List.Count do
+ begin
+ if not InternalMatch(List, Saved) or (Index = Saved) then
+ Break;
+ Index := Saved;
+ end;
+ end;
+end;
+
+{ TDOMElementDef }
+
+constructor TDOMElementDef.Create(aOwner: TDOMDocument);
+begin
+ inherited Create(aOwner);
+ RootCP := TContentParticle.Create;
+end;
+
+destructor TDOMElementDef.Destroy;
+begin
+ RootCP.Free;
+ inherited Destroy;
+end;
+
+{ plain calls }
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
ADoc := nil;
Src := TXMLFileInputSource.Create(f);
@@ -2444,7 +3244,7 @@ end;
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
@@ -2479,7 +3279,7 @@ end;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
Reader := TXMLReader.Create;
try
@@ -2494,7 +3294,7 @@ end;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
Reader := TXMLReader.Create;
try
@@ -2527,7 +3327,7 @@ end;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
@@ -2544,7 +3344,7 @@ end;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
var
Reader: TXMLReader;
- Src: TXMLInputSource;
+ Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
@@ -2577,4 +3377,6 @@ begin
end;
+
+
end.
diff --git a/packages/fcl-xml/src/xmlutils.pp b/packages/fcl-xml/src/xmlutils.pp
new file mode 100644
index 0000000000..3f39d6e6a9
--- /dev/null
+++ b/packages/fcl-xml/src/xmlutils.pp
@@ -0,0 +1,222 @@
+{
+ This file is part of the Free Component Library
+
+ XML utility routines.
+ Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
+
+ See the file COPYING.FPC, included in this distribution,
+ for details about the copyright.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+unit xmlutils;
+
+interface
+
+uses
+ SysUtils;
+
+function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean;
+function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean;
+function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean;
+function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean;
+function IsValidXmlEncoding(const Value: WideString): Boolean;
+function Xml11NamePages: PByteArray;
+procedure NormalizeSpaces(var Value: WideString);
+
+{$i names.inc}
+
+implementation
+
+var
+ Xml11Pg: PByteArray = nil;
+
+function Xml11NamePages: PByteArray;
+var
+ I: Integer;
+ p: PByteArray;
+begin
+ if Xml11Pg = nil then
+ begin
+ GetMem(p, 512);
+ for I := 0 to 255 do
+ p^[I] := ord(Byte(I) in Xml11HighPages);
+ p^[0] := 2;
+ p^[3] := $2c;
+ p^[$20] := $2a;
+ p^[$21] := $2b;
+ p^[$2f] := $29;
+ p^[$30] := $2d;
+ p^[$fd] := $28;
+
+ Move(p^, p^[256], 256);
+ p^[$100] := $19;
+ p^[$103] := $2E;
+ p^[$120] := $2F;
+ Xml11Pg := p;
+ end;
+ Result := Xml11Pg;
+end;
+
+function IsXml11Char(const Value: WideString; var Index: Integer): Boolean;
+begin
+ if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
+ begin
+ Inc(Index);
+ Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
+ end
+ else
+ Result := False;
+end;
+
+function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean;
+var
+ Pages: PByteArray;
+ I: Integer;
+begin
+ Result := False;
+ if Xml11 then
+ Pages := Xml11NamePages
+ else
+ Pages := @NamePages;
+
+ I := 1;
+ if (Value = '') or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
+ (Xml11 and IsXml11Char(Value, I))) then
+ Exit;
+ Inc(I);
+ while I <= Length(Value) do
+ begin
+ if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+ (Xml11 and IsXml11Char(Value, I))) then
+ Exit;
+ Inc(I);
+ end;
+ Result := True;
+end;
+
+function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean;
+var
+ Pages: PByteArray;
+ I: Integer;
+ Offset: Integer;
+begin
+ if Xml11 then
+ Pages := Xml11NamePages
+ else
+ Pages := @NamePages;
+ Result := False;
+ if Value = '' then
+ Exit;
+ I := 1;
+ Offset := 0;
+ while I <= Length(Value) do
+ begin
+ if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
+ (Xml11 and IsXml11Char(Value, I))) then
+ begin
+ if (I = Length(Value)) or (Value[I] <> #32) then
+ Exit;
+ Offset := 0;
+ Inc(I);
+ Continue;
+ end;
+ Offset := $100;
+ Inc(I);
+ end;
+ Result := True;
+end;
+
+function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean;
+var
+ I: Integer;
+ Pages: PByteArray;
+begin
+ if Xml11 then
+ Pages := Xml11NamePages
+ else
+ Pages := @NamePages;
+ Result := False;
+ if Value = '' then
+ Exit;
+ I := 1;
+ while I <= Length(Value) do
+ begin
+ if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+ (Xml11 and IsXml11Char(Value, I))) then
+ Exit;
+ Inc(I);
+ end;
+ Result := True;
+end;
+
+function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean;
+var
+ I: Integer;
+ Pages: PByteArray;
+begin
+ if Xml11 then
+ Pages := Xml11NamePages
+ else
+ Pages := @NamePages;
+ I := 1;
+ Result := False;
+ if Value = '' then
+ Exit;
+ while I <= Length(Value) do
+ begin
+ if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
+ (Xml11 and IsXml11Char(Value, I))) then
+ begin
+ if (I = Length(Value)) or (Value[I] <> #32) then
+ Exit;
+ end;
+ Inc(I);
+ end;
+ Result := True;
+end;
+
+function IsValidXmlEncoding(const Value: WideString): Boolean;
+var
+ I: Integer;
+begin
+ Result := False;
+ if (Value = '') or (Value[1] > #255) or not (char(Value[1]) in ['A'..'Z', 'a'..'z']) then
+ Exit;
+ for I := 2 to Length(Value) do
+ if (Value[I] > #255) or not (char(Value[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
+ Exit;
+ Result := True;
+end;
+
+procedure NormalizeSpaces(var Value: WideString);
+var
+ I, J: Integer;
+begin
+ I := Length(Value);
+ // speed: trim only whed needed
+ if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
+ Value := Trim(Value);
+ I := 1;
+ while I < Length(Value) do
+ begin
+ if Value[I] = #32 then
+ begin
+ J := I+1;
+ while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
+ if J-I > 1 then Delete(Value, I+1, J-I-1);
+ end;
+ Inc(I);
+ end;
+end;
+
+initialization
+
+finalization
+ if Assigned(Xml11Pg) then
+ FreeMem(Xml11Pg);
+
+end.
diff --git a/packages/fcl-xml/src/xmlwrite.pp b/packages/fcl-xml/src/xmlwrite.pp
index b3314d2128..95a420cb7b 100644
--- a/packages/fcl-xml/src/xmlwrite.pp
+++ b/packages/fcl-xml/src/xmlwrite.pp
@@ -44,7 +44,6 @@ implementation
uses SysUtils;
type
- TCharacters = set of Char;
TSpecialCharCallback = procedure(c: WideChar) of object;
TXMLWriter = class(TObject)
@@ -64,7 +63,7 @@ type
procedure wrtLineEnd; {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtQuotedLiteral(const ws: WideString);
- procedure ConvWrite(const s: WideString; const SpecialChars: TCharacters;
+ procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
const SpecialCharCallback: TSpecialCharCallback);
procedure AttrSpecialCharCallback(c: WideChar);
procedure TextNodeSpecialCharCallback(c: WideChar);
@@ -297,7 +296,7 @@ const
AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
TextSpecialChars = ['<', '>', '&'];
-procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TCharacters;
+procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
const SpecialCharCallback: TSpecialCharCallback);
var
StartPos, EndPos: Integer;
@@ -362,6 +361,7 @@ begin
COMMENT_NODE: VisitComment(node);
DOCUMENT_NODE: VisitDocument(node);
DOCUMENT_TYPE_NODE: VisitDocumentType(node);
+ ENTITY_NODE,
DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
end;
end;
@@ -384,7 +384,8 @@ begin
for i := 0 to node.Attributes.Length - 1 do
begin
attr := node.Attributes.Item[i];
- VisitAttribute(attr);
+ if TDOMAttr(attr).Specified then
+ VisitAttribute(attr);
end;
Child := node.FirstChild;
if Child = nil then
@@ -532,18 +533,19 @@ procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
begin
wrtStr('<!DOCTYPE ');
wrtStr(Node.NodeName);
+ wrtChr(' ');
with TDOMDocumentType(Node) do
begin
if PublicID <> '' then
begin
- wrtStr(' PUBLIC ');
+ wrtStr('PUBLIC ');
wrtQuotedLiteral(PublicID);
wrtChr(' ');
wrtQuotedLiteral(SystemID);
end
else if SystemID <> '' then
begin
- wrtStr(' SYSTEM ');
+ wrtStr('SYSTEM ');
wrtQuotedLiteral(SystemID);
end;
if InternalSubset <> '' then
@@ -560,6 +562,7 @@ procedure TXMLWriter.VisitFragment(Node: TDOMNode);
var
Child: TDOMNode;
begin
+ // TODO: TextDecl is probably needed
// Fragment itself should not be written, only its children should...
Child := Node.FirstChild;
while Assigned(Child) do
diff --git a/packages/fcl-xml/tests/xmlts.pp b/packages/fcl-xml/tests/xmlts.pp
new file mode 100644
index 0000000000..62903d8a0a
--- /dev/null
+++ b/packages/fcl-xml/tests/xmlts.pp
@@ -0,0 +1,778 @@
+{
+ This file is part of the Free Component Library (FCL)
+
+ FCL test runner for OASIS/NIST XML test suite
+ It is somewhat based on 'harness.js' script
+ (see http://xmlconf.sourceforge.net)
+ Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
+
+ See the file COPYING.FPC, included in this distribution,
+ for details about the copyright.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+program xmlts;
+
+{$IFDEF FPC}
+{$MODE OBJFPC}{$H+}
+{$ENDIF}
+{$APPTYPE CONSOLE}
+
+uses
+ SysUtils,
+ Classes,
+ DOM,
+ XMLRead,
+ XMLWrite,
+ UriParser;
+
+const
+ harness = 'Pascal version';
+ version = '0.0.1 alpha :)';
+ parser = 'FCL XML parser';
+ parserName = parser;
+ os = 'Unknown OS';
+ runtime = 'FPC RTL';
+
+
+type
+ TDiagCategory = (dcInfo, dcNegfail, dcFail, dcPass);
+
+ TTestSuite = class
+ private
+ FTemplate: TXMLDocument;
+ FParser: TDOMParser;
+ FPassed, FFailCount: Integer;
+ FFalsePasses: Integer;
+ FRootUri: string;
+ FTemplateName: string;
+ FSuiteName: string;
+ FDoc: TXMLDocument;
+ FValidating: Boolean;
+ FSuiteTitle: DOMString;
+ FState: DOMString;
+ FSkipped: Integer;
+ FTotal: Integer;
+ table_valid: TDOMNode;
+ table_output: TDOMNode;
+ table_invalid: TDOMNode;
+ table_not_wf: TDOMNode;
+ table_informative: TDOMNode;
+ FValError: string;
+ FTestID: DOMString;
+ procedure LoadTemplate(const Name: string);
+ procedure HandleTemplatePIs(Element: TDOMNode);
+ procedure Diagnose(Element, Table: TDOMNode; Category: TDiagCategory; const Error: DOMString);
+ procedure DiagnoseOut(const ErrorMsg: DOMString);
+ function CompareNodes(actual, correct: TDOMNode; out Msg: string): Boolean;
+ procedure Canonicalize(node: TDOMNode);
+ procedure ErrorHandler(Error: EXMLReadError);
+ public
+ constructor Create;
+ procedure Run(const Tests: string);
+ procedure RunTest(Element: TDOMElement);
+ destructor Destroy; override;
+ end;
+
+function GetBaseURI(Element: TDOMNode; const DocumentURI: string): string;
+var
+ Ent: TDOMNode;
+ Uri1, Uri2, s: WideString;
+begin
+ case Element.NodeType of
+ ELEMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE,
+ PROCESSING_INSTRUCTION_NODE, COMMENT_NODE, DOCUMENT_TYPE_NODE:
+ if Assigned(Element.ParentNode)
+ then Result := GetBaseURI(Element.ParentNode, DocumentURI)
+ else Result := '';
+
+ ATTRIBUTE_NODE: begin
+ Result := '';
+ if Assigned(TDomAttr(Element).OwnerElement) then
+ begin
+ Result := GetBaseURI(TDomAttr(Element).OwnerElement, DocumentURI);
+ end;
+ end;
+
+ ENTITY_REFERENCE_NODE: begin
+ Ent := Element.OwnerDocument.DocType.Entities.GetNamedItem(Element.NodeName);
+ if Assigned(Ent) and (TDOMEntity(Ent).SystemID <> '') then
+ begin
+ Uri1 := TDOMEntity(Ent).SystemID;
+ if IsAbsoluteURI(Uri1) then
+ begin
+ Result := Uri1;
+ end else begin
+ Uri2 := GetBaseURI(Element.ParentNode, DocumentUri);
+ ResolveRelativeUri(Uri2, Uri1, s);
+ Result := s;
+ end;
+ end
+ else
+ begin
+ if Assigned(Element.ParentNode)
+ then Result := GetBaseURI(Element.ParentNode, DocumentURI)
+ else Result := '';
+ end;
+ end;
+
+ DOCUMENT_NODE: Result := DocumentURI;
+ else
+ Result := '';
+ end;
+end;
+
+{ TTestSuite }
+
+constructor TTestSuite.Create;
+begin
+ inherited Create;
+ FParser := TDOMParser.Create;
+ FParser.Options.PreserveWhitespace := True;
+end;
+
+procedure TTestSuite.ErrorHandler(Error: EXMLReadError);
+begin
+ if Error.Severity = esError then
+ begin
+ FValError := Error.Message;
+{ uncomment the line below to verify that the suite correctly handles
+ exception raised from the handler }
+// Abort;
+ end;
+end;
+
+procedure TTestSuite.LoadTemplate(const Name: string);
+var
+ tables: TDOMNodeList;
+ I: Integer;
+ id: DOMString;
+ el: TDOMElement;
+begin
+ ReadXMLFile(FTemplate, Name);
+ tables := FTemplate.DocumentElement.GetElementsByTagName('table');
+ try
+ for I := 0 to tables.Count-1 do
+ begin
+ el := TDOMElement(tables.Item[I]);
+ id := el['id'];
+ if id = 'valid' then
+ table_valid := el
+ else if ((id = 'invalid-negative') and FValidating) or ((id = 'invalid-positive') and not FValidating) then
+ table_invalid := el
+ else if id = 'valid-output' then
+ table_output := el
+ else if id = 'not-wf' then
+ table_not_wf := el
+ else if id = 'error' then
+ table_informative := el;
+ end;
+ finally
+ tables.Free;
+ end;
+end;
+
+destructor TTestSuite.Destroy;
+begin
+ FDoc.Free;
+ FTemplate.Free;
+ FParser.Free;
+ inherited;
+end;
+
+procedure TTestSuite.HandleTemplatePIs(Element: TDOMNode);
+var
+ Children: TDOMNodeList;
+ Child: TDOMNode;
+ NewChild: TDOMNode;
+ Remove: Boolean;
+ Index: Integer;
+ Data: DOMString;
+begin
+ Children := element.childNodes;
+ Remove := False;
+ Index := 0;
+
+ repeat
+ Child := Children.Item[Index];
+ if Child = nil then Break;
+ Inc(index);
+
+ // inside a rejected <?if ...?>...<?endif?>
+ if Remove and (child.nodeType <> PROCESSING_INSTRUCTION_NODE) then
+ begin
+ Element.removeChild(child);
+ Dec(Index);
+ Continue;
+ end;
+ if Child.hasChildNodes then
+ begin
+ HandleTemplatePIs(Child);
+ Continue;
+ end;
+
+ if Child.nodeType <> PROCESSING_INSTRUCTION_NODE then
+ Continue;
+
+ Data := Child.NodeValue;
+
+ if Child.NodeName = 'run-id' then
+ begin
+ if Data = 'name' then
+ newChild := FTemplate.createTextNode(parser)
+ else if Data = 'description' then
+ newChild := FTemplate.createTextNode (parserName)
+ else if Data = 'general-entities' then
+ newChild := FTemplate.createTextNode('included')
+ else if Data = 'parameter-entities' then
+ newChild := FTemplate.createTextNode ('included')
+ else if Data = 'type' then
+ begin
+ if FValidating then
+ Data := 'Validating'
+ else
+ Data := 'Non-Validating';
+ newChild := FTemplate.createTextNode(Data);
+ end
+ // ... test run description
+ else if Data = 'date' then
+ newChild := FTemplate.createTextNode(DateTimeToStr(Now))
+ else if Data = 'harness' then
+ newChild := FTemplate.createTextNode(harness)
+ else if Data = 'java' then
+ newChild := FTemplate.createTextNode(runtime)
+ else if Data = 'os' then
+ newChild := FTemplate.createTextNode(os)
+ else if Data = 'testsuite' then
+ newChild := FTemplate.createTextNode(FSuiteTitle)
+ else if Data = 'version' then
+ newChild := FTemplate.createTextNode(version)
+ // ... test result info
+ else if Data = 'failed' then
+ newChild := FTemplate.createTextNode(IntToStr(FFailCount))
+ else if Data = 'passed' then
+ newChild := FTemplate.createTextNode(IntToStr(FPassed))
+ else if Data = 'passed-negative' then
+ newChild := FTemplate.createTextNode(IntToStr(FFalsePasses))
+ else if Data = 'skipped' then
+ newChild := FTemplate.createTextNode(IntToStr(FSkipped))
+ else if Data = 'status' then
+ newChild := FTemplate.createTextNode (FState);
+
+ Element.replaceChild (newChild, child);
+ Continue;
+ end
+
+ // if/endif don't nest, and always have the same parent
+ // we rely on those facts here!
+ else if Child.NodeName = 'if' then
+ begin
+ Remove := not (((Data = 'validating') and FValidating) or
+ ((Data = 'nonvalidating') and not FValidating));
+ element.removeChild(child);
+ Dec(Index);
+ Continue;
+ end
+ else if Child.NodeName = 'endif' then
+ begin
+ Remove := False;
+ element.removeChild(child);
+ Dec(Index);
+ Continue;
+ end;
+ until False;
+ Children.Free;
+end;
+
+
+procedure TTestSuite.Run(const Tests: string);
+var
+ Cases: TDOMNodeList;
+ I: Integer;
+begin
+ FRootURI := FilenameToURI(Tests);
+ ReadXMLFile(FDoc, Tests);
+ FSuiteTitle := FDoc.DocumentElement['PROFILE'];
+ Cases := FDoc.DocumentElement.GetElementsByTagName('TEST');
+ writeln('Using test suite: ', Tests);
+ writeln;
+ writeln('Testing, validation = ', FValidating);
+ try
+ for I := 0 to Cases.Count-1 do
+ RunTest(Cases.Item[I] as TDOMElement);
+ I := Cases.Count;
+ finally
+ Cases.Free;
+ end;
+
+ FPassed := FTotal-FFailCount;
+ Dec(FPassed, FSkipped);
+
+ writeln('Found ', I, ' basic test cases.');
+ writeln('Found ', FTotal, ' overall test cases.');
+ writeln('Skipped: ', FSkipped);
+ writeln('Passed: ', FPassed);
+ writeln('Failed: ', FFailCount);
+ writeln('Negative passes: ', FFalsePasses, ' (need examination).');
+ writeln;
+
+ if FPassed = 0 then
+ FState := 'N/A'
+ else if FPassed = FTotal then
+ FState := 'CONFORMS (provisionally)'
+ else
+ FState := 'DOES NOT CONFORM';
+
+end;
+
+procedure TTestSuite.RunTest(Element: TDOMElement);
+var
+ s: UTF8string;
+ TestType: DOMString;
+ TempDoc, RefDoc: TXMLDocument;
+ table: TDOMNode;
+ Positive: Boolean;
+ outURI: UTF8string;
+ FailMsg: string;
+ docNode, refNode: TDOMNode;
+ docMap, refMap: TDOMNamedNodeMap;
+ docN, refN: TDOMNotation;
+ I: Integer;
+ root: UTF8String;
+begin
+ FTestID := Element['ID'];
+ TestType := Element['TYPE'];
+ root := GetBaseURI(Element, FRootUri);
+ ResolveRelativeURI(root, UTF8Encode(Element['URI']), s);
+
+ table := nil;
+ outURI := '';
+ if TestType = 'not-wf' then
+ begin
+ table := table_not_wf;
+ Positive := False;
+ end
+ else if TestType = 'error' then
+ begin
+ table := table_informative;
+ Positive := False;
+ end
+ else if TestType = 'valid' then
+ begin
+ if Element.hasAttribute('OUTPUT') then
+ ResolveRelativeURI(root, UTF8Encode(Element['OUTPUT']), outURI);
+ table := table_valid;
+ Positive := True;
+ end
+ else if TestType = 'invalid' then
+ begin
+ table := table_invalid;
+ Positive := not FValidating;
+ end;
+
+ if TestType <> 'error' then
+ begin
+ Inc(FTotal);
+ if outURI <> '' then Inc(FTotal);
+ end;
+
+ FailMsg := '';
+ FValError := '';
+ TempDoc := nil;
+ try
+ try
+ FParser.Options.Validate := FValidating;
+ FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler;
+ FParser.ParseUri(s, TempDoc);
+ except
+ on E: Exception do
+ if E.ClassType <> EAbort then
+ FailMsg := E.Message;
+ end;
+ if FailMsg <> '' then // fatal errors take precedence
+ FValError := '';
+
+ if not Positive then // must have been failed
+ begin
+ if TestType = 'error' then
+ begin
+ if FailMsg <> '' then
+ Diagnose(element, table, dcInfo, FailMsg)
+ else
+ Diagnose(element, table, dcInfo, FValError);
+ end
+ else if (FailMsg = '') and (FValError = '') then
+ begin
+ Inc(FFailCount);
+ Diagnose(element, table, dcNegfail, '');
+ end
+ else // FailMsg <> '' or FValError <> '' -> actually failed
+ begin
+ Inc(FFalsePasses);
+ if FailMsg <> '' then
+ Diagnose(Element, table, dcPass, FailMsg)
+ else
+ Diagnose(Element, table, dcPass, FValError);
+ end;
+ Exit;
+ end
+ else // must have been succeeded
+ if (FailMsg <> '') or (FValError <> '') then
+ begin
+ Inc(FFailCount);
+ if FailMsg <> '' then
+ Diagnose(Element, table, dcFail, FailMsg)
+ else
+ Diagnose(Element, table, dcFail, FValError);
+ if (outURI <> '') and (FailMsg <> '') then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('[ input failed, no output to test ]');
+ end;
+ Exit;
+ end;
+
+ if outURI = '' then Exit;
+ Canonicalize(TempDoc);
+ TempDoc.DocumentElement.Normalize;
+ try
+ // reference data must be parsed in non-validating mode because it contains DTDs
+ // only when Notations need to be reported
+ FParser.Options.Validate := False;
+ FParser.ParseUri(outURI, RefDoc);
+ try
+ docNode := TempDoc.FirstChild;
+ refNode := RefDoc.FirstChild;
+ repeat
+ if refNode = nil then
+ begin
+ if docNode <> nil then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('Extra data: ' + docNode.NodeName + ' / ' + docNode.NodeValue);
+ end;
+ Exit;
+ end;
+ if docNode = nil then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('Missing data: ' + refNode.NodeName + ' / ' + refNode.NodeValue);
+ Exit;
+ end;
+
+ if refNode.NodeType = DOCUMENT_TYPE_NODE then
+ begin
+ if docNode.NodeType <> DOCUMENT_TYPE_NODE then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('[ no doctype from parsing testcase ]');
+ Exit;
+ end;
+
+ refMap := TDOMDocumentType(refNode).Notations;
+ docMap := TDOMDocumentType(docNode).Notations;
+
+ for I := 0 to refMap.Length-1 do
+ begin
+ refN := TDOMNotation(refMap[I]);
+ docN := TDOMNotation(docMap.GetNamedItem(refMap[I].NodeName));
+ if not Assigned(docN) then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('missing notation declaration: ' + refN.NodeName);
+ Exit;
+ end;
+ if (refN.PublicID <> docN.PublicID) or (refN.SystemID <> docN.SystemID) then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('incorrect notation declaration: ' + refN.NodeName);
+ Exit;
+ end;
+ end;
+
+ refNode := refNode.NextSibling;
+ docNode := docNode.NextSibling;
+ Continue;
+ end;
+
+ if docNode.NodeType = DOCUMENT_TYPE_NODE then // skip DocType
+ docNode := docNode.NextSibling;
+
+ if not CompareNodes(docNode, refNode, FailMsg) then
+ begin
+ Inc(FFailCount);
+ DiagnoseOut(FailMsg);
+ Exit;
+ end;
+
+ docNode := docNode.NextSibling;
+ refNode := refNode.NextSibling;
+ until False;
+ finally
+ RefDoc.Free;
+ end;
+ except
+ on E: Exception do
+ begin
+ Inc(FFailCount);
+ DiagnoseOut('[ can''t read reference data: '+E.Message+' ]');
+ end;
+ end;
+ finally
+ TempDoc.Free;
+ end;
+end;
+
+
+procedure TTestSuite.Diagnose(Element, Table: TDOMNode; Category: TDiagCategory;
+ const Error: DOMString);
+var
+ tr, td, txt, tmp: TDOMNode;
+ s: DOMString;
+begin
+ tr := FTemplate.CreateElement('tr');
+ if Assigned(Element) then // column 1: section/chapter, if known
+ begin
+ s := TDOMElement(Element)['SECTIONS'];
+ td := FTemplate.CreateElement('td');
+ td.AppendChild(FTemplate.CreateTextNode(s));
+ tr.AppendChild(td);
+ end;
+
+ td := FTemplate.CreateElement('td'); // column 2: test ID
+ td.AppendChild(FTemplate.CreateTextNode(FTestID));
+ tr.AppendChild(td);
+ // third column is description
+ if Assigned(Element) then
+ begin
+ td := FTemplate.CreateElement('td');
+ txt := Element.FirstChild;
+ while Assigned(txt) do
+ begin
+ td.AppendChild(txt.CloneNode(true, FTemplate));
+ txt := txt.NextSibling;
+ end;
+ tr.AppendChild(td);
+ end;
+ // fourth column is reason
+ td := FTemplate.CreateElement('td');
+ if Element = nil then
+ s := Error
+ else if Category <> dcInfo then
+ begin
+ if Error <> '' then
+ begin
+ if FValError <> '' then
+ s := '(error) ' + Error
+ else
+ s := '(fatal) ' + Error;
+ end
+ else
+ s := '[wrongly accepted]';
+ end
+ else // informative
+ begin
+ if Error <> '' then
+ s := Error
+ else
+ s := '[accepted]';
+ end;
+ // TODO: use &nbsp if text is empty
+ txt := FTemplate.CreateTextNode(s);
+
+ if (Category <> dcPass) and (Category <> dcInfo) then
+ begin
+ tmp := FTemplate.CreateElement('em');
+ tmp.AppendChild(txt);
+ txt := tmp;
+ TDOMElement(td)['bgcolor'] := '#ffaacc';
+ end;
+ td.AppendChild(txt);
+ tr.AppendChild(td);
+
+ table.AppendChild(tr);
+end;
+
+procedure TTestSuite.DiagnoseOut(const ErrorMsg: DOMString);
+var
+ tr, td, txt: TDOMNode;
+begin
+ tr := FTemplate.CreateElement('tr');
+
+ td := FTemplate.CreateElement('td');
+ td.AppendChild(FTemplate.CreateTextNode(FTestID));
+ tr.AppendChild(td);
+
+ td := FTemplate.CreateElement('td');
+ txt := FTemplate.CreateElement('em');
+ txt.AppendChild(FTemplate.CreateTextNode(ErrorMsg));
+ td.AppendChild(txt);
+ TDOMElement(td)['bgcolor'] := '#ffaacc';
+ tr.AppendChild(td);
+ table_output.AppendChild(tr);
+end;
+
+
+procedure TTestSuite.Canonicalize(node: TDOMNode);
+var
+ child, work: TDOMNode;
+ Frag: TDOMDocumentFragment;
+begin
+ child := node.FirstChild;
+ while Assigned(child) do
+ begin
+ if child.NodeType = CDATA_SECTION_NODE then
+ begin
+ work := node.OwnerDocument.CreateTextNode(child.NodeValue);
+ node.ReplaceChild(work, child);
+ child := work;
+ end
+ else if child.NodeType = COMMENT_NODE then
+ begin
+ work := child.NextSibling;
+ node.RemoveChild(child);
+ child := work;
+ Continue;
+ end
+ else if child.NodeType = ENTITY_REFERENCE_NODE then
+ begin
+ Frag := node.OwnerDocument.CreateDocumentFragment;
+ try
+ work := child.FirstChild;
+ while Assigned(work) do
+ begin
+ Frag.AppendChild(work.CloneNode(true));
+ work := work.NextSibling;
+ end;
+ work := Frag.FirstChild; // references may be nested
+ if work = nil then
+ work := Child.PreviousSibling;
+
+ node.ReplaceChild(Frag, child);
+ child := work;
+ finally
+ Frag.Free;
+ end;
+ Continue;
+ end;
+ if child.HasChildNodes then
+ Canonicalize(child);
+ child := child.NextSibling;
+ end;
+end;
+
+function TTestSuite.CompareNodes(actual, correct: TDOMNode;
+ out Msg: string): Boolean;
+var
+ actAtts, refAtts: TDOMNamedNodeMap;
+ actList, refList: TDOMNodeList;
+ I: Integer;
+ s1, s2: DOMString;
+begin
+ Msg := '';
+ Result := False;
+ if actual.NodeType <> correct.NodeType then
+ FmtStr(Msg, 'actual.NodeType (%d) != correct.NodeType (%d)', [actual.NodeType, correct.NodeType])
+ else if actual.NodeName <> correct.NodeName then
+ FmtStr(Msg, 'actual.NodeName (%s) != correct.NodeName (%s)', [actual.NodeName, correct.NodeName])
+ else if actual.NodeValue <> correct.NodeValue then
+ FmtStr(Msg, 'actual.NodeValue (%s) != correct.NodeValue (%s)', [actual.NodeValue, correct.NodeValue]);
+ if Msg <> '' then
+ Exit;
+
+ if actual.NodeType = ELEMENT_NODE then
+ begin
+ // first, compare attributes
+ actAtts := actual.Attributes;
+ refAtts := correct.Attributes;
+ if actAtts.Length <> refAtts.Length then
+ begin
+ FmtStr(Msg, 'Element ''%s'': attributes.length (%d) != %d', [actual.NodeName, actAtts.Length, refAtts.Length]);
+ Exit;
+ end;
+ for I := 0 to actAtts.Length -1 do
+ begin
+ s1 := refAtts.GetNamedItem(actAtts[I].NodeName).NodeValue;
+ s2 := actAtts[I].NodeValue;
+ if s1 <> s2 then
+ begin
+ FmtStr(Msg, 'Element ''%s'', attribute ''%s'': actual.AttValue (%s) != correct.AttValue (%s)', [actual.NodeName, actAtts[I].NodeName, s2, s1]);
+ Exit;
+ end;
+ end;
+ // next, compare children
+ actList := actual.ChildNodes;
+ refList := correct.ChildNodes;
+ try
+ if actList.Count <> refList.Count then
+ begin
+ FmtStr(Msg, 'Element ''%s'': actual.ChildNodeCount (%d) != correct.ChildNodeCount (%d)', [actual.NodeName, actList.Count, refList.Count]);
+ Exit;
+ end;
+ for I := 0 to actList.Count -1 do
+ if not CompareNodes(actList[I], refList[I], Msg) then
+ Exit;
+ finally
+ actList.Free;
+ refList.Free;
+ end;
+ end;
+ Result := True;
+end;
+
+
+
+var
+ i: Integer;
+ s: string;
+ SuiteName, ReportName, TemplateName: string;
+ Validation: Boolean;
+begin
+ writeln('FCL driver for OASIS/NIST XML Test Suite');
+ writeln('Copyright (c) 2006 by Sergei Gorelkin');
+ TemplateName := ExtractFilePath(ParamStr(0)) + 'template.xml';
+ if ParamCount < 2 then
+ begin
+ writeln;
+ writeln('Usage: ', ParamStr(0), ' <suite> <report> [-t template][-v]');
+ writeln(' -t: specify report template');
+ writeln(' -v: validating mode');
+ Exit;
+ end;
+
+ SuiteName := ExpandFilename(ParamStr(1));
+ ReportName := ExpandFilename(ParamStr(2));
+ i := 3;
+ Validation := False;
+ while i <= ParamCount do
+ begin
+ s := Lowercase(ParamStr(i));
+ if s = '-v' then
+ Validation := True
+ else if s = '-t' then
+ TemplateName := ExpandFileName(ParamStr(i+1));
+ Inc(i);
+ end;
+
+ with TTestSuite.Create do
+ try
+ FSuiteName := SuiteName;
+ FTemplateName := TemplateName;
+ FValidating := Validation;
+ LoadTemplate(FTemplateName);
+ if Assigned(FTemplate) then
+ begin
+ Run(FSuiteName);
+ HandleTemplatePIs(FTemplate.DocumentElement);
+ writeln('Writing report to: ', ReportName);
+ WriteXMLFile(FTemplate, ReportName);
+ end;
+ finally
+ Free;
+ end;
+
+end.