summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r--tests/dtds/HTML4-f.dtd37
-rw-r--r--tests/dtds/HTML4-s.dtd869
-rw-r--r--tests/dtds/HTML4.dcl88
-rw-r--r--tests/dtds/HTML4.dtd1092
-rw-r--r--tests/dtds/HTML4.soc9
-rw-r--r--tests/dtds/HTMLlat1.ent195
-rw-r--r--tests/dtds/HTMLspec.ent77
-rw-r--r--tests/dtds/HTMLsym.ent241
-rw-r--r--tests/test_html_formatter.py32
9 files changed, 2639 insertions, 1 deletions
diff --git a/tests/dtds/HTML4-f.dtd b/tests/dtds/HTML4-f.dtd
new file mode 100644
index 00000000..95520128
--- /dev/null
+++ b/tests/dtds/HTML4-f.dtd
@@ -0,0 +1,37 @@
+<!--
+ This is the HTML 4.0 Frameset DTD, which should be
+ used for documents with frames. This DTD is identical
+ to the HTML 4.0 Transitional DTD except for the
+ content model of the "HTML" element: in frameset
+ documents, the "FRAMESET" element replaces the "BODY"
+ element.
+
+ Draft: $Date: 1999/05/02 15:37:15 $
+
+ Authors:
+ Dave Raggett <dsr@w3.org>
+ Arnaud Le Hors <lehors@w3.org>
+ Ian Jacobs <ij@w3.org>
+
+ Further information about HTML 4.0 is available at:
+
+ http://www.w3.org/TR/REC-html40.
+-->
+<!ENTITY % HTML.Version "-//W3C//DTD HTML 4.0 Frameset//EN"
+ -- Typical usage:
+
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN"
+ "http://www.w3.org/TR/REC-html40/frameset.dtd">
+ <html>
+ <head>
+ ...
+ </head>
+ <frameset>
+ ...
+ </frameset>
+ </html>
+-->
+
+<!ENTITY % HTML.Frameset "INCLUDE">
+<!ENTITY % HTML4.dtd PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+%HTML4.dtd; \ No newline at end of file
diff --git a/tests/dtds/HTML4-s.dtd b/tests/dtds/HTML4-s.dtd
new file mode 100644
index 00000000..8ce7917b
--- /dev/null
+++ b/tests/dtds/HTML4-s.dtd
@@ -0,0 +1,869 @@
+<!--
+ This is HTML 4.0 Strict DTD, which excludes the presentation
+ attributes and elements that W3C expects to phase out as
+ support for style sheets matures. Authors should use the Strict
+ DTD when possible, but may use the Transitional DTD when support
+ for presentation attributes and elements is required.
+
+ HTML 4.0 includes mechanisms for style sheets, scripting,
+ embedding objects, improved support for right to left and mixed
+ direction text, and enhancements to forms for improved
+ accessibility for people with disabilities.
+
+ Draft: $Date: 1999/05/02 15:37:15 $
+
+ Authors:
+ Dave Raggett <dsr@w3.org>
+ Arnaud Le Hors <lehors@w3.org>
+ Ian Jacobs <ij@w3.org>
+
+ Further information about HTML 4.0 is available at:
+
+ http://www.w3.org/TR/REC-html40
+-->
+<!--
+ Typical usage:
+
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"
+ "http://www.w3.org/TR/REC-html40/strict.dtd">
+ <html>
+ <head>
+ ...
+ </head>
+ <body>
+ ...
+ </body>
+ </html>
+
+ The URI used as a system identifier with the public identifier allows
+ the user agent to download the DTD and entity sets as needed.
+
+ The FPI for the Transitional HTML 4.0 DTD is:
+
+ "-//W3C//DTD HTML 4.0 Transitional//EN"
+
+ and its URI is:
+
+ http://www.w3.org/TR/REC-html40/loose.dtd
+
+ If you are writing a document that includes frames, use
+ the following FPI:
+
+ "-//W3C//DTD HTML 4.0 Frameset//EN"
+
+ with the URI:
+
+ http://www.w3.org/TR/REC-html40/frameset.dtd
+
+ The following URIs are supported in relation to HTML 4.0
+
+ "http://www.w3.org/TR/REC-html40/strict.dtd" (Strict DTD)
+ "http://www.w3.org/TR/REC-html40/loose.dtd" (Loose DTD)
+ "http://www.w3.org/TR/REC-html40/frameset.dtd" (Frameset DTD)
+ "http://www.w3.org/TR/REC-html40/HTMLlat1.ent" (Latin-1 entities)
+ "http://www.w3.org/TR/REC-html40/HTMLsymbol.ent" (Symbol entities)
+ "http://www.w3.org/TR/REC-html40/HTMLspecial.ent" (Special entities)
+
+ These URIs point to the latest version of each file. To reference
+ this specific revision use the following URIs:
+
+ "http://www.w3.org/TR/REC-html40-971218/strict.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/loose.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/frameset.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent"
+
+-->
+
+<!--================== Imported Names ====================================-->
+
+<!ENTITY % ContentType "CDATA"
+ -- media type, as per [RFC2045]
+ -->
+
+<!ENTITY % ContentTypes "CDATA"
+ -- comma-separated list of media types, as per [RFC2045]
+ -->
+
+<!ENTITY % Charset "CDATA"
+ -- a character encoding, as per [RFC2045]
+ -->
+
+<!ENTITY % Charsets "CDATA"
+ -- a space separated list of character encodings, as per [RFC2045]
+ -->
+
+<!ENTITY % LanguageCode "NAME"
+ -- a language code, as per [RFC1766]
+ -->
+
+<!ENTITY % Character "CDATA"
+ -- a single character from [ISO10646]
+ -->
+
+<!ENTITY % LinkTypes "CDATA"
+ -- space-separated list of link types
+ -->
+
+<!ENTITY % MediaDesc "CDATA"
+ -- single or comma-separated list of media descriptors
+ -->
+
+<!ENTITY % URI "CDATA"
+ -- a Uniform Resource Identifier,
+ see [URI]
+ -->
+
+<!ENTITY % Datetime "CDATA" -- date and time information. ISO date format -->
+
+
+<!ENTITY % Script "CDATA" -- script expression -->
+
+<!ENTITY % StyleSheet "CDATA" -- style sheet data -->
+
+
+
+<!ENTITY % Text "CDATA">
+
+
+<!-- Parameter Entities -->
+
+<!ENTITY % head.misc "SCRIPT|STYLE|META|LINK|OBJECT" -- repeatable head elements -->
+
+<!ENTITY % heading "H1|H2|H3|H4|H5|H6">
+
+<!ENTITY % list "UL | OL">
+
+<!ENTITY % preformatted "PRE">
+
+
+<!--================ Character mnemonic entities =========================-->
+
+<!ENTITY % HTMLlat1 PUBLIC
+ "-//W3C//ENTITIES Latin1//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent">
+%HTMLlat1;
+
+<!ENTITY % HTMLsymbol PUBLIC
+ "-//W3C//ENTITIES Symbols//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent">
+%HTMLsymbol;
+
+<!ENTITY % HTMLspecial PUBLIC
+ "-//W3C//ENTITIES Special//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent">
+%HTMLspecial;
+<!--=================== Generic Attributes ===============================-->
+
+<!ENTITY % coreattrs
+ "id ID #IMPLIED -- document-wide unique id --
+ class CDATA #IMPLIED -- space separated list of classes --
+ style %StyleSheet; #IMPLIED -- associated style info --
+ title %Text; #IMPLIED -- advisory title/amplification --"
+ >
+
+<!ENTITY % i18n
+ "lang %LanguageCode; #IMPLIED -- language code --
+ dir (ltr|rtl) #IMPLIED -- direction for weak/neutral text --"
+ >
+
+<!ENTITY % events
+ "onclick %Script; #IMPLIED -- a pointer button was clicked --
+ ondblclick %Script; #IMPLIED -- a pointer button was double clicked--
+ onmousedown %Script; #IMPLIED -- a pointer button was pressed down --
+ onmouseup %Script; #IMPLIED -- a pointer button was released --
+ onmouseover %Script; #IMPLIED -- a pointer was moved onto --
+ onmousemove %Script; #IMPLIED -- a pointer was moved within --
+ onmouseout %Script; #IMPLIED -- a pointer was moved away --
+ onkeypress %Script; #IMPLIED -- a key was pressed and released --
+ onkeydown %Script; #IMPLIED -- a key was pressed down --
+ onkeyup %Script; #IMPLIED -- a key was released --"
+ >
+
+<!-- Reserved Feature Switch -->
+<!ENTITY % HTML.Reserved "IGNORE">
+
+<!-- The following attributes are reserved for possible future use -->
+<![ %HTML.Reserved; [
+<!ENTITY % reserved
+ "datasrc %URI; #IMPLIED -- a single or tabular Data Source --
+ datafld CDATA #IMPLIED -- the property or column name --
+ dataformatas (plaintext|html) plaintext -- text or html --"
+ >
+]]>
+
+<!ENTITY % reserved "">
+
+<!ENTITY % attrs "%coreattrs; %i18n; %events;">
+
+
+<!--=================== Text Markup ======================================-->
+
+<!ENTITY % fontstyle
+ "TT | I | B | BIG | SMALL">
+
+<!ENTITY % phrase "EM | STRONG | DFN | CODE |
+ SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
+
+<!ENTITY % special
+ "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO">
+
+<!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
+
+<!-- %inline; covers inline or "text-level" elements -->
+<!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
+
+<!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*>
+<!ATTLIST (%fontstyle;|%phrase;)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT (SUB|SUP) - - (%inline;)* -- subscript, superscript -->
+<!ATTLIST (SUB|SUP)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT SPAN - - (%inline;)* -- generic language/style container -->
+<!ATTLIST SPAN
+ %attrs; -- %coreattrs, %i18n, %events --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT BDO - - (%inline;)* -- I18N BiDi over-ride -->
+<!ATTLIST BDO
+ %coreattrs; -- id, class, style, title --
+ lang %LanguageCode; #IMPLIED -- language code --
+ dir (ltr|rtl) #REQUIRED -- directionality --
+ >
+
+
+<!ELEMENT BR - O EMPTY -- forced line break -->
+<!ATTLIST BR
+ %coreattrs; -- id, class, style, title --
+ >
+
+<!--================== HTML content models ===============================-->
+
+<!--
+ HTML has two basic content models:
+
+ %inline; character level elements and text strings
+ %block; block-like elements e.g. paragraphs and lists
+-->
+
+<!ENTITY % block
+ "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT |
+ BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS">
+
+<!ENTITY % flow "%block; | %inline;">
+
+<!--=================== Document Body ====================================-->
+
+<!ELEMENT BODY O O (%block;|SCRIPT)+ +(INS|DEL) -- document body -->
+<!ATTLIST BODY
+ %attrs; -- %coreattrs, %i18n, %events --
+ onload %Script; #IMPLIED -- the document has been loaded --
+ onunload %Script; #IMPLIED -- the document has been removed --
+ >
+
+<!ELEMENT ADDRESS - - (%inline;)* -- information on author -->
+<!ATTLIST ADDRESS
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT DIV - - (%flow;)* -- generic language/style container -->
+<!ATTLIST DIV
+ %attrs; -- %coreattrs, %i18n, %events --
+ %reserved; -- reserved for possible future use --
+ >
+
+
+<!--================== The Anchor Element ================================-->
+
+<!ENTITY % Shape "(rect|circle|poly|default)">
+<!ENTITY % Coords "CDATA" -- comma separated list of lengths -->
+
+<!ELEMENT A - - (%inline;)* -(A) -- anchor -->
+<!ATTLIST A
+ %attrs; -- %coreattrs, %i18n, %events --
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ type %ContentType; #IMPLIED -- advisory content type --
+ name CDATA #IMPLIED -- named link end --
+ href %URI; #IMPLIED -- URI for linked resource --
+ hreflang %LanguageCode; #IMPLIED -- language code --
+ rel %LinkTypes; #IMPLIED -- forward link types --
+ rev %LinkTypes; #IMPLIED -- reverse link types --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ shape %Shape; rect -- for use with client-side image maps --
+ coords %Coords; #IMPLIED -- for use with client-side image maps --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!--================== Client-side image maps ============================-->
+
+<!-- These can be placed in the same document or grouped in a
+ separate document although this isn't yet widely supported -->
+
+<!ELEMENT MAP - - ((%block;)+ | AREA+) -- client-side image map -->
+<!ATTLIST MAP
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #REQUIRED -- for reference by usemap --
+ >
+
+<!ELEMENT AREA - O EMPTY -- client-side image map area -->
+<!ATTLIST AREA
+ %attrs; -- %coreattrs, %i18n, %events --
+ shape %Shape; rect -- controls interpretation of coords --
+ coords %Coords; #IMPLIED -- comma separated list of lengths --
+ href %URI; #IMPLIED -- URI for linked resource --
+ nohref (nohref) #IMPLIED -- this region has no action --
+ alt %Text; #REQUIRED -- short description --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!--================== The LINK Element ==================================-->
+
+<!--
+ Relationship values can be used in principle:
+
+ a) for document specific toolbars/menus when used
+ with the LINK element in document head e.g.
+ start, contents, previous, next, index, end, help
+ b) to link to a separate style sheet (rel=stylesheet)
+ c) to make a link to a script (rel=script)
+ d) by stylesheets to control how collections of
+ html nodes are rendered into printed documents
+ e) to make a link to a printable version of this document
+ e.g. a postscript or pdf version (rel=alternate media=print)
+-->
+
+<!ELEMENT LINK - O EMPTY -- a media-independent link -->
+<!ATTLIST LINK
+ %attrs; -- %coreattrs, %i18n, %events --
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ href %URI; #IMPLIED -- URI for linked resource --
+ hreflang %LanguageCode; #IMPLIED -- language code --
+ type %ContentType; #IMPLIED -- advisory content type --
+ rel %LinkTypes; #IMPLIED -- forward link types --
+ rev %LinkTypes; #IMPLIED -- reverse link types --
+ media %MediaDesc; #IMPLIED -- for rendering on these media --
+ >
+
+<!--=================== Images ===========================================-->
+
+<!-- Length defined in strict DTD for cellpadding/cellspacing -->
+<!ENTITY % Length "CDATA" -- nn for pixels or nn% for percentage length -->
+<!ENTITY % MultiLength "CDATA" -- pixel, percentage, or relative -->
+
+<!ENTITY % MultiLengths "CDATA" -- comma-separated list of MultiLength -->
+
+<!ENTITY % Pixels "CDATA" -- integer representing length in pixels -->
+
+
+<!-- To avoid problems with text-only UAs as well as
+ to make image content understandable and navigable
+ to users of non-visual UAs, you need to provide
+ a description with ALT, and avoid server-side image maps -->
+<!ELEMENT IMG - O EMPTY -- Embedded image -->
+<!ATTLIST IMG
+ %attrs; -- %coreattrs, %i18n, %events --
+ src %URI; #REQUIRED -- URI of image to embed --
+ alt %Text; #REQUIRED -- short description --
+ longdesc %URI; #IMPLIED -- link to long description
+ (complements alt) --
+ height %Length; #IMPLIED -- override height --
+ width %Length; #IMPLIED -- override width --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ ismap (ismap) #IMPLIED -- use server-side image map --
+ >
+
+<!-- USEMAP points to a MAP element which may be in this document
+ or an external document, although the latter is not widely supported -->
+
+<!--==================== OBJECT ======================================-->
+<!--
+ OBJECT is used to embed objects as part of HTML pages.
+ PARAM elements should precede other content. SGML mixed content
+ model technicality precludes specifying this formally ...
+-->
+
+<!ELEMENT OBJECT - - (PARAM | %flow;)*
+ -- generic embedded object -->
+<!ATTLIST OBJECT
+ %attrs; -- %coreattrs, %i18n, %events --
+ declare (declare) #IMPLIED -- declare but don't instantiate flag --
+ classid %URI; #IMPLIED -- identifies an implementation --
+ codebase %URI; #IMPLIED -- base URI for classid, data, archive--
+ data %URI; #IMPLIED -- reference to object's data --
+ type %ContentType; #IMPLIED -- content type for data --
+ codetype %ContentType; #IMPLIED -- content type for code --
+ archive %URI; #IMPLIED -- space separated archive list --
+ standby %Text; #IMPLIED -- message to show while loading --
+ height %Length; #IMPLIED -- override height --
+ width %Length; #IMPLIED -- override width --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ name CDATA #IMPLIED -- submit as part of form --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT PARAM - O EMPTY -- named property value -->
+<!ATTLIST PARAM
+ id ID #IMPLIED -- document-wide unique id --
+ name CDATA #REQUIRED -- property name --
+ value CDATA #IMPLIED -- property value --
+ valuetype (DATA|REF|OBJECT) DATA -- How to interpret value --
+ type %ContentType; #IMPLIED -- content type for value
+ when valuetype=ref --
+ >
+
+
+<!--=================== Horizontal Rule ==================================-->
+
+<!ELEMENT HR - O EMPTY -- horizontal rule -->
+<!ATTLIST HR
+ %coreattrs; -- id, class, style, title --
+ %events;
+ >
+
+<!--=================== Paragraphs =======================================-->
+
+<!ELEMENT P - O (%inline;)* -- paragraph -->
+<!ATTLIST P
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--=================== Headings =========================================-->
+
+<!--
+ There are six levels of headings from H1 (the most important)
+ to H6 (the least important).
+-->
+
+<!ELEMENT (%heading;) - - (%inline;)* -- heading -->
+<!ATTLIST (%heading;)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--=================== Preformatted Text ================================-->
+
+<!-- excludes markup for images and changes in font size -->
+<!ENTITY % pre.exclusion "IMG|OBJECT|BIG|SMALL|SUB|SUP">
+
+<!ELEMENT PRE - - (%inline;)* -(%pre.exclusion;) -- preformatted text -->
+<!ATTLIST PRE
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--===================== Inline Quotes ==================================-->
+
+<!ELEMENT Q - - (%inline;)* -- short inline quotation -->
+<!ATTLIST Q
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- URI for source document or msg --
+ >
+
+<!--=================== Block-like Quotes ================================-->
+
+<!ELEMENT BLOCKQUOTE - - (%block;|SCRIPT)+ -- long quotation -->
+<!ATTLIST BLOCKQUOTE
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- URI for source document or msg --
+ >
+
+<!--=================== Inserted/Deleted Text ============================-->
+
+
+<!-- INS/DEL are handled by inclusion on BODY -->
+<!ELEMENT (INS|DEL) - - (%flow;)* -- inserted text, deleted text -->
+<!ATTLIST (INS|DEL)
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- info on reason for change --
+ datetime %Datetime; #IMPLIED -- date and time of change --
+ >
+
+<!--=================== Lists ============================================-->
+
+<!-- definition lists - DT for term, DD for its definition -->
+
+<!ELEMENT DL - - (DT|DD)+ -- definition list -->
+<!ATTLIST DL
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT DT - O (%inline;)* -- definition term -->
+<!ELEMENT DD - O (%flow;)* -- definition description -->
+<!ATTLIST (DT|DD)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+
+<!ELEMENT OL - - (LI)+ -- ordered list -->
+<!ATTLIST OL
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!-- Unordered Lists (UL) bullet styles -->
+<!ELEMENT UL - - (LI)+ -- unordered list -->
+<!ATTLIST UL
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+
+
+<!ELEMENT LI - O (%flow;)* -- list item -->
+<!ATTLIST LI
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--================ Forms ===============================================-->
+<!ELEMENT FORM - - (%block;|SCRIPT)+ -(FORM) -- interactive form -->
+<!ATTLIST FORM
+ %attrs; -- %coreattrs, %i18n, %events --
+ action %URI; #REQUIRED -- server-side form handler --
+ method (GET|POST) GET -- HTTP method used to submit the form--
+ enctype %ContentType; "application/x-www-form-urlencoded"
+ onsubmit %Script; #IMPLIED -- the form was submitted --
+ onreset %Script; #IMPLIED -- the form was reset --
+ accept-charset %Charsets; #IMPLIED -- list of supported charsets --
+ >
+
+<!-- Each label must not contain more than ONE field -->
+<!ELEMENT LABEL - - (%inline;)* -(LABEL) -- form field label text -->
+<!ATTLIST LABEL
+ %attrs; -- %coreattrs, %i18n, %events --
+ for IDREF #IMPLIED -- matches field ID value --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!ENTITY % InputType
+ "(TEXT | PASSWORD | CHECKBOX |
+ RADIO | SUBMIT | RESET |
+ FILE | HIDDEN | IMAGE | BUTTON)"
+ >
+
+<!-- attribute name required for all but submit & reset -->
+<!ELEMENT INPUT - O EMPTY -- form control -->
+<!ATTLIST INPUT
+ %attrs; -- %coreattrs, %i18n, %events --
+ type %InputType; TEXT -- what kind of widget is needed --
+ name CDATA #IMPLIED -- submit as part of form --
+ value CDATA #IMPLIED -- required for radio and checkboxes --
+ checked (checked) #IMPLIED -- for radio buttons and check boxes --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ readonly (readonly) #IMPLIED -- for text and passwd --
+ size CDATA #IMPLIED -- specific to each type of field --
+ maxlength NUMBER #IMPLIED -- max chars for text fields --
+ src %URI; #IMPLIED -- for fields with images --
+ alt CDATA #IMPLIED -- short description --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onselect %Script; #IMPLIED -- some text was selected --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ accept %ContentTypes; #IMPLIED -- list of MIME types for file upload --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT SELECT - - (OPTGROUP|OPTION)+ -- option selector -->
+<!ATTLIST SELECT
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED -- field name --
+ size NUMBER #IMPLIED -- rows visible --
+ multiple (multiple) #IMPLIED -- default is single selection --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT OPTGROUP - - (OPTION)+ -- option group -->
+<!ATTLIST OPTGROUP
+ %attrs; -- %coreattrs, %i18n, %events --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ label %Text; #REQUIRED -- for use in hierarchical menus --
+ >
+
+<!ELEMENT OPTION - O (#PCDATA) -- selectable choice -->
+<!ATTLIST OPTION
+ %attrs; -- %coreattrs, %i18n, %events --
+ selected (selected) #IMPLIED
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ label %Text; #IMPLIED -- for use in hierarchical menus --
+ value CDATA #IMPLIED -- defaults to element content --
+ >
+
+<!ELEMENT TEXTAREA - - (#PCDATA) -- multi-line text field -->
+<!ATTLIST TEXTAREA
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED
+ rows NUMBER #REQUIRED
+ cols NUMBER #REQUIRED
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ readonly (readonly) #IMPLIED
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onselect %Script; #IMPLIED -- some text was selected --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!--
+ #PCDATA is included to work around the mixed content problem;
+ per the specification, only whitespace is allowed there!
+ -->
+<!ELEMENT FIELDSET - - (#PCDATA,LEGEND,(%flow;)*) -- form control group -->
+<!ATTLIST FIELDSET
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT LEGEND - - (%inline;)* -- fieldset legend -->
+<!ENTITY % LAlign "(top|bottom|left|right)">
+
+<!ATTLIST LEGEND
+ %attrs; -- %coreattrs, %i18n, %events --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ >
+
+<!ELEMENT BUTTON - -
+ (%flow;)* -(A|%formctrl;|FORM|FIELDSET)
+ -- push button -->
+<!ATTLIST BUTTON
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED
+ value CDATA #IMPLIED -- sent to server when submitted --
+ type (button|submit|reset) submit -- for use as form button --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!--======================= Tables =======================================-->
+
+<!-- IETF HTML table standard, see [RFC1942] -->
+
+<!--
+ The BORDER attribute sets the thickness of the frame around the
+ table. The default units are screen pixels.
+
+ The FRAME attribute specifies which parts of the frame around
+ the table should be rendered. The values are not the same as
+ CALS to avoid a name clash with the VALIGN attribute.
+
+ The value "border" is included for backwards compatibility with
+ <TABLE BORDER> which yields frame=border and border=implied.
+ For <TABLE BORDER=1> you get border=1 and frame=implied. In this
+ case, it is appropriate to treat this as frame=border for backwards
+ compatibility with deployed browsers.
+-->
+<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
+
+<!--
+ The RULES attribute defines which rules to draw between cells:
+
+ If RULES is absent then assume:
+ "none" if BORDER is absent or BORDER=0 otherwise "all"
+-->
+
+<!ENTITY % TRules "(none | groups | rows | cols | all)">
+
+<!-- horizontal placement of table relative to document -->
+<!ENTITY % TAlign "(left|center|right)">
+
+<!-- horizontal alignment attributes for cell contents -->
+<!ENTITY % cellhalign
+ "align (left|center|right|justify|char) #IMPLIED
+ char %Character; #IMPLIED -- alignment char, e.g. char=':' --
+ charoff %Length; #IMPLIED -- offset for alignment char --"
+ >
+
+<!-- vertical alignment attributes for cell contents -->
+<!ENTITY % cellvalign
+ "valign (top|middle|bottom|baseline) #IMPLIED"
+ >
+
+<!ELEMENT TABLE - -
+ (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)>
+<!ELEMENT CAPTION - - (%inline;)* -- table caption -->
+<!ELEMENT THEAD - O (TR)+ -- table header -->
+<!ELEMENT TFOOT - O (TR)+ -- table footer -->
+<!ELEMENT TBODY O O (TR)+ -- table body -->
+<!ELEMENT COLGROUP - O (col)* -- table column group -->
+<!ELEMENT COL - O EMPTY -- table column -->
+<!ELEMENT TR - O (TH|TD)+ -- table row -->
+<!ELEMENT (TH|TD) - O (%flow;)* -- table header cell, table data cell-->
+
+<!ATTLIST TABLE -- table element --
+ %attrs; -- %coreattrs, %i18n, %events --
+ summary %Text; #IMPLIED -- purpose/structure for speech output--
+ width %Length; #IMPLIED -- table width --
+ border %Pixels; #IMPLIED -- controls frame width around table --
+ frame %TFrame; #IMPLIED -- which parts of frame to render --
+ rules %TRules; #IMPLIED -- rulings between rows and cols --
+ cellspacing %Length; #IMPLIED -- spacing between cells --
+ cellpadding %Length; #IMPLIED -- spacing within cells --
+ %reserved; -- reserved for possible future use --
+ datapagesize CDATA #IMPLIED -- reserved for possible future use --
+ >
+
+<!ENTITY % CAlign "(top|bottom|left|right)">
+
+<!ATTLIST CAPTION
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--
+COLGROUP groups a set of COL elements. It allows you to group
+several semantically related columns together.
+-->
+<!ATTLIST COLGROUP
+ %attrs; -- %coreattrs, %i18n, %events --
+ span NUMBER 1 -- default number of columns in group --
+ width %MultiLength; #IMPLIED -- default width for enclosed COLs --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!--
+ COL elements define the alignment properties for cells in
+ one or more columns.
+
+ The WIDTH attribute specifies the width of the columns, e.g.
+
+ width=64 width in screen pixels
+ width=0.5* relative width of 0.5
+
+ The SPAN attribute causes the attributes of one
+ COL element to apply to more than one column.
+-->
+<!ATTLIST COL -- column groups and properties --
+ %attrs; -- %coreattrs, %i18n, %events --
+ span NUMBER 1 -- COL attributes affect N columns --
+ width %MultiLength; #IMPLIED -- column width specification --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!--
+ Use THEAD to duplicate headers when breaking table
+ across page boundaries, or for static headers when
+ TBODY sections are rendered in scrolling panel.
+
+ Use TFOOT to duplicate footers when breaking table
+ across page boundaries, or for static footers when
+ TBODY sections are rendered in scrolling panel.
+
+ Use multiple TBODY sections when rules are needed
+ between groups of table rows.
+-->
+<!ATTLIST (THEAD|TBODY|TFOOT) -- table section --
+ %attrs; -- %coreattrs, %i18n, %events --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!ATTLIST TR -- table row --
+ %attrs; -- %coreattrs, %i18n, %events --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+
+<!-- Scope is simpler than axes attribute for common tables -->
+<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
+
+<!-- TH is for headers, TD for data, but for cells acting as both use TD -->
+<!ATTLIST (TH|TD) -- header or data cell --
+ %attrs; -- %coreattrs, %i18n, %events --
+ abbr %Text; #IMPLIED -- abbreviation for header cell --
+ axis CDATA #IMPLIED -- names groups of related headers--
+ headers IDREFS #IMPLIED -- list of id's for header cells --
+ scope %Scope; #IMPLIED -- scope covered by header cells --
+ rowspan NUMBER 1 -- number of rows spanned by cell --
+ colspan NUMBER 1 -- number of cols spanned by cell --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+
+<!--================ Document Head =======================================-->
+<!-- %head.misc; defined earlier on as "SCRIPT|STYLE|META|LINK|OBJECT" -->
+<!ENTITY % head.content "TITLE & BASE?">
+
+<!ELEMENT HEAD O O (%head.content;) +(%head.misc;) -- document head -->
+<!ATTLIST HEAD
+ %i18n; -- lang, dir --
+ profile %URI; #IMPLIED -- named dictionary of meta info --
+ >
+
+<!-- The TITLE element is not considered part of the flow of text.
+ It should be displayed, for example as the page header or
+ window title. Exactly one title is required per document.
+ -->
+<!ELEMENT TITLE - - (#PCDATA) -(%head.misc;) -- document title -->
+<!ATTLIST TITLE %i18n>
+
+
+<!ELEMENT BASE - O EMPTY -- document base URI -->
+<!ATTLIST BASE
+ href %URI; #REQUIRED -- URI that acts as base URI --
+ >
+
+<!ELEMENT META - O EMPTY -- generic metainformation -->
+<!ATTLIST META
+ %i18n; -- lang, dir, for use with content --
+ http-equiv NAME #IMPLIED -- HTTP response header name --
+ name NAME #IMPLIED -- metainformation name --
+ content CDATA #REQUIRED -- associated information --
+ scheme CDATA #IMPLIED -- select form of content --
+ >
+
+<!ELEMENT STYLE - - %StyleSheet -- style info -->
+<!ATTLIST STYLE
+ %i18n; -- lang, dir, for use with title --
+ type %ContentType; #REQUIRED -- content type of style language --
+ media %MediaDesc; #IMPLIED -- designed for use with these media --
+ title %Text; #IMPLIED -- advisory title --
+ >
+
+<!ELEMENT SCRIPT - - %Script; -- script statements -->
+<!ATTLIST SCRIPT
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ type %ContentType; #REQUIRED -- content type of script language --
+ language CDATA #IMPLIED -- predefined script language name --
+ src %URI; #IMPLIED -- URI for an external script --
+ defer (defer) #IMPLIED -- UA may defer execution of script --
+ event CDATA #IMPLIED -- reserved for possible future use --
+ for %URI; #IMPLIED -- reserved for possible future use --
+ >
+
+<!ELEMENT NOSCRIPT - - (%block;)+
+ -- alternate content container for non script-based rendering -->
+<!ATTLIST NOSCRIPT
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--================ Document Structure ==================================-->
+<!ENTITY % html.content "HEAD, BODY">
+
+<!ELEMENT HTML O O (%html.content;) -- document root element -->
+<!ATTLIST HTML
+ %i18n; -- lang, dir --
+ >
diff --git a/tests/dtds/HTML4.dcl b/tests/dtds/HTML4.dcl
new file mode 100644
index 00000000..db46db0f
--- /dev/null
+++ b/tests/dtds/HTML4.dcl
@@ -0,0 +1,88 @@
+<!SGML "ISO 8879:1986 (WWW)"
+ --
+ SGML Declaration for HyperText Markup Language version 4.0
+
+ With support for the first 17 planes of ISO 10646 and
+ increased limits for tag and literal lengths etc.
+
+ Modified by jjc to work around SP's 16-bit character limit.
+ Modified by jjc to support hex character references.
+ --
+
+ CHARSET
+ BASESET "ISO Registration Number 177//CHARSET
+ ISO/IEC 10646-1:1993 UCS-4 with
+ implementation level 3//ESC 2/5 2/15 4/6"
+ DESCSET 0 9 UNUSED
+ 9 2 9
+ 11 2 UNUSED
+ 13 1 13
+ 14 18 UNUSED
+ 32 95 32
+ 127 1 UNUSED
+ 128 32 UNUSED
+ -- jjc: changed the rest of the DESCSET.
+ Note that surrogates are not declared UNUSED;
+ this allows non-BMP characters to be parsed. --
+ 160 65376 160
+ -- 160 55136 160
+ 55296 2048 UNUSED
+ 57344 1056768 57344 --
+
+CAPACITY SGMLREF
+ TOTALCAP 150000
+ GRPCAP 150000
+ ENTCAP 150000
+
+SCOPE DOCUMENT
+SYNTAX
+ SHUNCHAR CONTROLS 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
+ 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 127
+ BASESET "ISO 646IRV:1991//CHARSET
+ International Reference Version
+ (IRV)//ESC 2/8 4/2"
+ DESCSET 0 128 0
+
+ FUNCTION
+ RE 13
+ RS 10
+ SPACE 32
+ TAB SEPCHAR 9
+
+ NAMING LCNMSTRT ""
+ UCNMSTRT ""
+ LCNMCHAR ".-_:"
+ UCNMCHAR ".-_:"
+ NAMECASE GENERAL YES
+ ENTITY NO
+ DELIM GENERAL SGMLREF
+ HCRO "&#38;#X" -- added by jjc --
+ SHORTREF SGMLREF
+ NAMES SGMLREF
+ QUANTITY SGMLREF
+ ATTCNT 60 -- increased --
+ ATTSPLEN 65536 -- These are the largest values --
+ LITLEN 65536 -- permitted in the declaration --
+ NAMELEN 65536 -- Avoid fixed limits in actual --
+ PILEN 65536 -- implementations of HTML UA's --
+ TAGLVL 100
+ TAGLEN 65536
+ GRPGTCNT 150
+ GRPCNT 64
+
+FEATURES
+ MINIMIZE
+ DATATAG NO
+ OMITTAG YES
+ RANK NO
+ SHORTTAG YES
+ LINK
+ SIMPLE NO
+ IMPLICIT NO
+ EXPLICIT NO
+ OTHER
+ CONCUR NO
+ SUBDOC NO
+ FORMAL YES
+ APPINFO NONE
+> \ No newline at end of file
diff --git a/tests/dtds/HTML4.dtd b/tests/dtds/HTML4.dtd
new file mode 100644
index 00000000..9e781db1
--- /dev/null
+++ b/tests/dtds/HTML4.dtd
@@ -0,0 +1,1092 @@
+<!--
+ This is the HTML 4.0 Transitional DTD, which includes
+ presentation attributes and elements that W3C expects to phase out
+ as support for style sheets matures. Authors should use the Strict
+ DTD when possible, but may use the Transitional DTD when support
+ for presentation attributes and elements is required.
+
+ HTML 4.0 includes mechanisms for style sheets, scripting,
+ embedding objects, improved support for right to left and mixed
+ direction text, and enhancements to forms for improved
+ accessibility for people with disabilities.
+
+ Draft: $Date: 1999/05/02 15:37:15 $
+
+ Authors:
+ Dave Raggett <dsr@w3.org>
+ Arnaud Le Hors <lehors@w3.org>
+ Ian Jacobs <ij@w3.org>
+
+ Further information about HTML 4.0 is available at:
+
+ http://www.w3.org/TR/REC-html40
+-->
+<!ENTITY % HTML.Version "-//W3C//DTD HTML 4.0 Transitional//EN"
+ -- Typical usage:
+
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+ "http://www.w3.org/TR/REC-html40/loose.dtd">
+ <html>
+ <head>
+ ...
+ </head>
+ <body>
+ ...
+ </body>
+ </html>
+
+ The URI used as a system identifier with the public identifier allows
+ the user agent to download the DTD and entity sets as needed.
+
+ The FPI for the Strict HTML 4.0 DTD is:
+
+ "-//W3C//DTD HTML 4.0//EN"
+
+ and its URI is:
+
+ http://www.w3.org/TR/REC-html40/strict.dtd
+
+ Authors should use the Strict DTD unless they need the
+ presentation control for user agents that don't (adequately)
+ support style sheets.
+
+ If you are writing a document that includes frames, use
+ the following FPI:
+
+ "-//W3C//DTD HTML 4.0 Frameset//EN"
+
+ with the URI:
+
+ http://www.w3.org/TR/REC-html40/frameset.dtd
+
+ The following URIs are supported in relation to HTML 4.0
+
+ "http://www.w3.org/TR/REC-html40/strict.dtd" (Strict DTD)
+ "http://www.w3.org/TR/REC-html40/loose.dtd" (Loose DTD)
+ "http://www.w3.org/TR/REC-html40/frameset.dtd" (Frameset DTD)
+ "http://www.w3.org/TR/REC-html40/HTMLlat1.ent" (Latin-1 entities)
+ "http://www.w3.org/TR/REC-html40/HTMLsymbol.ent" (Symbol entities)
+ "http://www.w3.org/TR/REC-html40/HTMLspecial.ent" (Special entities)
+
+ These URIs point to the latest version of each file. To reference
+ this specific revision use the following URIs:
+
+ "http://www.w3.org/TR/REC-html40-971218/strict.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/loose.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/frameset.dtd"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent"
+
+-->
+
+<!--================== Imported Names ====================================-->
+
+<!ENTITY % ContentType "CDATA"
+ -- media type, as per [RFC2045]
+ -->
+
+<!ENTITY % ContentTypes "CDATA"
+ -- comma-separated list of media types, as per [RFC2045]
+ -->
+
+<!ENTITY % Charset "CDATA"
+ -- a character encoding, as per [RFC2045]
+ -->
+
+<!ENTITY % Charsets "CDATA"
+ -- a space separated list of character encodings, as per [RFC2045]
+ -->
+
+<!ENTITY % LanguageCode "NAME"
+ -- a language code, as per [RFC1766]
+ -->
+
+<!ENTITY % Character "CDATA"
+ -- a single character from [ISO10646]
+ -->
+
+<!ENTITY % LinkTypes "CDATA"
+ -- space-separated list of link types
+ -->
+
+<!ENTITY % MediaDesc "CDATA"
+ -- single or comma-separated list of media descriptors
+ -->
+
+<!ENTITY % URI "CDATA"
+ -- a Uniform Resource Identifier,
+ see [URI]
+ -->
+
+<!ENTITY % Datetime "CDATA" -- date and time information. ISO date format -->
+
+
+<!ENTITY % Script "CDATA" -- script expression -->
+
+<!ENTITY % StyleSheet "CDATA" -- style sheet data -->
+
+<!ENTITY % FrameTarget "CDATA" -- render in this frame -->
+
+
+<!ENTITY % Text "CDATA">
+
+
+<!-- Parameter Entities -->
+
+<!ENTITY % head.misc "SCRIPT|STYLE|META|LINK|OBJECT" -- repeatable head elements -->
+
+<!ENTITY % heading "H1|H2|H3|H4|H5|H6">
+
+<!ENTITY % list "UL | OL | DIR | MENU">
+
+<!ENTITY % preformatted "PRE">
+
+<!ENTITY % Color "CDATA" -- a color using sRGB: #RRGGBB as Hex values -->
+
+<!-- There are also 16 widely known color names with their sRGB values:
+
+ Black = #000000 Green = #008000
+ Silver = #C0C0C0 Lime = #00FF00
+ Gray = #808080 Olive = #808000
+ White = #FFFFFF Yellow = #FFFF00
+ Maroon = #800000 Navy = #000080
+ Red = #FF0000 Blue = #0000FF
+ Purple = #800080 Teal = #008080
+ Fuchsia= #FF00FF Aqua = #00FFFF
+ -->
+
+<!ENTITY % bodycolors "
+ bgcolor %Color; #IMPLIED -- document background color --
+ text %Color; #IMPLIED -- document text color --
+ link %Color; #IMPLIED -- color of links --
+ vlink %Color; #IMPLIED -- color of visited links --
+ alink %Color; #IMPLIED -- color of selected links --
+ ">
+
+<!--================ Character mnemonic entities =========================-->
+
+<!ENTITY % HTMLlat1 PUBLIC
+ "-//W3C//ENTITIES Latin1//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent">
+%HTMLlat1;
+
+<!ENTITY % HTMLsymbol PUBLIC
+ "-//W3C//ENTITIES Symbols//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent">
+%HTMLsymbol;
+
+<!ENTITY % HTMLspecial PUBLIC
+ "-//W3C//ENTITIES Special//EN//HTML"
+ "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent">
+%HTMLspecial;
+<!--=================== Generic Attributes ===============================-->
+
+<!ENTITY % coreattrs
+ "id ID #IMPLIED -- document-wide unique id --
+ class CDATA #IMPLIED -- space separated list of classes --
+ style %StyleSheet; #IMPLIED -- associated style info --
+ title %Text; #IMPLIED -- advisory title/amplification --"
+ >
+
+<!ENTITY % i18n
+ "lang %LanguageCode; #IMPLIED -- language code --
+ dir (ltr|rtl) #IMPLIED -- direction for weak/neutral text --"
+ >
+
+<!ENTITY % events
+ "onclick %Script; #IMPLIED -- a pointer button was clicked --
+ ondblclick %Script; #IMPLIED -- a pointer button was double clicked--
+ onmousedown %Script; #IMPLIED -- a pointer button was pressed down --
+ onmouseup %Script; #IMPLIED -- a pointer button was released --
+ onmouseover %Script; #IMPLIED -- a pointer was moved onto --
+ onmousemove %Script; #IMPLIED -- a pointer was moved within --
+ onmouseout %Script; #IMPLIED -- a pointer was moved away --
+ onkeypress %Script; #IMPLIED -- a key was pressed and released --
+ onkeydown %Script; #IMPLIED -- a key was pressed down --
+ onkeyup %Script; #IMPLIED -- a key was released --"
+ >
+
+<!-- Reserved Feature Switch -->
+<!ENTITY % HTML.Reserved "IGNORE">
+
+<!-- The following attributes are reserved for possible future use -->
+<![ %HTML.Reserved; [
+<!ENTITY % reserved
+ "datasrc %URI; #IMPLIED -- a single or tabular Data Source --
+ datafld CDATA #IMPLIED -- the property or column name --
+ dataformatas (plaintext|html) plaintext -- text or html --"
+ >
+]]>
+
+<!ENTITY % reserved "">
+
+<!ENTITY % attrs "%coreattrs; %i18n; %events;">
+
+<!ENTITY % align "align (left|center|right|justify) #IMPLIED"
+ -- default is left for ltr paragraphs, right for rtl --
+ >
+
+<!--=================== Text Markup ======================================-->
+
+<!ENTITY % fontstyle
+ "TT | I | B | U | S | STRIKE | BIG | SMALL">
+
+<!ENTITY % phrase "EM | STRONG | DFN | CODE |
+ SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
+
+<!ENTITY % special
+ "A | IMG | APPLET | OBJECT | FONT | BASEFONT | BR | SCRIPT |
+ MAP | Q | SUB | SUP | SPAN | BDO | IFRAME">
+
+<!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
+
+<!-- %inline; covers inline or "text-level" elements -->
+<!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
+
+<!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*>
+<!ATTLIST (%fontstyle;|%phrase;)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT (SUB|SUP) - - (%inline;)* -- subscript, superscript -->
+<!ATTLIST (SUB|SUP)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT SPAN - - (%inline;)* -- generic language/style container -->
+<!ATTLIST SPAN
+ %attrs; -- %coreattrs, %i18n, %events --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT BDO - - (%inline;)* -- I18N BiDi over-ride -->
+<!ATTLIST BDO
+ %coreattrs; -- id, class, style, title --
+ lang %LanguageCode; #IMPLIED -- language code --
+ dir (ltr|rtl) #REQUIRED -- directionality --
+ >
+
+<!ELEMENT BASEFONT - O EMPTY -- base font size -->
+<!ATTLIST BASEFONT
+ id ID #IMPLIED -- document-wide unique id --
+ size CDATA #REQUIRED -- base font size for FONT elements --
+ color %Color; #IMPLIED -- text color --
+ face CDATA #IMPLIED -- comma separated list of font names --
+ >
+
+<!ELEMENT FONT - - (%inline;)* -- local change to font -->
+<!ATTLIST FONT
+ %coreattrs; -- id, class, style, title --
+ %i18n; -- lang, dir --
+ size CDATA #IMPLIED -- [+|-]nn e.g. size="+1", size="4" --
+ color %Color; #IMPLIED -- text color --
+ face CDATA #IMPLIED -- comma separated list of font names --
+ >
+
+<!ELEMENT BR - O EMPTY -- forced line break -->
+<!ATTLIST BR
+ %coreattrs; -- id, class, style, title --
+ clear (left|all|right|none) none -- control of text flow --
+ >
+
+<!--================== HTML content models ===============================-->
+
+<!--
+ HTML has two basic content models:
+
+ %inline; character level elements and text strings
+ %block; block-like elements e.g. paragraphs and lists
+-->
+
+<!ENTITY % block
+ "P | %heading; | %list; | %preformatted; | DL | DIV | CENTER |
+ NOSCRIPT | NOFRAMES | BLOCKQUOTE | FORM | ISINDEX | HR |
+ TABLE | FIELDSET | ADDRESS">
+
+<!ENTITY % flow "%block; | %inline;">
+
+<!--=================== Document Body ====================================-->
+
+<!ELEMENT BODY O O (%flow;)* +(INS|DEL) -- document body -->
+<!ATTLIST BODY
+ %attrs; -- %coreattrs, %i18n, %events --
+ onload %Script; #IMPLIED -- the document has been loaded --
+ onunload %Script; #IMPLIED -- the document has been removed --
+ background %URI; #IMPLIED -- texture tile for document
+ background --
+ %bodycolors; -- bgcolor, text, link, vlink, alink --
+ >
+
+<!ELEMENT ADDRESS - - ((%inline;)|P)* -- information on author -->
+<!ATTLIST ADDRESS
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT DIV - - (%flow;)* -- generic language/style container -->
+<!ATTLIST DIV
+ %attrs; -- %coreattrs, %i18n, %events --
+ %align; -- align, text alignment --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT CENTER - - (%flow;)* -- shorthand for DIV align=center -->
+<!ATTLIST CENTER
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--================== The Anchor Element ================================-->
+
+<!ENTITY % Shape "(rect|circle|poly|default)">
+<!ENTITY % Coords "CDATA" -- comma separated list of lengths -->
+
+<!ELEMENT A - - (%inline;)* -(A) -- anchor -->
+<!ATTLIST A
+ %attrs; -- %coreattrs, %i18n, %events --
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ type %ContentType; #IMPLIED -- advisory content type --
+ name CDATA #IMPLIED -- named link end --
+ href %URI; #IMPLIED -- URI for linked resource --
+ hreflang %LanguageCode; #IMPLIED -- language code --
+ target %FrameTarget; #IMPLIED -- render in this frame --
+ rel %LinkTypes; #IMPLIED -- forward link types --
+ rev %LinkTypes; #IMPLIED -- reverse link types --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ shape %Shape; rect -- for use with client-side image maps --
+ coords %Coords; #IMPLIED -- for use with client-side image maps --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!--================== Client-side image maps ============================-->
+
+<!-- These can be placed in the same document or grouped in a
+ separate document although this isn't yet widely supported -->
+
+<!ELEMENT MAP - - ((%block;)+ | AREA+) -- client-side image map -->
+<!ATTLIST MAP
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #REQUIRED -- for reference by usemap --
+ >
+
+<!ELEMENT AREA - O EMPTY -- client-side image map area -->
+<!ATTLIST AREA
+ %attrs; -- %coreattrs, %i18n, %events --
+ shape %Shape; rect -- controls interpretation of coords --
+ coords %Coords; #IMPLIED -- comma separated list of lengths --
+ href %URI; #IMPLIED -- URI for linked resource --
+ target %FrameTarget; #IMPLIED -- render in this frame --
+ nohref (nohref) #IMPLIED -- this region has no action --
+ alt %Text; #REQUIRED -- short description --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!--================== The LINK Element ==================================-->
+
+<!--
+ Relationship values can be used in principle:
+
+ a) for document specific toolbars/menus when used
+ with the LINK element in document head e.g.
+ start, contents, previous, next, index, end, help
+ b) to link to a separate style sheet (rel=stylesheet)
+ c) to make a link to a script (rel=script)
+ d) by stylesheets to control how collections of
+ html nodes are rendered into printed documents
+ e) to make a link to a printable version of this document
+ e.g. a postscript or pdf version (rel=alternate media=print)
+-->
+
+<!ELEMENT LINK - O EMPTY -- a media-independent link -->
+<!ATTLIST LINK
+ %attrs; -- %coreattrs, %i18n, %events --
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ href %URI; #IMPLIED -- URI for linked resource --
+ hreflang %LanguageCode; #IMPLIED -- language code --
+ type %ContentType; #IMPLIED -- advisory content type --
+ rel %LinkTypes; #IMPLIED -- forward link types --
+ rev %LinkTypes; #IMPLIED -- reverse link types --
+ media %MediaDesc; #IMPLIED -- for rendering on these media --
+ target %FrameTarget; #IMPLIED -- render in this frame --
+ >
+
+<!--=================== Images ===========================================-->
+
+<!-- Length defined in strict DTD for cellpadding/cellspacing -->
+<!ENTITY % Length "CDATA" -- nn for pixels or nn% for percentage length -->
+<!ENTITY % MultiLength "CDATA" -- pixel, percentage, or relative -->
+
+<!ENTITY % MultiLengths "CDATA" -- comma-separated list of MultiLength -->
+
+<!ENTITY % Pixels "CDATA" -- integer representing length in pixels -->
+
+<!ENTITY % IAlign "(top|middle|bottom|left|right)" -- center? -->
+
+<!-- To avoid problems with text-only UAs as well as
+ to make image content understandable and navigable
+ to users of non-visual UAs, you need to provide
+ a description with ALT, and avoid server-side image maps -->
+<!ELEMENT IMG - O EMPTY -- Embedded image -->
+<!ATTLIST IMG
+ %attrs; -- %coreattrs, %i18n, %events --
+ src %URI; #REQUIRED -- URI of image to embed --
+ alt %Text; #REQUIRED -- short description --
+ longdesc %URI; #IMPLIED -- link to long description
+ (complements alt) --
+ height %Length; #IMPLIED -- override height --
+ width %Length; #IMPLIED -- override width --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ ismap (ismap) #IMPLIED -- use server-side image map --
+ align %IAlign; #IMPLIED -- vertical or horizontal alignment --
+ border %Length; #IMPLIED -- link border width --
+ hspace %Pixels; #IMPLIED -- horizontal gutter --
+ vspace %Pixels; #IMPLIED -- vertical gutter --
+ >
+
+<!-- USEMAP points to a MAP element which may be in this document
+ or an external document, although the latter is not widely supported -->
+
+<!--==================== OBJECT ======================================-->
+<!--
+ OBJECT is used to embed objects as part of HTML pages
+ PARAM elements should precede other content. SGML mixed content
+ model technicality precludes specifying this formally ...
+-->
+
+<!ELEMENT OBJECT - - (PARAM | %flow;)*
+ -- generic embedded object -->
+<!ATTLIST OBJECT
+ %attrs; -- %coreattrs, %i18n, %events --
+ declare (declare) #IMPLIED -- declare but don't instantiate flag --
+ classid %URI; #IMPLIED -- identifies an implementation --
+ codebase %URI; #IMPLIED -- base URI for classid, data, archive--
+ data %URI; #IMPLIED -- reference to object's data --
+ type %ContentType; #IMPLIED -- content type for data --
+ codetype %ContentType; #IMPLIED -- content type for code --
+ archive %URI; #IMPLIED -- space separated archive list --
+ standby %Text; #IMPLIED -- message to show while loading --
+ height %Length; #IMPLIED -- override height --
+ width %Length; #IMPLIED -- override width --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ name CDATA #IMPLIED -- submit as part of form --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ align %IAlign; #IMPLIED -- vertical or horizontal alignment --
+ border %Length; #IMPLIED -- link border width --
+ hspace %Pixels; #IMPLIED -- horizontal gutter --
+ vspace %Pixels; #IMPLIED -- vertical gutter --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT PARAM - O EMPTY -- named property value -->
+<!ATTLIST PARAM
+ id ID #IMPLIED -- document-wide unique id --
+ name CDATA #REQUIRED -- property name --
+ value CDATA #IMPLIED -- property value --
+ valuetype (DATA|REF|OBJECT) DATA -- How to interpret value --
+ type %ContentType; #IMPLIED -- content type for value
+ when valuetype=ref --
+ >
+
+<!--=================== Java APPLET ==================================-->
+<!--
+ One of code or object attributes must be present.
+ Place PARAM elements before other content.
+-->
+<!ELEMENT APPLET - - (PARAM | %flow;)* -- Java applet -->
+<!ATTLIST APPLET
+ %coreattrs; -- id, class, style, title --
+ codebase %URI; #IMPLIED -- optional base URI for applet --
+ archive CDATA #IMPLIED -- comma separated archive list --
+ code CDATA #IMPLIED -- applet class file --
+ object CDATA #IMPLIED -- serialized applet file --
+ alt %Text; #IMPLIED -- short description --
+ name CDATA #IMPLIED -- allows applets to find each other --
+ width %Length; #REQUIRED -- initial width --
+ height %Length; #REQUIRED -- initial height --
+ align %IAlign; #IMPLIED -- vertical or horizontal alignment --
+ hspace %Pixels; #IMPLIED -- horizontal gutter --
+ vspace %Pixels; #IMPLIED -- vertical gutter --
+ >
+
+<!--=================== Horizontal Rule ==================================-->
+
+<!ELEMENT HR - O EMPTY -- horizontal rule -->
+<!ATTLIST HR
+ %coreattrs; -- id, class, style, title --
+ %events;
+ align (left|center|right) #IMPLIED
+ noshade (noshade) #IMPLIED
+ size %Pixels; #IMPLIED
+ width %Length; #IMPLIED
+ >
+
+<!--=================== Paragraphs =======================================-->
+
+<!ELEMENT P - O (%inline;)* -- paragraph -->
+<!ATTLIST P
+ %attrs; -- %coreattrs, %i18n, %events --
+ %align; -- align, text alignment --
+ >
+
+<!--=================== Headings =========================================-->
+
+<!--
+ There are six levels of headings from H1 (the most important)
+ to H6 (the least important).
+-->
+
+<!ELEMENT (%heading;) - - (%inline;)* -- heading -->
+<!ATTLIST (%heading;)
+ %attrs; -- %coreattrs, %i18n, %events --
+ %align; -- align, text alignment --
+ >
+
+<!--=================== Preformatted Text ================================-->
+
+<!-- excludes markup for images and changes in font size -->
+<!ENTITY % pre.exclusion "IMG|OBJECT|APPLET|BIG|SMALL|SUB|SUP|FONT|BASEFONT">
+
+<!ELEMENT PRE - - (%inline;)* -(%pre.exclusion;) -- preformatted text -->
+<!ATTLIST PRE
+ %attrs; -- %coreattrs, %i18n, %events --
+ width NUMBER #IMPLIED
+ >
+
+<!--===================== Inline Quotes ==================================-->
+
+<!ELEMENT Q - - (%inline;)* -- short inline quotation -->
+<!ATTLIST Q
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- URI for source document or msg --
+ >
+
+<!--=================== Block-like Quotes ================================-->
+
+<!ELEMENT BLOCKQUOTE - - (%flow;)* -- long quotation -->
+<!ATTLIST BLOCKQUOTE
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- URI for source document or msg --
+ >
+
+<!--=================== Inserted/Deleted Text ============================-->
+
+
+<!-- INS/DEL are handled by inclusion on BODY -->
+<!ELEMENT (INS|DEL) - - (%flow;)* -- inserted text, deleted text -->
+<!ATTLIST (INS|DEL)
+ %attrs; -- %coreattrs, %i18n, %events --
+ cite %URI; #IMPLIED -- info on reason for change --
+ datetime %Datetime; #IMPLIED -- date and time of change --
+ >
+
+<!--=================== Lists ============================================-->
+
+<!-- definition lists - DT for term, DD for its definition -->
+
+<!ELEMENT DL - - (DT|DD)+ -- definition list -->
+<!ATTLIST DL
+ %attrs; -- %coreattrs, %i18n, %events --
+ compact (compact) #IMPLIED -- reduced interitem spacing --
+ >
+
+<!ELEMENT DT - O (%inline;)* -- definition term -->
+<!ELEMENT DD - O (%flow;)* -- definition description -->
+<!ATTLIST (DT|DD)
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!-- Ordered lists (OL) Numbering style
+
+ 1 arabic numbers 1, 2, 3, ...
+ a lower alpha a, b, c, ...
+ A upper alpha A, B, C, ...
+ i lower roman i, ii, iii, ...
+ I upper roman I, II, III, ...
+
+ The style is applied to the sequence number which by default
+ is reset to 1 for the first list item in an ordered list.
+
+ This can't be expressed directly in SGML due to case folding.
+-->
+
+<!ENTITY % OLStyle "CDATA" -- constrained to: "(1|a|A|i|I)" -->
+
+<!ELEMENT OL - - (LI)+ -- ordered list -->
+<!ATTLIST OL
+ %attrs; -- %coreattrs, %i18n, %events --
+ type %OLStyle; #IMPLIED -- numbering style --
+ compact (compact) #IMPLIED -- reduced interitem spacing --
+ start NUMBER #IMPLIED -- starting sequence number --
+ >
+
+<!-- Unordered Lists (UL) bullet styles -->
+<!ENTITY % ULStyle "(disc|square|circle)">
+
+<!ELEMENT UL - - (LI)+ -- unordered list -->
+<!ATTLIST UL
+ %attrs; -- %coreattrs, %i18n, %events --
+ type %ULStyle; #IMPLIED -- bullet style --
+ compact (compact) #IMPLIED -- reduced interitem spacing --
+ >
+
+<!ELEMENT (DIR|MENU) - - (LI)+ -(%block;) -- directory list, menu list -->
+<!ATTLIST DIR
+ %attrs; -- %coreattrs, %i18n, %events --
+ compact (compact) #IMPLIED
+ >
+<!ATTLIST MENU
+ %attrs; -- %coreattrs, %i18n, %events --
+ compact (compact) #IMPLIED
+ >
+
+<!ENTITY % LIStyle "CDATA" -- constrained to: "(%ULStyle;|%OLStyle;)" -->
+
+<!ELEMENT LI - O (%flow;)* -- list item -->
+<!ATTLIST LI
+ %attrs; -- %coreattrs, %i18n, %events --
+ type %LIStyle; #IMPLIED -- list item style --
+ value NUMBER #IMPLIED -- reset sequence number --
+ >
+
+<!--================ Forms ===============================================-->
+<!ELEMENT FORM - - (%flow;)* -(FORM) -- interactive form -->
+<!ATTLIST FORM
+ %attrs; -- %coreattrs, %i18n, %events --
+ action %URI; #REQUIRED -- server-side form handler --
+ method (GET|POST) GET -- HTTP method used to submit the form--
+ enctype %ContentType; "application/x-www-form-urlencoded"
+ onsubmit %Script; #IMPLIED -- the form was submitted --
+ onreset %Script; #IMPLIED -- the form was reset --
+ target %FrameTarget; #IMPLIED -- render in this frame --
+ accept-charset %Charsets; #IMPLIED -- list of supported charsets --
+ >
+
+<!-- Each label must not contain more than ONE field -->
+<!ELEMENT LABEL - - (%inline;)* -(LABEL) -- form field label text -->
+<!ATTLIST LABEL
+ %attrs; -- %coreattrs, %i18n, %events --
+ for IDREF #IMPLIED -- matches field ID value --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ >
+
+<!ENTITY % InputType
+ "(TEXT | PASSWORD | CHECKBOX |
+ RADIO | SUBMIT | RESET |
+ FILE | HIDDEN | IMAGE | BUTTON)"
+ >
+
+<!-- attribute name required for all but submit & reset -->
+<!ELEMENT INPUT - O EMPTY -- form control -->
+<!ATTLIST INPUT
+ %attrs; -- %coreattrs, %i18n, %events --
+ type %InputType; TEXT -- what kind of widget is needed --
+ name CDATA #IMPLIED -- submit as part of form --
+ value CDATA #IMPLIED -- required for radio and checkboxes --
+ checked (checked) #IMPLIED -- for radio buttons and check boxes --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ readonly (readonly) #IMPLIED -- for text and passwd --
+ size CDATA #IMPLIED -- specific to each type of field --
+ maxlength NUMBER #IMPLIED -- max chars for text fields --
+ src %URI; #IMPLIED -- for fields with images --
+ alt CDATA #IMPLIED -- short description --
+ usemap %URI; #IMPLIED -- use client-side image map --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onselect %Script; #IMPLIED -- some text was selected --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ accept %ContentTypes; #IMPLIED -- list of MIME types for file upload --
+ align %IAlign; #IMPLIED -- vertical or horizontal alignment --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT SELECT - - (OPTGROUP|OPTION)+ -- option selector -->
+<!ATTLIST SELECT
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED -- field name --
+ size NUMBER #IMPLIED -- rows visible --
+ multiple (multiple) #IMPLIED -- default is single selection --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!ELEMENT OPTGROUP - - (OPTION)+ -- option group -->
+<!ATTLIST OPTGROUP
+ %attrs; -- %coreattrs, %i18n, %events --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ label %Text; #REQUIRED -- for use in hierarchical menus --
+ >
+
+<!ELEMENT OPTION - O (#PCDATA) -- selectable choice -->
+<!ATTLIST OPTION
+ %attrs; -- %coreattrs, %i18n, %events --
+ selected (selected) #IMPLIED
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ label %Text; #IMPLIED -- for use in hierarchical menus --
+ value CDATA #IMPLIED -- defaults to element content --
+ >
+
+<!ELEMENT TEXTAREA - - (#PCDATA) -- multi-line text field -->
+<!ATTLIST TEXTAREA
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED
+ rows NUMBER #REQUIRED
+ cols NUMBER #REQUIRED
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ readonly (readonly) #IMPLIED
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ onselect %Script; #IMPLIED -- some text was selected --
+ onchange %Script; #IMPLIED -- the element value was changed --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!--
+ #PCDATA is to solve the mixed content problem,
+ per specification only whitespace is allowed there!
+ -->
+<!ELEMENT FIELDSET - - (#PCDATA,LEGEND,(%flow;)*) -- form control group -->
+<!ATTLIST FIELDSET
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!ELEMENT LEGEND - - (%inline;)* -- fieldset legend -->
+<!ENTITY % LAlign "(top|bottom|left|right)">
+
+<!ATTLIST LEGEND
+ %attrs; -- %coreattrs, %i18n, %events --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ align %LAlign; #IMPLIED -- relative to fieldset --
+ >
+
+<!ELEMENT BUTTON - -
+ (%flow;)* -(A|%formctrl;|FORM|ISINDEX|FIELDSET|IFRAME)
+ -- push button -->
+<!ATTLIST BUTTON
+ %attrs; -- %coreattrs, %i18n, %events --
+ name CDATA #IMPLIED
+ value CDATA #IMPLIED -- sent to server when submitted --
+ type (button|submit|reset) submit -- for use as form button --
+ disabled (disabled) #IMPLIED -- unavailable in this context --
+ tabindex NUMBER #IMPLIED -- position in tabbing order --
+ accesskey %Character; #IMPLIED -- accessibility key character --
+ onfocus %Script; #IMPLIED -- the element got the focus --
+ onblur %Script; #IMPLIED -- the element lost the focus --
+ %reserved; -- reserved for possible future use --
+ >
+
+<!--======================= Tables =======================================-->
+
+<!-- IETF HTML table standard, see [RFC1942] -->
+
+<!--
+ The BORDER attribute sets the thickness of the frame around the
+ table. The default units are screen pixels.
+
+ The FRAME attribute specifies which parts of the frame around
+ the table should be rendered. The values are not the same as
+ CALS to avoid a name clash with the VALIGN attribute.
+
+ The value "border" is included for backwards compatibility with
+ <TABLE BORDER> which yields frame=border and border=implied
+ For <TABLE BORDER=1> you get border=1 and frame=implied. In this
+ case, it is appropriate to treat this as frame=border for backwards
+ compatibility with deployed browsers.
+-->
+<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
+
+<!--
+ The RULES attribute defines which rules to draw between cells:
+
+ If RULES is absent then assume:
+ "none" if BORDER is absent or BORDER=0 otherwise "all"
+-->
+
+<!ENTITY % TRules "(none | groups | rows | cols | all)">
+
+<!-- horizontal placement of table relative to document -->
+<!ENTITY % TAlign "(left|center|right)">
+
+<!-- horizontal alignment attributes for cell contents -->
+<!ENTITY % cellhalign
+ "align (left|center|right|justify|char) #IMPLIED
+ char %Character; #IMPLIED -- alignment char, e.g. char=':' --
+ charoff %Length; #IMPLIED -- offset for alignment char --"
+ >
+
+<!-- vertical alignment attributes for cell contents -->
+<!ENTITY % cellvalign
+ "valign (top|middle|bottom|baseline) #IMPLIED"
+ >
+
+<!ELEMENT TABLE - -
+ (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)>
+<!ELEMENT CAPTION - - (%inline;)* -- table caption -->
+<!ELEMENT THEAD - O (TR)+ -- table header -->
+<!ELEMENT TFOOT - O (TR)+ -- table footer -->
+<!ELEMENT TBODY O O (TR)+ -- table body -->
+<!ELEMENT COLGROUP - O (col)* -- table column group -->
+<!ELEMENT COL - O EMPTY -- table column -->
+<!ELEMENT TR - O (TH|TD)+ -- table row -->
+<!ELEMENT (TH|TD) - O (%flow;)* -- table header cell, table data cell-->
+
+<!ATTLIST TABLE -- table element --
+ %attrs; -- %coreattrs, %i18n, %events --
+ summary %Text; #IMPLIED -- purpose/structure for speech output--
+ width %Length; #IMPLIED -- table width --
+ border %Pixels; #IMPLIED -- controls frame width around table --
+ frame %TFrame; #IMPLIED -- which parts of frame to render --
+ rules %TRules; #IMPLIED -- rulings between rows and cols --
+ cellspacing %Length; #IMPLIED -- spacing between cells --
+ cellpadding %Length; #IMPLIED -- spacing within cells --
+ align %TAlign; #IMPLIED -- table position relative to window --
+ bgcolor %Color; #IMPLIED -- background color for cells --
+ %reserved; -- reserved for possible future use --
+ datapagesize CDATA #IMPLIED -- reserved for possible future use --
+ >
+
+<!ENTITY % CAlign "(top|bottom|left|right)">
+
+<!ATTLIST CAPTION
+ %attrs; -- %coreattrs, %i18n, %events --
+ align %CAlign; #IMPLIED -- relative to table --
+ >
+
+<!--
+COLGROUP groups a set of COL elements. It allows you to group
+several semantically related columns together.
+-->
+<!ATTLIST COLGROUP
+ %attrs; -- %coreattrs, %i18n, %events --
+ span NUMBER 1 -- default number of columns in group --
+ width %MultiLength; #IMPLIED -- default width for enclosed COLs --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!--
+ COL elements define the alignment properties for cells in
+ one or more columns.
+
+ The WIDTH attribute specifies the width of the columns, e.g.
+
+ width=64 width in screen pixels
+ width=0.5* relative width of 0.5
+
+ The SPAN attribute causes the attributes of one
+ COL element to apply to more than one column.
+-->
+<!ATTLIST COL -- column groups and properties --
+ %attrs; -- %coreattrs, %i18n, %events --
+ span NUMBER 1 -- COL attributes affect N columns --
+ width %MultiLength; #IMPLIED -- column width specification --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!--
+ Use THEAD to duplicate headers when breaking table
+ across page boundaries, or for static headers when
+ TBODY sections are rendered in scrolling panel.
+
+ Use TFOOT to duplicate footers when breaking table
+ across page boundaries, or for static footers when
+ TBODY sections are rendered in scrolling panel.
+
+ Use multiple TBODY sections when rules are needed
+ between groups of table rows.
+-->
+<!ATTLIST (THEAD|TBODY|TFOOT) -- table section --
+ %attrs; -- %coreattrs, %i18n, %events --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ >
+
+<!ATTLIST TR -- table row --
+ %attrs; -- %coreattrs, %i18n, %events --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ bgcolor %Color; #IMPLIED -- background color for row --
+ >
+
+
+<!-- Scope is simpler than axes attribute for common tables -->
+<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
+
+<!-- TH is for headers, TD for data, but for cells acting as both use TD -->
+<!ATTLIST (TH|TD) -- header or data cell --
+ %attrs; -- %coreattrs, %i18n, %events --
+ abbr %Text; #IMPLIED -- abbreviation for header cell --
+ axis CDATA #IMPLIED -- names groups of related headers--
+ headers IDREFS #IMPLIED -- list of id's for header cells --
+ scope %Scope; #IMPLIED -- scope covered by header cells --
+ rowspan NUMBER 1 -- number of rows spanned by cell --
+ colspan NUMBER 1 -- number of cols spanned by cell --
+ %cellhalign; -- horizontal alignment in cells --
+ %cellvalign; -- vertical alignment in cells --
+ nowrap (nowrap) #IMPLIED -- suppress word wrap --
+ bgcolor %Color; #IMPLIED -- cell background color --
+ width %Pixels; #IMPLIED -- width for cell --
+ height %Pixels; #IMPLIED -- height for cell --
+ >
+
+<!--================== Document Frames ===================================-->
+
+<!--
+ The content model for HTML documents depends on whether the HEAD is
+ followed by a FRAMESET or BODY element. The widespread omission of
+ the BODY start tag makes it impractical to define the content model
+ without the use of a marked section.
+-->
+
+<!-- Feature Switch for frameset documents -->
+<!ENTITY % HTML.Frameset "IGNORE">
+
+<![ %HTML.Frameset; [
+<!ELEMENT FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?) -- window subdivision-->
+<!ATTLIST FRAMESET
+ %coreattrs; -- id, class, style, title --
+ rows %MultiLengths; #IMPLIED -- list of lengths,
+ default: 100% (1 row) --
+ cols %MultiLengths; #IMPLIED -- list of lengths,
+ default: 100% (1 col) --
+ onload %Script; #IMPLIED -- all the frames have been loaded --
+ onunload %Script; #IMPLIED -- all the frames have been removed --
+ >
+]]>
+
+<![ %HTML.Frameset; [
+<!-- reserved frame names start with "_" otherwise starts with letter -->
+<!ELEMENT FRAME - O EMPTY -- subwindow -->
+<!ATTLIST FRAME
+ %coreattrs; -- id, class, style, title --
+ longdesc %URI; #IMPLIED -- link to long description
+ (complements title) --
+ name CDATA #IMPLIED -- name of frame for targeting --
+ src %URI; #IMPLIED -- source of frame content --
+ frameborder (1|0) 1 -- request frame borders? --
+ marginwidth %Pixels; #IMPLIED -- margin widths in pixels --
+ marginheight %Pixels; #IMPLIED -- margin height in pixels --
+ noresize (noresize) #IMPLIED -- allow users to resize frames? --
+ scrolling (yes|no|auto) auto -- scrollbar or none --
+ >
+]]>
+
+<!ELEMENT IFRAME - - (%flow;)* -- inline subwindow -->
+<!ATTLIST IFRAME
+ %coreattrs; -- id, class, style, title --
+ longdesc %URI; #IMPLIED -- link to long description
+ (complements title) --
+ name CDATA #IMPLIED -- name of frame for targeting --
+ src %URI; #IMPLIED -- source of frame content --
+ frameborder (1|0) 1 -- request frame borders? --
+ marginwidth %Pixels; #IMPLIED -- margin widths in pixels --
+ marginheight %Pixels; #IMPLIED -- margin height in pixels --
+ scrolling (yes|no|auto) auto -- scrollbar or none --
+ align %IAlign; #IMPLIED -- vertical or horizontal alignment --
+ height %Length; #IMPLIED -- frame height --
+ width %Length; #IMPLIED -- frame width --
+ >
+
+<![ %HTML.Frameset; [
+<!ENTITY % noframes.content "(BODY) -(NOFRAMES)">
+]]>
+
+<!ENTITY % noframes.content "(%flow;)*">
+
+<!ELEMENT NOFRAMES - - %noframes.content;
+ -- alternate content container for non frame-based rendering -->
+<!ATTLIST NOFRAMES
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--================ Document Head =======================================-->
+<!-- %head.misc; defined earlier on as "SCRIPT|STYLE|META|LINK|OBJECT" -->
+<!ENTITY % head.content "TITLE & ISINDEX? & BASE?">
+
+<!ELEMENT HEAD O O (%head.content;) +(%head.misc;) -- document head -->
+<!ATTLIST HEAD
+ %i18n; -- lang, dir --
+ profile %URI; #IMPLIED -- named dictionary of meta info --
+ >
+
+<!-- The TITLE element is not considered part of the flow of text.
+ It should be displayed, for example as the page header or
+ window title. Exactly one title is required per document.
+ -->
+<!ELEMENT TITLE - - (#PCDATA) -(%head.misc;) -- document title -->
+<!ATTLIST TITLE %i18n>
+
+<!ELEMENT ISINDEX - O EMPTY -- single line prompt -->
+<!ATTLIST ISINDEX
+ %coreattrs; -- id, class, style, title --
+ %i18n; -- lang, dir --
+ prompt %Text; #IMPLIED -- prompt message -->
+
+<!ELEMENT BASE - O EMPTY -- document base URI -->
+<!ATTLIST BASE
+ href %URI; #IMPLIED -- URI that acts as base URI --
+ target %FrameTarget; #IMPLIED -- render in this frame --
+ >
+
+<!ELEMENT META - O EMPTY -- generic metainformation -->
+<!ATTLIST META
+ %i18n; -- lang, dir, for use with content --
+ http-equiv NAME #IMPLIED -- HTTP response header name --
+ name NAME #IMPLIED -- metainformation name --
+ content CDATA #REQUIRED -- associated information --
+ scheme CDATA #IMPLIED -- select form of content --
+ >
+
+<!ELEMENT STYLE - - %StyleSheet -- style info -->
+<!ATTLIST STYLE
+ %i18n; -- lang, dir, for use with title --
+ type %ContentType; #REQUIRED -- content type of style language --
+ media %MediaDesc; #IMPLIED -- designed for use with these media --
+ title %Text; #IMPLIED -- advisory title --
+ >
+
+<!ELEMENT SCRIPT - - %Script; -- script statements -->
+<!ATTLIST SCRIPT
+ charset %Charset; #IMPLIED -- char encoding of linked resource --
+ type %ContentType; #REQUIRED -- content type of script language --
+ language CDATA #IMPLIED -- predefined script language name --
+ src %URI; #IMPLIED -- URI for an external script --
+ defer (defer) #IMPLIED -- UA may defer execution of script --
+ event CDATA #IMPLIED -- reserved for possible future use --
+ for %URI; #IMPLIED -- reserved for possible future use --
+ >
+
+<!ELEMENT NOSCRIPT - - (%flow;)*
+ -- alternate content container for non script-based rendering -->
+<!ATTLIST NOSCRIPT
+ %attrs; -- %coreattrs, %i18n, %events --
+ >
+
+<!--================ Document Structure ==================================-->
+<!ENTITY % version "version CDATA #FIXED '%HTML.Version;'">
+
+<![ %HTML.Frameset; [
+<!ENTITY % html.content "HEAD, FRAMESET">
+]]>
+
+<!ENTITY % html.content "HEAD, BODY">
+
+<!ELEMENT HTML O O (%html.content;) -- document root element -->
+<!ATTLIST HTML
+ %i18n; -- lang, dir --
+ %version;
+ >
diff --git a/tests/dtds/HTML4.soc b/tests/dtds/HTML4.soc
new file mode 100644
index 00000000..ec4825f8
--- /dev/null
+++ b/tests/dtds/HTML4.soc
@@ -0,0 +1,9 @@
+OVERRIDE YES
+SGMLDECL HTML4.dcl
+DOCTYPE HTML HTML4.dtd
+PUBLIC "-//W3C//DTD HTML 4.0//EN" HTML4-s.dtd
+PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" HTML4.dtd
+PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" HTML4-f.dtd
+PUBLIC "-//W3C//ENTITIES Latin1//EN//HTML" HTMLlat1.ent
+PUBLIC "-//W3C//ENTITIES Special//EN//HTML" HTMLspec.ent
+PUBLIC "-//W3C//ENTITIES Symbols//EN//HTML" HTMLsym.ent
diff --git a/tests/dtds/HTMLlat1.ent b/tests/dtds/HTMLlat1.ent
new file mode 100644
index 00000000..7632023a
--- /dev/null
+++ b/tests/dtds/HTMLlat1.ent
@@ -0,0 +1,195 @@
+<!-- Portions (C) International Organization for Standardization 1986
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+-->
+<!-- Character entity set. Typical invocation:
+ <!ENTITY % HTMLlat1 PUBLIC
+ "-//W3C//ENTITIES Latin1//EN//HTML">
+ %HTMLlat1;
+-->
+
+<!ENTITY nbsp CDATA "&#160;" -- no-break space = non-breaking space,
+ U+00A0 ISOnum -->
+<!ENTITY iexcl CDATA "&#161;" -- inverted exclamation mark, U+00A1 ISOnum -->
+<!ENTITY cent CDATA "&#162;" -- cent sign, U+00A2 ISOnum -->
+<!ENTITY pound CDATA "&#163;" -- pound sign, U+00A3 ISOnum -->
+<!ENTITY curren CDATA "&#164;" -- currency sign, U+00A4 ISOnum -->
+<!ENTITY yen CDATA "&#165;" -- yen sign = yuan sign, U+00A5 ISOnum -->
+<!ENTITY brvbar CDATA "&#166;" -- broken bar = broken vertical bar,
+ U+00A6 ISOnum -->
+<!ENTITY sect CDATA "&#167;" -- section sign, U+00A7 ISOnum -->
+<!ENTITY uml CDATA "&#168;" -- diaeresis = spacing diaeresis,
+ U+00A8 ISOdia -->
+<!ENTITY copy CDATA "&#169;" -- copyright sign, U+00A9 ISOnum -->
+<!ENTITY ordf CDATA "&#170;" -- feminine ordinal indicator, U+00AA ISOnum -->
+<!ENTITY laquo CDATA "&#171;" -- left-pointing double angle quotation mark
+ = left pointing guillemet, U+00AB ISOnum -->
+<!ENTITY not CDATA "&#172;" -- not sign,
+ U+00AC ISOnum -->
+<!ENTITY shy CDATA "&#173;" -- soft hyphen = discretionary hyphen,
+ U+00AD ISOnum -->
+<!ENTITY reg CDATA "&#174;" -- registered sign = registered trade mark sign,
+ U+00AE ISOnum -->
+<!ENTITY macr CDATA "&#175;" -- macron = spacing macron = overline
+ = APL overbar, U+00AF ISOdia -->
+<!ENTITY deg CDATA "&#176;" -- degree sign, U+00B0 ISOnum -->
+<!ENTITY plusmn CDATA "&#177;" -- plus-minus sign = plus-or-minus sign,
+ U+00B1 ISOnum -->
+<!ENTITY sup2 CDATA "&#178;" -- superscript two = superscript digit two
+ = squared, U+00B2 ISOnum -->
+<!ENTITY sup3 CDATA "&#179;" -- superscript three = superscript digit three
+ = cubed, U+00B3 ISOnum -->
+<!ENTITY acute CDATA "&#180;" -- acute accent = spacing acute,
+ U+00B4 ISOdia -->
+<!ENTITY micro CDATA "&#181;" -- micro sign, U+00B5 ISOnum -->
+<!ENTITY para CDATA "&#182;" -- pilcrow sign = paragraph sign,
+ U+00B6 ISOnum -->
+<!ENTITY middot CDATA "&#183;" -- middle dot = Georgian comma
+ = Greek middle dot, U+00B7 ISOnum -->
+<!ENTITY cedil CDATA "&#184;" -- cedilla = spacing cedilla, U+00B8 ISOdia -->
+<!ENTITY sup1 CDATA "&#185;" -- superscript one = superscript digit one,
+ U+00B9 ISOnum -->
+<!ENTITY ordm CDATA "&#186;" -- masculine ordinal indicator,
+ U+00BA ISOnum -->
+<!ENTITY raquo CDATA "&#187;" -- right-pointing double angle quotation mark
+ = right pointing guillemet, U+00BB ISOnum -->
+<!ENTITY frac14 CDATA "&#188;" -- vulgar fraction one quarter
+ = fraction one quarter, U+00BC ISOnum -->
+<!ENTITY frac12 CDATA "&#189;" -- vulgar fraction one half
+ = fraction one half, U+00BD ISOnum -->
+<!ENTITY frac34 CDATA "&#190;" -- vulgar fraction three quarters
+ = fraction three quarters, U+00BE ISOnum -->
+<!ENTITY iquest CDATA "&#191;" -- inverted question mark
+ = turned question mark, U+00BF ISOnum -->
+<!ENTITY Agrave CDATA "&#192;" -- latin capital letter A with grave
+ = latin capital letter A grave,
+ U+00C0 ISOlat1 -->
+<!ENTITY Aacute CDATA "&#193;" -- latin capital letter A with acute,
+ U+00C1 ISOlat1 -->
+<!ENTITY Acirc CDATA "&#194;" -- latin capital letter A with circumflex,
+ U+00C2 ISOlat1 -->
+<!ENTITY Atilde CDATA "&#195;" -- latin capital letter A with tilde,
+ U+00C3 ISOlat1 -->
+<!ENTITY Auml CDATA "&#196;" -- latin capital letter A with diaeresis,
+ U+00C4 ISOlat1 -->
+<!ENTITY Aring CDATA "&#197;" -- latin capital letter A with ring above
+ = latin capital letter A ring,
+ U+00C5 ISOlat1 -->
+<!ENTITY AElig CDATA "&#198;" -- latin capital letter AE
+ = latin capital ligature AE,
+ U+00C6 ISOlat1 -->
+<!ENTITY Ccedil CDATA "&#199;" -- latin capital letter C with cedilla,
+ U+00C7 ISOlat1 -->
+<!ENTITY Egrave CDATA "&#200;" -- latin capital letter E with grave,
+ U+00C8 ISOlat1 -->
+<!ENTITY Eacute CDATA "&#201;" -- latin capital letter E with acute,
+ U+00C9 ISOlat1 -->
+<!ENTITY Ecirc CDATA "&#202;" -- latin capital letter E with circumflex,
+ U+00CA ISOlat1 -->
+<!ENTITY Euml CDATA "&#203;" -- latin capital letter E with diaeresis,
+ U+00CB ISOlat1 -->
+<!ENTITY Igrave CDATA "&#204;" -- latin capital letter I with grave,
+ U+00CC ISOlat1 -->
+<!ENTITY Iacute CDATA "&#205;" -- latin capital letter I with acute,
+ U+00CD ISOlat1 -->
+<!ENTITY Icirc CDATA "&#206;" -- latin capital letter I with circumflex,
+ U+00CE ISOlat1 -->
+<!ENTITY Iuml CDATA "&#207;" -- latin capital letter I with diaeresis,
+ U+00CF ISOlat1 -->
+<!ENTITY ETH CDATA "&#208;" -- latin capital letter ETH, U+00D0 ISOlat1 -->
+<!ENTITY Ntilde CDATA "&#209;" -- latin capital letter N with tilde,
+ U+00D1 ISOlat1 -->
+<!ENTITY Ograve CDATA "&#210;" -- latin capital letter O with grave,
+ U+00D2 ISOlat1 -->
+<!ENTITY Oacute CDATA "&#211;" -- latin capital letter O with acute,
+ U+00D3 ISOlat1 -->
+<!ENTITY Ocirc CDATA "&#212;" -- latin capital letter O with circumflex,
+ U+00D4 ISOlat1 -->
+<!ENTITY Otilde CDATA "&#213;" -- latin capital letter O with tilde,
+ U+00D5 ISOlat1 -->
+<!ENTITY Ouml CDATA "&#214;" -- latin capital letter O with diaeresis,
+ U+00D6 ISOlat1 -->
+<!ENTITY times CDATA "&#215;" -- multiplication sign, U+00D7 ISOnum -->
+<!ENTITY Oslash CDATA "&#216;" -- latin capital letter O with stroke
+ = latin capital letter O slash,
+ U+00D8 ISOlat1 -->
+<!ENTITY Ugrave CDATA "&#217;" -- latin capital letter U with grave,
+ U+00D9 ISOlat1 -->
+<!ENTITY Uacute CDATA "&#218;" -- latin capital letter U with acute,
+ U+00DA ISOlat1 -->
+<!ENTITY Ucirc CDATA "&#219;" -- latin capital letter U with circumflex,
+ U+00DB ISOlat1 -->
+<!ENTITY Uuml CDATA "&#220;" -- latin capital letter U with diaeresis,
+ U+00DC ISOlat1 -->
+<!ENTITY Yacute CDATA "&#221;" -- latin capital letter Y with acute,
+ U+00DD ISOlat1 -->
+<!ENTITY THORN CDATA "&#222;" -- latin capital letter THORN,
+ U+00DE ISOlat1 -->
+<!ENTITY szlig CDATA "&#223;" -- latin small letter sharp s = ess-zed,
+ U+00DF ISOlat1 -->
+<!ENTITY agrave CDATA "&#224;" -- latin small letter a with grave
+ = latin small letter a grave,
+ U+00E0 ISOlat1 -->
+<!ENTITY aacute CDATA "&#225;" -- latin small letter a with acute,
+ U+00E1 ISOlat1 -->
+<!ENTITY acirc CDATA "&#226;" -- latin small letter a with circumflex,
+ U+00E2 ISOlat1 -->
+<!ENTITY atilde CDATA "&#227;" -- latin small letter a with tilde,
+ U+00E3 ISOlat1 -->
+<!ENTITY auml CDATA "&#228;" -- latin small letter a with diaeresis,
+ U+00E4 ISOlat1 -->
+<!ENTITY aring CDATA "&#229;" -- latin small letter a with ring above
+ = latin small letter a ring,
+ U+00E5 ISOlat1 -->
+<!ENTITY aelig CDATA "&#230;" -- latin small letter ae
+ = latin small ligature ae, U+00E6 ISOlat1 -->
+<!ENTITY ccedil CDATA "&#231;" -- latin small letter c with cedilla,
+ U+00E7 ISOlat1 -->
+<!ENTITY egrave CDATA "&#232;" -- latin small letter e with grave,
+ U+00E8 ISOlat1 -->
+<!ENTITY eacute CDATA "&#233;" -- latin small letter e with acute,
+ U+00E9 ISOlat1 -->
+<!ENTITY ecirc CDATA "&#234;" -- latin small letter e with circumflex,
+ U+00EA ISOlat1 -->
+<!ENTITY euml CDATA "&#235;" -- latin small letter e with diaeresis,
+ U+00EB ISOlat1 -->
+<!ENTITY igrave CDATA "&#236;" -- latin small letter i with grave,
+ U+00EC ISOlat1 -->
+<!ENTITY iacute CDATA "&#237;" -- latin small letter i with acute,
+ U+00ED ISOlat1 -->
+<!ENTITY icirc CDATA "&#238;" -- latin small letter i with circumflex,
+ U+00EE ISOlat1 -->
+<!ENTITY iuml CDATA "&#239;" -- latin small letter i with diaeresis,
+ U+00EF ISOlat1 -->
+<!ENTITY eth CDATA "&#240;" -- latin small letter eth, U+00F0 ISOlat1 -->
+<!ENTITY ntilde CDATA "&#241;" -- latin small letter n with tilde,
+ U+00F1 ISOlat1 -->
+<!ENTITY ograve CDATA "&#242;" -- latin small letter o with grave,
+ U+00F2 ISOlat1 -->
+<!ENTITY oacute CDATA "&#243;" -- latin small letter o with acute,
+ U+00F3 ISOlat1 -->
+<!ENTITY ocirc CDATA "&#244;" -- latin small letter o with circumflex,
+ U+00F4 ISOlat1 -->
+<!ENTITY otilde CDATA "&#245;" -- latin small letter o with tilde,
+ U+00F5 ISOlat1 -->
+<!ENTITY ouml CDATA "&#246;" -- latin small letter o with diaeresis,
+ U+00F6 ISOlat1 -->
+<!ENTITY divide CDATA "&#247;" -- division sign, U+00F7 ISOnum -->
+<!ENTITY oslash CDATA "&#248;" -- latin small letter o with stroke
+ = latin small letter o slash,
+ U+00F8 ISOlat1 -->
+<!ENTITY ugrave CDATA "&#249;" -- latin small letter u with grave,
+ U+00F9 ISOlat1 -->
+<!ENTITY uacute CDATA "&#250;" -- latin small letter u with acute,
+ U+00FA ISOlat1 -->
+<!ENTITY ucirc CDATA "&#251;" -- latin small letter u with circumflex,
+ U+00FB ISOlat1 -->
+<!ENTITY uuml CDATA "&#252;" -- latin small letter u with diaeresis,
+ U+00FC ISOlat1 -->
+<!ENTITY yacute CDATA "&#253;" -- latin small letter y with acute,
+ U+00FD ISOlat1 -->
+<!ENTITY thorn CDATA "&#254;" -- latin small letter thorn,
+ U+00FE ISOlat1 -->
+<!ENTITY yuml CDATA "&#255;" -- latin small letter y with diaeresis,
+ U+00FF ISOlat1 --> \ No newline at end of file
diff --git a/tests/dtds/HTMLspec.ent b/tests/dtds/HTMLspec.ent
new file mode 100644
index 00000000..29011cc2
--- /dev/null
+++ b/tests/dtds/HTMLspec.ent
@@ -0,0 +1,77 @@
+<!-- Special characters for HTML -->
+
+<!-- Character entity set. Typical invocation:
+ <!ENTITY % HTMLspecial PUBLIC
+ "-//W3C//ENTITIES Special//EN//HTML">
+ %HTMLspecial; -->
+
+<!-- Portions (C) International Organization for Standardization 1986:
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+-->
+
+<!-- Relevant ISO entity set is given unless names are newly introduced.
+ New names (i.e., not in ISO 8879 list) do not clash with any
+ existing ISO 8879 entity names. ISO 10646 character numbers
+ are given for each character, in hex. CDATA values are decimal
+ conversions of the ISO 10646 values and refer to the document
+ character set. Names are Unicode 2.0 names.
+
+-->
+
+<!-- C0 Controls and Basic Latin -->
+<!ENTITY quot CDATA "&#34;" -- quotation mark = APL quote,
+ U+0022 ISOnum -->
+<!ENTITY amp CDATA "&#38;" -- ampersand, U+0026 ISOnum -->
+<!ENTITY lt CDATA "&#60;" -- less-than sign, U+003C ISOnum -->
+<!ENTITY gt CDATA "&#62;" -- greater-than sign, U+003E ISOnum -->
+
+<!-- Latin Extended-A -->
+<!ENTITY OElig CDATA "&#338;" -- latin capital ligature OE,
+ U+0152 ISOlat2 -->
+<!ENTITY oelig CDATA "&#339;" -- latin small ligature oe, U+0153 ISOlat2 -->
+<!-- ligature is a misnomer, this is a separate character in some languages -->
+<!ENTITY Scaron CDATA "&#352;" -- latin capital letter S with caron,
+ U+0160 ISOlat2 -->
+<!ENTITY scaron CDATA "&#353;" -- latin small letter s with caron,
+ U+0161 ISOlat2 -->
+<!ENTITY Yuml CDATA "&#376;" -- latin capital letter Y with diaeresis,
+ U+0178 ISOlat2 -->
+
+<!-- Spacing Modifier Letters -->
+<!ENTITY circ CDATA "&#710;" -- modifier letter circumflex accent,
+ U+02C6 ISOpub -->
+<!ENTITY tilde CDATA "&#732;" -- small tilde, U+02DC ISOdia -->
+
+<!-- General Punctuation -->
+<!ENTITY ensp CDATA "&#8194;" -- en space, U+2002 ISOpub -->
+<!ENTITY emsp CDATA "&#8195;" -- em space, U+2003 ISOpub -->
+<!ENTITY thinsp CDATA "&#8201;" -- thin space, U+2009 ISOpub -->
+<!ENTITY zwnj CDATA "&#8204;" -- zero width non-joiner,
+ U+200C NEW RFC 2070 -->
+<!ENTITY zwj CDATA "&#8205;" -- zero width joiner, U+200D NEW RFC 2070 -->
+<!ENTITY lrm CDATA "&#8206;" -- left-to-right mark, U+200E NEW RFC 2070 -->
+<!ENTITY rlm CDATA "&#8207;" -- right-to-left mark, U+200F NEW RFC 2070 -->
+<!ENTITY ndash CDATA "&#8211;" -- en dash, U+2013 ISOpub -->
+<!ENTITY mdash CDATA "&#8212;" -- em dash, U+2014 ISOpub -->
+<!ENTITY lsquo CDATA "&#8216;" -- left single quotation mark,
+ U+2018 ISOnum -->
+<!ENTITY rsquo CDATA "&#8217;" -- right single quotation mark,
+ U+2019 ISOnum -->
+<!ENTITY sbquo CDATA "&#8218;" -- single low-9 quotation mark, U+201A NEW -->
+<!ENTITY ldquo CDATA "&#8220;" -- left double quotation mark,
+ U+201C ISOnum -->
+<!ENTITY rdquo CDATA "&#8221;" -- right double quotation mark,
+ U+201D ISOnum -->
+<!ENTITY bdquo CDATA "&#8222;" -- double low-9 quotation mark, U+201E NEW -->
+<!ENTITY dagger CDATA "&#8224;" -- dagger, U+2020 ISOpub -->
+<!ENTITY Dagger CDATA "&#8225;" -- double dagger, U+2021 ISOpub -->
+<!ENTITY permil CDATA "&#8240;" -- per mille sign, U+2030 ISOtech -->
+<!ENTITY lsaquo CDATA "&#8249;" -- single left-pointing angle quotation mark,
+ U+2039 ISO proposed -->
+<!-- lsaquo is proposed but not yet ISO standardized -->
+<!ENTITY rsaquo CDATA "&#8250;" -- single right-pointing angle quotation mark,
+ U+203A ISO proposed -->
+<!-- rsaquo is proposed but not yet ISO standardized -->
+<!ENTITY euro CDATA "&#8364;" -- euro sign, U+20AC NEW --> \ No newline at end of file
diff --git a/tests/dtds/HTMLsym.ent b/tests/dtds/HTMLsym.ent
new file mode 100644
index 00000000..2a6250ba
--- /dev/null
+++ b/tests/dtds/HTMLsym.ent
@@ -0,0 +1,241 @@
+<!-- Mathematical, Greek and Symbolic characters for HTML -->
+
+<!-- Character entity set. Typical invocation:
+ <!ENTITY % HTMLsymbol PUBLIC
+ "-//W3C//ENTITIES Symbols//EN//HTML">
+ %HTMLsymbol; -->
+
+<!-- Portions (C) International Organization for Standardization 1986:
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+-->
+
+<!-- Relevant ISO entity set is given unless names are newly introduced.
+ New names (i.e., not in ISO 8879 list) do not clash with any
+ existing ISO 8879 entity names. ISO 10646 character numbers
+ are given for each character, in hex. CDATA values are decimal
+ conversions of the ISO 10646 values and refer to the document
+ character set. Names are Unicode 2.0 names.
+
+-->
+
+<!-- Latin Extended-B -->
+<!ENTITY fnof CDATA "&#402;" -- latin small f with hook = function
+ = florin, U+0192 ISOtech -->
+
+<!-- Greek -->
+<!ENTITY Alpha CDATA "&#913;" -- greek capital letter alpha, U+0391 -->
+<!ENTITY Beta CDATA "&#914;" -- greek capital letter beta, U+0392 -->
+<!ENTITY Gamma CDATA "&#915;" -- greek capital letter gamma,
+ U+0393 ISOgrk3 -->
+<!ENTITY Delta CDATA "&#916;" -- greek capital letter delta,
+ U+0394 ISOgrk3 -->
+<!ENTITY Epsilon CDATA "&#917;" -- greek capital letter epsilon, U+0395 -->
+<!ENTITY Zeta CDATA "&#918;" -- greek capital letter zeta, U+0396 -->
+<!ENTITY Eta CDATA "&#919;" -- greek capital letter eta, U+0397 -->
+<!ENTITY Theta CDATA "&#920;" -- greek capital letter theta,
+ U+0398 ISOgrk3 -->
+<!ENTITY Iota CDATA "&#921;" -- greek capital letter iota, U+0399 -->
+<!ENTITY Kappa CDATA "&#922;" -- greek capital letter kappa, U+039A -->
+<!ENTITY Lambda CDATA "&#923;" -- greek capital letter lambda,
+ U+039B ISOgrk3 -->
+<!ENTITY Mu CDATA "&#924;" -- greek capital letter mu, U+039C -->
+<!ENTITY Nu CDATA "&#925;" -- greek capital letter nu, U+039D -->
+<!ENTITY Xi CDATA "&#926;" -- greek capital letter xi, U+039E ISOgrk3 -->
+<!ENTITY Omicron CDATA "&#927;" -- greek capital letter omicron, U+039F -->
+<!ENTITY Pi CDATA "&#928;" -- greek capital letter pi, U+03A0 ISOgrk3 -->
+<!ENTITY Rho CDATA "&#929;" -- greek capital letter rho, U+03A1 -->
+<!-- there is no Sigmaf, and no U+03A2 character either -->
+<!ENTITY Sigma CDATA "&#931;" -- greek capital letter sigma,
+ U+03A3 ISOgrk3 -->
+<!ENTITY Tau CDATA "&#932;" -- greek capital letter tau, U+03A4 -->
+<!ENTITY Upsilon CDATA "&#933;" -- greek capital letter upsilon,
+ U+03A5 ISOgrk3 -->
+<!ENTITY Phi CDATA "&#934;" -- greek capital letter phi,
+ U+03A6 ISOgrk3 -->
+<!ENTITY Chi CDATA "&#935;" -- greek capital letter chi, U+03A7 -->
+<!ENTITY Psi CDATA "&#936;" -- greek capital letter psi,
+ U+03A8 ISOgrk3 -->
+<!ENTITY Omega CDATA "&#937;" -- greek capital letter omega,
+ U+03A9 ISOgrk3 -->
+
+<!ENTITY alpha CDATA "&#945;" -- greek small letter alpha,
+ U+03B1 ISOgrk3 -->
+<!ENTITY beta CDATA "&#946;" -- greek small letter beta, U+03B2 ISOgrk3 -->
+<!ENTITY gamma CDATA "&#947;" -- greek small letter gamma,
+ U+03B3 ISOgrk3 -->
+<!ENTITY delta CDATA "&#948;" -- greek small letter delta,
+ U+03B4 ISOgrk3 -->
+<!ENTITY epsilon CDATA "&#949;" -- greek small letter epsilon,
+ U+03B5 ISOgrk3 -->
+<!ENTITY zeta CDATA "&#950;" -- greek small letter zeta, U+03B6 ISOgrk3 -->
+<!ENTITY eta CDATA "&#951;" -- greek small letter eta, U+03B7 ISOgrk3 -->
+<!ENTITY theta CDATA "&#952;" -- greek small letter theta,
+ U+03B8 ISOgrk3 -->
+<!ENTITY iota CDATA "&#953;" -- greek small letter iota, U+03B9 ISOgrk3 -->
+<!ENTITY kappa CDATA "&#954;" -- greek small letter kappa,
+ U+03BA ISOgrk3 -->
+<!ENTITY lambda CDATA "&#955;" -- greek small letter lambda,
+ U+03BB ISOgrk3 -->
+<!ENTITY mu CDATA "&#956;" -- greek small letter mu, U+03BC ISOgrk3 -->
+<!ENTITY nu CDATA "&#957;" -- greek small letter nu, U+03BD ISOgrk3 -->
+<!ENTITY xi CDATA "&#958;" -- greek small letter xi, U+03BE ISOgrk3 -->
+<!ENTITY omicron CDATA "&#959;" -- greek small letter omicron, U+03BF NEW -->
+<!ENTITY pi CDATA "&#960;" -- greek small letter pi, U+03C0 ISOgrk3 -->
+<!ENTITY rho CDATA "&#961;" -- greek small letter rho, U+03C1 ISOgrk3 -->
+<!ENTITY sigmaf CDATA "&#962;" -- greek small letter final sigma,
+ U+03C2 ISOgrk3 -->
+<!ENTITY sigma CDATA "&#963;" -- greek small letter sigma,
+ U+03C3 ISOgrk3 -->
+<!ENTITY tau CDATA "&#964;" -- greek small letter tau, U+03C4 ISOgrk3 -->
+<!ENTITY upsilon CDATA "&#965;" -- greek small letter upsilon,
+ U+03C5 ISOgrk3 -->
+<!ENTITY phi CDATA "&#966;" -- greek small letter phi, U+03C6 ISOgrk3 -->
+<!ENTITY chi CDATA "&#967;" -- greek small letter chi, U+03C7 ISOgrk3 -->
+<!ENTITY psi CDATA "&#968;" -- greek small letter psi, U+03C8 ISOgrk3 -->
+<!ENTITY omega CDATA "&#969;" -- greek small letter omega,
+ U+03C9 ISOgrk3 -->
+<!ENTITY thetasym CDATA "&#977;" -- greek small letter theta symbol,
+ U+03D1 NEW -->
+<!ENTITY upsih CDATA "&#978;" -- greek upsilon with hook symbol,
+ U+03D2 NEW -->
+<!ENTITY piv CDATA "&#982;" -- greek pi symbol, U+03D6 ISOgrk3 -->
+
+<!-- General Punctuation -->
+<!ENTITY bull CDATA "&#8226;" -- bullet = black small circle,
+ U+2022 ISOpub -->
+<!-- bullet is NOT the same as bullet operator, U+2219 -->
+<!ENTITY hellip CDATA "&#8230;" -- horizontal ellipsis = three dot leader,
+ U+2026 ISOpub -->
+<!ENTITY prime CDATA "&#8242;" -- prime = minutes = feet, U+2032 ISOtech -->
+<!ENTITY Prime CDATA "&#8243;" -- double prime = seconds = inches,
+ U+2033 ISOtech -->
+<!ENTITY oline CDATA "&#8254;" -- overline = spacing overscore,
+ U+203E NEW -->
+<!ENTITY frasl CDATA "&#8260;" -- fraction slash, U+2044 NEW -->
+
+<!-- Letterlike Symbols -->
+<!ENTITY weierp CDATA "&#8472;" -- script capital P = power set
+ = Weierstrass p, U+2118 ISOamso -->
+<!ENTITY image CDATA "&#8465;" -- blackletter capital I = imaginary part,
+ U+2111 ISOamso -->
+<!ENTITY real CDATA "&#8476;" -- blackletter capital R = real part symbol,
+ U+211C ISOamso -->
+<!ENTITY trade CDATA "&#8482;" -- trade mark sign, U+2122 ISOnum -->
+<!ENTITY alefsym CDATA "&#8501;" -- alef symbol = first transfinite cardinal,
+ U+2135 NEW -->
+<!-- alef symbol is NOT the same as hebrew letter alef,
+ U+05D0 although the same glyph could be used to depict both characters -->
+
+<!-- Arrows -->
+<!ENTITY larr CDATA "&#8592;" -- leftwards arrow, U+2190 ISOnum -->
+<!ENTITY uarr CDATA "&#8593;" -- upwards arrow, U+2191 ISOnum-->
+<!ENTITY rarr CDATA "&#8594;" -- rightwards arrow, U+2192 ISOnum -->
+<!ENTITY darr CDATA "&#8595;" -- downwards arrow, U+2193 ISOnum -->
+<!ENTITY harr CDATA "&#8596;" -- left right arrow, U+2194 ISOamsa -->
+<!ENTITY crarr CDATA "&#8629;" -- downwards arrow with corner leftwards
+ = carriage return, U+21B5 NEW -->
+<!ENTITY lArr CDATA "&#8656;" -- leftwards double arrow, U+21D0 ISOtech -->
+<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
+ but also does not have any other character for that function. So ? lArr can
+ be used for 'is implied by' as ISOtech suggests -->
+<!ENTITY uArr CDATA "&#8657;" -- upwards double arrow, U+21D1 ISOamsa -->
+<!ENTITY rArr CDATA "&#8658;" -- rightwards double arrow,
+ U+21D2 ISOtech -->
+<!-- Unicode does not say this is the 'implies' character but does not have
+ another character with this function so ?
+ rArr can be used for 'implies' as ISOtech suggests -->
+<!ENTITY dArr CDATA "&#8659;" -- downwards double arrow, U+21D3 ISOamsa -->
+<!ENTITY hArr CDATA "&#8660;" -- left right double arrow,
+ U+21D4 ISOamsa -->
+
+<!-- Mathematical Operators -->
+<!ENTITY forall CDATA "&#8704;" -- for all, U+2200 ISOtech -->
+<!ENTITY part CDATA "&#8706;" -- partial differential, U+2202 ISOtech -->
+<!ENTITY exist CDATA "&#8707;" -- there exists, U+2203 ISOtech -->
+<!ENTITY empty CDATA "&#8709;" -- empty set = null set = diameter,
+ U+2205 ISOamso -->
+<!ENTITY nabla CDATA "&#8711;" -- nabla = backward difference,
+ U+2207 ISOtech -->
+<!ENTITY isin CDATA "&#8712;" -- element of, U+2208 ISOtech -->
+<!ENTITY notin CDATA "&#8713;" -- not an element of, U+2209 ISOtech -->
+<!ENTITY ni CDATA "&#8715;" -- contains as member, U+220B ISOtech -->
+<!-- should there be a more memorable name than 'ni'? -->
+<!ENTITY prod CDATA "&#8719;" -- n-ary product = product sign,
+ U+220F ISOamsb -->
+<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
+ the same glyph might be used for both -->
+<!ENTITY sum CDATA "&#8721;" -- n-ary summation, U+2211 ISOamsb -->
+<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
+ though the same glyph might be used for both -->
+<!ENTITY minus CDATA "&#8722;" -- minus sign, U+2212 ISOtech -->
+<!ENTITY lowast CDATA "&#8727;" -- asterisk operator, U+2217 ISOtech -->
+<!ENTITY radic CDATA "&#8730;" -- square root = radical sign,
+ U+221A ISOtech -->
+<!ENTITY prop CDATA "&#8733;" -- proportional to, U+221D ISOtech -->
+<!ENTITY infin CDATA "&#8734;" -- infinity, U+221E ISOtech -->
+<!ENTITY ang CDATA "&#8736;" -- angle, U+2220 ISOamso -->
+<!ENTITY and CDATA "&#8743;" -- logical and = wedge, U+2227 ISOtech -->
+<!ENTITY or CDATA "&#8744;" -- logical or = vee, U+2228 ISOtech -->
+<!ENTITY cap CDATA "&#8745;" -- intersection = cap, U+2229 ISOtech -->
+<!ENTITY cup CDATA "&#8746;" -- union = cup, U+222A ISOtech -->
+<!ENTITY int CDATA "&#8747;" -- integral, U+222B ISOtech -->
+<!ENTITY there4 CDATA "&#8756;" -- therefore, U+2234 ISOtech -->
+<!ENTITY sim CDATA "&#8764;" -- tilde operator = varies with = similar to,
+ U+223C ISOtech -->
+<!-- tilde operator is NOT the same character as the tilde, U+007E,
+ although the same glyph might be used to represent both -->
+<!ENTITY cong CDATA "&#8773;" -- approximately equal to, U+2245 ISOtech -->
+<!ENTITY asymp CDATA "&#8776;" -- almost equal to = asymptotic to,
+ U+2248 ISOamsr -->
+<!ENTITY ne CDATA "&#8800;" -- not equal to, U+2260 ISOtech -->
+<!ENTITY equiv CDATA "&#8801;" -- identical to, U+2261 ISOtech -->
+<!ENTITY le CDATA "&#8804;" -- less-than or equal to, U+2264 ISOtech -->
+<!ENTITY ge CDATA "&#8805;" -- greater-than or equal to,
+ U+2265 ISOtech -->
+<!ENTITY sub CDATA "&#8834;" -- subset of, U+2282 ISOtech -->
+<!ENTITY sup CDATA "&#8835;" -- superset of, U+2283 ISOtech -->
+<!-- note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
+ font encoding and is not included. Should it be, for symmetry?
+ It is in ISOamsn -->
+<!ENTITY nsub CDATA "&#8836;" -- not a subset of, U+2284 ISOamsn -->
+<!ENTITY sube CDATA "&#8838;" -- subset of or equal to, U+2286 ISOtech -->
+<!ENTITY supe CDATA "&#8839;" -- superset of or equal to,
+ U+2287 ISOtech -->
+<!ENTITY oplus CDATA "&#8853;" -- circled plus = direct sum,
+ U+2295 ISOamsb -->
+<!ENTITY otimes CDATA "&#8855;" -- circled times = vector product,
+ U+2297 ISOamsb -->
+<!ENTITY perp CDATA "&#8869;" -- up tack = orthogonal to = perpendicular,
+ U+22A5 ISOtech -->
+<!ENTITY sdot CDATA "&#8901;" -- dot operator, U+22C5 ISOamsb -->
+<!-- dot operator is NOT the same character as U+00B7 middle dot -->
+
+<!-- Miscellaneous Technical -->
+<!ENTITY lceil CDATA "&#8968;" -- left ceiling = apl upstile,
+ U+2308 ISOamsc -->
+<!ENTITY rceil CDATA "&#8969;" -- right ceiling, U+2309 ISOamsc -->
+<!ENTITY lfloor CDATA "&#8970;" -- left floor = apl downstile,
+ U+230A ISOamsc -->
+<!ENTITY rfloor CDATA "&#8971;" -- right floor, U+230B ISOamsc -->
+<!ENTITY lang CDATA "&#9001;" -- left-pointing angle bracket = bra,
+ U+2329 ISOtech -->
+<!-- lang is NOT the same character as U+003C 'less than'
+ or U+2039 'single left-pointing angle quotation mark' -->
+<!ENTITY rang CDATA "&#9002;" -- right-pointing angle bracket = ket,
+ U+232A ISOtech -->
+<!-- rang is NOT the same character as U+003E 'greater than'
+ or U+203A 'single right-pointing angle quotation mark' -->
+
+<!-- Geometric Shapes -->
+<!ENTITY loz CDATA "&#9674;" -- lozenge, U+25CA ISOpub -->
+
+<!-- Miscellaneous Symbols -->
+<!ENTITY spades CDATA "&#9824;" -- black spade suit, U+2660 ISOpub -->
+<!-- black here seems to mean filled as opposed to hollow -->
+<!ENTITY clubs CDATA "&#9827;" -- black club suit = shamrock,
+ U+2663 ISOpub -->
+<!ENTITY hearts CDATA "&#9829;" -- black heart suit = valentine,
+ U+2665 ISOpub -->
+<!ENTITY diams CDATA "&#9830;" -- black diamond suit, U+2666 ISOpub --> \ No newline at end of file
diff --git a/tests/test_html_formatter.py b/tests/test_html_formatter.py
index a23fbfa9..ba3ed69e 100644
--- a/tests/test_html_formatter.py
+++ b/tests/test_html_formatter.py
@@ -11,6 +11,7 @@ import os
import unittest
import StringIO
import random
+import tempfile
from pygments import lexers, formatters
from pygments.token import _TokenType
@@ -34,4 +35,33 @@ class HtmlFormatterTest(unittest.TestCase):
outfile = StringIO.StringIO()
fmt = HtmlFormatter(**optdict)
fmt.format(tokensource, outfile)
-
+
+ def test_valid_output(self):  # validate a full HTML document from HtmlFormatter with nsgmls
+ tokensource = list(PythonLexer().get_tokens(file(os.path.join(testdir, testfile)).read()))
+ fmt = HtmlFormatter(full=True, linenos=True, noclasses=True)
+
+ handle, pathname = tempfile.mkstemp('.html')
+ # work from the temp file's directory so other output files land there too
+ old_wd = os.getcwd()
+ os.chdir(os.path.dirname(pathname))
+ tfile = os.fdopen(handle, 'w+b')
+ fmt.format(tokensource, tfile)
+ tfile.close()
+ catname = os.path.join(testdir, 'dtds', 'HTML4.soc')  # SGML catalog mapping the HTML 4.0 public ids to the local DTD files
+ try:
+ try:
+ import subprocess
+ ret = subprocess.Popen(['nsgmls', '-s', '-c', catname, pathname],
+ stdout=subprocess.PIPE).wait()  # NOTE(review): wait() with an undrained PIPE can block on large output
+ except ImportError:
+ # Python 2.3 - no subprocess module, fall back to os.popen
+ ret = os.popen('nsgmls -s -c "%s" "%s"' % (catname, pathname)).close()
+ if ret == 32512: raise OSError # 32512 == 127 << 8: command not found
+ except OSError:
+ # nsgmls not available - skip the validity check
+ pass
+ else:
+ self.failIf(ret, 'nsgmls run reported errors')
+
+ os.unlink(pathname)  # NOTE(review): cleanup is skipped if the assertion above fails (no try/finally)
+ os.chdir(old_wd)