diff options
Diffstat (limited to 'libjava/javax')
-rw-r--r-- | libjava/javax/swing/text/ChangedCharSetException.java | 100 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/HTML.java | 1246 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/HTMLDocument.java | 53 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/HTMLEditorKit.java | 173 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/HTMLFrameHyperlinkEvent.java | 132 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/AttributeList.java | 294 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/ContentModel.java | 218 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/DTD.java | 607 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/DTDConstants.java | 290 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/parser/DocumentParser.java | 258 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/Element.java | 317 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/parser/Entity.java | 185 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/Parser.java | 436 | ||||
-rw-r--r-- | libjava/javax/swing/text/html/parser/ParserDelegator.java | 168 | ||||
-rwxr-xr-x | libjava/javax/swing/text/html/parser/TagElement.java | 142 |
15 files changed, 4603 insertions, 16 deletions
diff --git a/libjava/javax/swing/text/ChangedCharSetException.java b/libjava/javax/swing/text/ChangedCharSetException.java new file mode 100644 index 00000000000..bf592a65a97 --- /dev/null +++ b/libjava/javax/swing/text/ChangedCharSetException.java @@ -0,0 +1,100 @@ +/* ChangedCharSetException.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text; + +import java.io.IOException; +import java.io.Serializable; + +/** + * The exception is thrown when the document charset is changed. + * + * @author Audrius Meskauskas (AudriusA@Bioinformatics.org) + */ +public class ChangedCharSetException + extends IOException + implements Serializable +{ + /** + * Use serialVersionUID for interoperability. + * This value corresponds the version 1.4. + */ + private static final long serialVersionUID = 9119851554465432389L; + + /** + * The char set specification. + */ + private final String m_charSetSpec; + + /** + * The char set key. + */ + private final boolean m_charSetKey; + + /** + * Constructs a new char set exception with two additional parameters, + * defining the circumstances under that the exception was raised. + */ + public ChangedCharSetException(String charSetSpec, boolean charSetKey) + { + m_charSetSpec = charSetSpec; + m_charSetKey = charSetKey; + } + + /** + * Get the value of the first parameter, previously passed to the + * constructor. + * + * @return the value of the first parameter + */ + public String getCharSetSpec() + { + return m_charSetSpec; + } + + /** + * Get the value of the second parameter, previously passed to the + * constructor. + * + * @return the value of the second parameter + */ + public boolean keyEqualsCharSet() + { + return m_charSetKey; + } +} diff --git a/libjava/javax/swing/text/html/HTML.java b/libjava/javax/swing/text/html/HTML.java index 098a14715cb..06e0ef403ca 100644 --- a/libjava/javax/swing/text/html/HTML.java +++ b/libjava/javax/swing/text/html/HTML.java @@ -35,38 +35,1260 @@ this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ + package javax.swing.text.html; import java.io.Serializable; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; + +import java.util.Map; +import java.util.TreeMap; + +import javax.swing.text.AttributeSet; + /** - * Stub implementation. - * Just enough to get gjdoc working. - * - * @author Mark Wielaard (mark@klomp.org) + * HTML attribute and tag definitions. + * @author Audrius Meskauskas (AudriusA@Bioinformatics.org) */ public class HTML { - public static class Tag + /** + * Represents a HTML attribute. + */ + public static class Attribute + implements Serializable { - private final String id; + /** + * The action attribute + */ + public static final Attribute ACTION = new Attribute("action"); - protected Tag(String id) + /** + * The align attribute + */ + public static final Attribute ALIGN = new Attribute("align"); + + /** + * The alink attribute + */ + public static final Attribute ALINK = new Attribute("alink"); + + /** + * The alt attribute + */ + public static final Attribute ALT = new Attribute("alt"); + + /** + * The archive attribute + */ + public static final Attribute ARCHIVE = new Attribute("archive"); + + /** + * The background attribute + */ + public static final Attribute BACKGROUND = new Attribute("background"); + + /** + * The bgcolor attribute + */ + public static final Attribute BGCOLOR = new Attribute("bgcolor"); + + /** + * The border attribute + */ + public static final Attribute BORDER = new Attribute("border"); + + /** + * The cellpadding attribute + */ + public static final Attribute CELLPADDING = new Attribute("cellpadding"); + + /** + * The cellspacing attribute + */ + public static final Attribute CELLSPACING = new Attribute("cellspacing"); + + /** + * The checked attribute + */ + public static final Attribute CHECKED = new Attribute("checked"); + + /** + * The class attribute + */ + public static final Attribute CLASS = new Attribute("class"); + + /** + * The classid attribute + */ + public static final Attribute CLASSID = new Attribute("classid"); + + /** + * The clear attribute + */ + public static final Attribute CLEAR = new Attribute("clear"); + + /** + * The code attribute + */ + public static final Attribute CODE = new Attribute("code"); + + /** + * The codebase attribute + */ + public static final Attribute CODEBASE = new Attribute("codebase"); + + /** + * The codetype attribute + */ + public static final Attribute CODETYPE = new Attribute("codetype"); + + /** + * The color attribute + */ + public static final Attribute COLOR = new Attribute("color"); + + /** + * The cols attribute + */ + public static final Attribute COLS = new Attribute("cols"); + + /** + * The colspan attribute + */ + public static final Attribute COLSPAN = new Attribute("colspan"); + + /** + * The comment attribute + */ + public static final Attribute COMMENT = new Attribute("comment"); + + /** + * The compact attribute + */ + public static final Attribute COMPACT = new Attribute("compact"); + + /** + * The content attribute + */ + public static final Attribute CONTENT = new Attribute("content"); + + /** + * The coords attribute + */ + public static final Attribute COORDS = new Attribute("coords"); + + /** + * The data attribute + */ + public static final Attribute DATA = new Attribute("data"); + + /** + * The declare attribute + */ + public static final Attribute DECLARE = new Attribute("declare"); + + /** + * The dir attribute + */ + public static final Attribute DIR = new Attribute("dir"); + + /** + * The dummy attribute + */ + public static final Attribute DUMMY = new Attribute("dummy"); + + /** + * The enctype attribute + */ + public static final Attribute ENCTYPE = new Attribute("enctype"); + + /** + * The endtag attribute + */ + public static final Attribute ENDTAG = new Attribute("endtag"); + + /** + * The face attribute + */ + public static final Attribute FACE = new Attribute("face"); + + /** + * The frameborder attribute + */ + public static final Attribute FRAMEBORDER = new Attribute("frameborder"); + + /** + * The halign attribute + */ + public static final Attribute HALIGN = new Attribute("halign"); + + /** + * The height attribute + */ + public static final Attribute HEIGHT = new Attribute("height"); + + /** + * The href attribute + */ + public static final Attribute HREF = new Attribute("href"); + + /** + * The hspace attribute + */ + public static final Attribute HSPACE = new Attribute("hspace"); + + /** + * The http-equiv attribute + */ + public static final Attribute HTTPEQUIV = new Attribute("http-equiv"); + + /** + * The id attribute + */ + public static final Attribute ID = new Attribute("id"); + + /** + * The ismap attribute + */ + public static final Attribute ISMAP = new Attribute("ismap"); + + /** + * The lang attribute + */ + public static final Attribute LANG = new Attribute("lang"); + + /** + * The language attribute + */ + public static final Attribute LANGUAGE = new Attribute("language"); + + /** + * The link attribute + */ + public static final Attribute LINK = new Attribute("link"); + + /** + * The lowsrc attribute + */ + public static final Attribute LOWSRC = new Attribute("lowsrc"); + + /** + * The marginheight attribute + */ + public static final Attribute MARGINHEIGHT = new Attribute("marginheight"); + + /** + * The marginwidth attribute + */ + public static final Attribute MARGINWIDTH = new Attribute("marginwidth"); + + /** + * The maxlength attribute + */ + public static final Attribute MAXLENGTH = new Attribute("maxlength"); + + /** + * The media attribute + */ + public static final Attribute MEDIA = new Attribute("media"); + + /** + * The method attribute + */ + public static final Attribute METHOD = new Attribute("method"); + + /** + * The multiple attribute + */ + public static final Attribute MULTIPLE = new Attribute("multiple"); + + /** + * The n attribute + */ + public static final Attribute N = new Attribute("n"); + + /** + * The name attribute + */ + public static final Attribute NAME = new Attribute("name"); + + /** + * The nohref attribute + */ + public static final Attribute NOHREF = new Attribute("nohref"); + + /** + * The noresize attribute + */ + public static final Attribute NORESIZE = new Attribute("noresize"); + + /** + * The noshade attribute + */ + public static final Attribute NOSHADE = new Attribute("noshade"); + + /** + * The nowrap attribute + */ + public static final Attribute NOWRAP = new Attribute("nowrap"); + + /** + * The prompt attribute + */ + public static final Attribute PROMPT = new Attribute("prompt"); + + /** + * The rel attribute + */ + public static final Attribute REL = new Attribute("rel"); + + /** + * The rev attribute + */ + public static final Attribute REV = new Attribute("rev"); + + /** + * The rows attribute + */ + public static final Attribute ROWS = new Attribute("rows"); + + /** + * The rowspan attribute + */ + public static final Attribute ROWSPAN = new Attribute("rowspan"); + + /** + * The scrolling attribute + */ + public static final Attribute SCROLLING = new Attribute("scrolling"); + + /** + * The selected attribute + */ + public static final Attribute SELECTED = new Attribute("selected"); + + /** + * The shape attribute + */ + public static final Attribute SHAPE = new Attribute("shape"); + + /** + * The shapes attribute + */ + public static final Attribute SHAPES = new Attribute("shapes"); + + /** + * The size attribute + */ + public static final Attribute SIZE = new Attribute("size"); + + /** + * The src attribute + */ + public static final Attribute SRC = new Attribute("src"); + + /** + * The standby attribute + */ + public static final Attribute STANDBY = new Attribute("standby"); + + /** + * The start attribute + */ + public static final Attribute START = new Attribute("start"); + + /** + * The style attribute + */ + public static final Attribute STYLE = new Attribute("style"); + + /** + * The target attribute + */ + public static final Attribute TARGET = new Attribute("target"); + + /** + * The text attribute + */ + public static final Attribute TEXT = new Attribute("text"); + + /** + * The title attribute + */ + public static final Attribute TITLE = new Attribute("title"); + + /** + * The type attribute + */ + public static final Attribute TYPE = new Attribute("type"); + + /** + * The usemap attribute + */ + public static final Attribute USEMAP = new Attribute("usemap"); + + /** + * The valign attribute + */ + public static final Attribute VALIGN = new Attribute("valign"); + + /** + * The value attribute + */ + public static final Attribute VALUE = new Attribute("value"); + + /** + * The valuetype attribute + */ + public static final Attribute VALUETYPE = new Attribute("valuetype"); + + /** + * The version attribute + */ + public static final Attribute VERSION = new Attribute("version"); + + /** + * The vlink attribute + */ + public static final Attribute VLINK = new Attribute("vlink"); + + /** + * The vspace attribute + */ + public static final Attribute VSPACE = new Attribute("vspace"); + + /** + * The width attribute + */ + public static final Attribute WIDTH = new Attribute("width"); + private final String name; + + /** + * Creates the attribute with the given name. + */ + protected Attribute(String a_name) + { + name = a_name; + } + + /** + * Calls compareTo on the tag names (Strings) + */ + public int compareTo(Object other) + { + return name.compareTo(((Attribute) other).name); + } + + /** + * The attributes are equal if the names are equal + * (ignoring case) + */ + public boolean equals(Object other) + { + if (other == this) + return true; + + if (!(other instanceof Attribute)) + return false; + + Attribute that = (Attribute) other; + + return that.name.equalsIgnoreCase(name); + } + + /** + * Returns the hash code which corresponds to the string for this tag. + */ + public int hashCode() + { + return name == null ? 0 : name.hashCode(); + } + + /** + * Returns the attribute name. The names of the built-in attributes + * are always returned in lowercase. + */ + public String toString() { - this.id = id; + return name; + } + + /** + * Return an array of all attributes, declared in the HTML.Attribute + * class. WARNING: attributes are the only public fields, + * expected in this class. + */ + static Attribute[] getAllAttributes() + { + Field[] f = Attribute.class.getFields(); + Attribute[] attrs = new Attribute[ f.length ]; + Field x; + int p = 0; + Attribute a; + + for (int i = 0; i < f.length; i++) + { + x = f [ i ]; + + if ((x.getModifiers() & Modifier.STATIC) != 0) + { + if (x.getType().equals(Attribute.class)) + { + try + { + a = (Attribute) x.get(null); + attrs [ p++ ] = a; + } + catch (Exception ex) + { + ex.printStackTrace(System.err); + throw new Error("This should never happen, report a bug"); + } + } + } + } + + return attrs; } } - public static class UnknownTag extends Tag implements Serializable + /** + * Represents a HTML tag. + */ + public static class Tag + implements Comparable, Serializable { /** - * For compatability with Sun's JDK 1.4.2 rev. 5 + * The <a> tag + */ + public static final Tag A = new Tag("a"); + + /** + * The <address> tag + */ + public static final Tag ADDRESS = new Tag("address"); + + /** + * The <applet> tag + */ + public static final Tag APPLET = new Tag("applet"); + + /** + * The <area> tag + */ + public static final Tag AREA = new Tag("area"); + + /** + * The <b> tag + */ + public static final Tag B = new Tag("b"); + + /** + * The <base> tag + */ + public static final Tag BASE = new Tag("base"); + + /** + * The <basefont> tag + */ + public static final Tag BASEFONT = new Tag("basefont"); + + /** + * The <big> tag + */ + public static final Tag BIG = new Tag("big"); + + /** + * The <blockquote> tag , breaks flow, block tag. + */ + public static final Tag BLOCKQUOTE = new Tag("blockquote", BREAKS | BLOCK); + + /** + * The <body> tag , breaks flow, block tag. + */ + public static final Tag BODY = new Tag("body", BREAKS | BLOCK); + + /** + * The <br> tag , breaks flow. + */ + public static final Tag BR = new Tag("br", BREAKS); + + /** + * The <caption> tag + */ + public static final Tag CAPTION = new Tag("caption"); + + /** + * The <center> tag , breaks flow. + */ + public static final Tag CENTER = new Tag("center", BREAKS); + + /** + * The <cite> tag + */ + public static final Tag CITE = new Tag("cite"); + + /** + * The <code> tag + */ + public static final Tag CODE = new Tag("code"); + + /** + * The <dd> tag , breaks flow, block tag. + */ + public static final Tag DD = new Tag("dd", BREAKS | BLOCK); + + /** + * The <dfn> tag + */ + public static final Tag DFN = new Tag("dfn"); + + /** + * The <dir> tag , breaks flow, block tag. + */ + public static final Tag DIR = new Tag("dir", BREAKS | BLOCK); + + /** + * The <div> tag , breaks flow, block tag. + */ + public static final Tag DIV = new Tag("div", BREAKS | BLOCK); + + /** + * The <dl> tag , breaks flow, block tag. + */ + public static final Tag DL = new Tag("dl", BREAKS | BLOCK); + + /** + * The <dt> tag , breaks flow, block tag. + */ + public static final Tag DT = new Tag("dt", BREAKS | BLOCK); + + /** + * The <em> tag + */ + public static final Tag EM = new Tag("em"); + + /** + * The <font> tag + */ + public static final Tag FONT = new Tag("font"); + + /** + * The <form> tag , breaks flow. + */ + public static final Tag FORM = new Tag("form", BREAKS); + + /** + * The <frame> tag + */ + public static final Tag FRAME = new Tag("frame"); + + /** + * The <frameset> tag + */ + public static final Tag FRAMESET = new Tag("frameset"); + + /** + * The <h1> tag , breaks flow, block tag. + */ + public static final Tag H1 = new Tag("h1", BREAKS | BLOCK); + + /** + * The <h2> tag , breaks flow, block tag. + */ + public static final Tag H2 = new Tag("h2", BREAKS | BLOCK); + + /** + * The <h3> tag , breaks flow, block tag. + */ + public static final Tag H3 = new Tag("h3", BREAKS | BLOCK); + + /** + * The <h4> tag , breaks flow, block tag. + */ + public static final Tag H4 = new Tag("h4", BREAKS | BLOCK); + + /** + * The <h5> tag , breaks flow, block tag. + */ + public static final Tag H5 = new Tag("h5", BREAKS | BLOCK); + + /** + * The <h6> tag , breaks flow, block tag. + */ + public static final Tag H6 = new Tag("h6", BREAKS | BLOCK); + + /** + * The <head> tag , breaks flow, block tag. + */ + public static final Tag HEAD = new Tag("head", BREAKS | BLOCK); + + /** + * The <hr> tag , breaks flow. + */ + public static final Tag HR = new Tag("hr", BREAKS); + + /** + * The <html> tag , breaks flow. + */ + public static final Tag HTML = new Tag("html", BREAKS); + + /** + * The <i> tag + */ + public static final Tag I = new Tag("i"); + + /** + * The <img> tag + */ + public static final Tag IMG = new Tag("img"); + + /** + * The <input> tag + */ + public static final Tag INPUT = new Tag("input"); + + /** + * The <isindex> tag , breaks flow. + */ + public static final Tag ISINDEX = new Tag("isindex", BREAKS); + + /** + * The <kbd> tag + */ + public static final Tag KBD = new Tag("kbd"); + + /** + * The <li> tag , breaks flow, block tag. + */ + public static final Tag LI = new Tag("li", BREAKS | BLOCK); + + /** + * The <link> tag + */ + public static final Tag LINK = new Tag("link"); + + /** + * The <map> tag + */ + public static final Tag MAP = new Tag("map"); + + /** + * The <menu> tag , breaks flow, block tag. + */ + public static final Tag MENU = new Tag("menu", BREAKS | BLOCK); + + /** + * The <meta> tag + */ + public static final Tag META = new Tag("meta"); + + /** + * The <nobr> tag + */ + public static final Tag NOBR = new Tag("nobr"); + + /** + * The <noframes> tag , breaks flow, block tag. + */ + public static final Tag NOFRAMES = new Tag("noframes", BREAKS | BLOCK); + + /** + * The <object> tag + */ + public static final Tag OBJECT = new Tag("object"); + + /** + * The <ol> tag , breaks flow, block tag. + */ + public static final Tag OL = new Tag("ol", BREAKS | BLOCK); + + /** + * The <option> tag + */ + public static final Tag OPTION = new Tag("option"); + + /** + * The <p> tag , breaks flow, block tag. + */ + public static final Tag P = new Tag("p", BREAKS | BLOCK); + + /** + * The <param> tag + */ + public static final Tag PARAM = new Tag("param"); + + /** + * The <pre> tag , breaks flow, block tag, preformatted. */ + public static final Tag PRE = new Tag("pre", BREAKS | BLOCK | PREFORMATTED); + + /** + * The <s> tag + */ + public static final Tag S = new Tag("s"); + + /** + * The <samp> tag + */ + public static final Tag SAMP = new Tag("samp"); + + /** + * The <script> tag + */ + public static final Tag SCRIPT = new Tag("script"); + + /** + * The <select> tag + */ + public static final Tag SELECT = new Tag("select"); + + /** + * The <small> tag + */ + public static final Tag SMALL = new Tag("small"); + + /** + * The <span> tag + */ + public static final Tag SPAN = new Tag("span"); + + /** + * The <strike> tag + */ + public static final Tag STRIKE = new Tag("strike"); + + /** + * The <strong> tag + */ + public static final Tag STRONG = new Tag("strong"); + + /** + * The <style> tag + */ + public static final Tag STYLE = new Tag("style"); + + /** + * The <sub> tag + */ + public static final Tag SUB = new Tag("sub"); + + /** + * The <sup> tag + */ + public static final Tag SUP = new Tag("sup"); + + /** + * The <table> tag , block tag. + */ + public static final Tag TABLE = new Tag("table", BLOCK); + + /** + * The <td> tag , breaks flow, block tag. + */ + public static final Tag TD = new Tag("td", BREAKS | BLOCK); + + /** + * The <textarea> tag , preformatted. + */ + public static final Tag TEXTAREA = new Tag("textarea", PREFORMATTED); + + /** + * The <th> tag , breaks flow, block tag. + */ + public static final Tag TH = new Tag("th", BREAKS | BLOCK); + + /** + * The <title> tag , breaks flow, block tag. + */ + public static final Tag TITLE = new Tag("title", BREAKS | BLOCK); + + /** + * The <tr> tag , block tag. + */ + public static final Tag TR = new Tag("tr", BLOCK); + + /** + * The <tt> tag + */ + public static final Tag TT = new Tag("tt"); + + /** + * The <u> tag + */ + public static final Tag U = new Tag("u"); + + /** + * The <ul> tag , breaks flow, block tag. + */ + public static final Tag UL = new Tag("ul", BREAKS | BLOCK); + + /** + * The <var> tag + */ + public static final Tag VAR = new Tag("var"); + + /* Special tags */ + + /** + * Total number of syntetic tags, delared in the Tag class. + * This must be adjusted if the new synthetic tags are declared. + * Otherwise the HTML.getAllTags() will not work as expected. + */ + private static final int TOTAL_SYNTHETIC_TAGS = 3; + + /** + * All comments are labeled with this tag. + * This tag is not included into the array, returned by getAllTags(). + * toString() returns 'comment'. HTML reader synthesizes this tag. + */ + public static final Tag COMMENT = new Tag("comment", SYNTETIC); + + /** + * All text content is labeled with this tag. + * This tag is not included into the array, returned by getAllTags(). + * toString() returns 'content'. HTML reader synthesizes this tag. + */ + public static final Tag CONTENT = new Tag("content", SYNTETIC); + + /** + * All text content must be in a paragraph element. + * If a paragraph didn't exist when content was encountered, + * a paragraph is manufactured. + * toString() returns 'implied'. HTML reader synthesizes this tag. + */ + public static final Tag IMPLIED = new Tag("implied", SYNTETIC); + final String name; + final int flags; + + /** + * Creates a new Tag with the specified id, and with causesBreak + * and isBlock set to false. + */ + protected Tag(String id) + { + name = id; + flags = 0; + } + + /** + * Creates a new Tag with the specified tag name and + * causesBreak and isBlock properties. + */ + protected Tag(String id, boolean causesBreak, boolean isBlock) + { + int f = 0; + + if (causesBreak) + { + f |= BREAKS; + } + + if (isBlock) + { + f |= BLOCK; + } + + flags = f; + name = id; + } + + /** + * Create a tag taking flags. + */ + Tag(String id, int a_flags) + { + name = id; + flags = a_flags; + } + + /** + * Returns true if this tag is a block tag, which is a tag used to + * add structure to a document. + */ + public boolean isBlock() + { + return (flags & BLOCK) != 0; + } + + /** + * Returns true if this tag is pre-formatted, which is true if + * the tag is either PRE or TEXTAREA + */ + public boolean isPreformatted() + { + return (flags & PREFORMATTED) != 0; + } + + /** + * Returns true if this tag causes a line break to the flow of text + */ + public boolean breaksFlow() + { + return (flags & BREAKS) != 0; + } + + /** + * Calls compareTo on the tag names (Strings) + */ + public int compareTo(Object other) + { + return name.compareTo(((Tag) other).name); + } + + /** + * The tags are equal if the names are equal (ignoring case). + */ + public boolean equals(Object other) + { + if (other == this) + { + return true; + } + + if (!(other instanceof Tag)) + { + return false; + } + + Tag that = (Tag) other; + + return that.name.equalsIgnoreCase(name); + } + + /** + * Returns the hash code which corresponds to the string for this tag. + */ + public int hashCode() + { + return name == null ? 0 : name.hashCode(); + } + + /** + * Returns the tag name. The names of the built-in tags are always + * returned in lowercase. + */ + public String toString() + { + return name; + } + + /** + * Return an array of HTML tags, declared in HTML.Tag class. + * WARNING: This method expects that the Tags are the only + * public fields declared in the Tag class. + */ + static Tag[] getAllTags() + { + Field[] f = Tag.class.getFields(); + Field x; + + // The syntetic tags are not included. + Tag[] tags = new Tag[ f.length - TOTAL_SYNTHETIC_TAGS ]; + int p = 0; + Tag t; + + for (int i = 0; i < f.length; i++) + { + x = f [ i ]; + + if ((x.getModifiers() & Modifier.STATIC) != 0) + { + if (x.getType().equals(Tag.class)) + { + try + { + t = (Tag) x.get(null); + + if (!t.isSyntetic()) + { + tags [ p++ ] = t; + } + } + catch (IllegalAccessException ex) + { + unexpected(ex); + } + catch (IllegalArgumentException ex) + { + unexpected(ex); + } + } + } + } + + return tags; + } + + /** + * Returns true for tags, generated by the html reader + * (COMMENT, CONTENT and IMPLIED). + */ + boolean isSyntetic() + { + return (flags & SYNTETIC) != 0; + } + + private static void unexpected(Exception ex) + throws Error + { + throw new Error("This should never happen, report a bug", ex); + } + } + + /** + * Represents an unknown HTML tag. + * @author Mark Wielaard (mark@klomp.org) + */ + public static class UnknownTag + extends Tag + implements Serializable + { private static final long serialVersionUID = -1534369342247250625L; - public UnknownTag(String id) + /** + * Creates a new UnknownTag with the specified name + * @param name The tag name. + * + */ + public UnknownTag(String name) { - super(id); + super(name); } } + + /** + * This value is returned for attributes without value that have no + * default value defined in the DTD. + */ + public static final String NULL_ATTRIBUTE_VALUE = "#DEFAULT"; + + /* Package level html tag flags */ + static final int BREAKS = 1; + static final int BLOCK = 2; + static final int PREFORMATTED = 4; + static final int SYNTETIC = 8; + private static Map tagMap; + private static Map attrMap; + + /** + * The public constructor (does nothing). It it seldom required to have + * an instance of this class, because all public fields and methods + * are static. + */ + public HTML() + { + } + + /** + * Returns the set of the recognized HTML attributes. + */ + public static HTML.Attribute[] getAllAttributeKeys() + { + return Attribute.getAllAttributes(); + } + + /** + * Returns the set of actual HTML tags that are recognized by + * the default HTML reader. The returned array does not include the + * COMMENT, CONTENT and IMPLIED tags. + */ + public static HTML.Tag[] getAllTags() + { + return Tag.getAllTags(); + } + + /** + * Returns an htl attribute constant for the given attribute name. + * @param attName the attribute name, case insensitive + */ + public static Attribute getAttributeKey(String attName) + { + if (attrMap == null) + { + // Create the map on demand. + attrMap = new TreeMap(); + + Attribute[] attrs = getAllAttributeKeys(); + + for (int i = 0; i < attrs.length; i++) + { + attrMap.put(attrs [ i ].toString(), attrs [ i ]); + } + } + + return (Attribute) attrMap.get(attName.toLowerCase()); + } + + /** + * Searches the value of given attribute in the provided set. + * If the value is found (String type expected), tries to parse it as + * an integer value. If succeded, returns the obtained integer value. + * + * For example:<p><code> + * SimpleAttributeSet ase = new SimpleAttributeSet(); + * ase.addAttribute(HTML.getAttributeKey("size"),"222"); + * System.out.println( + * HTML.getIntegerAttributeValue + * (ase, HTML.getAttributeKey("size"), 333)); // prints "222" + * System.out.println( + * HTML.getIntegerAttributeValue + * (ase, HTML.getAttributeKey("width"), 333)); // prints "333". + * </code></p> + * + * + * @param set The attribute set to search in. If the set contains the + * given attribute, it must by a type of String. + * @param attribute The html attribute to search in + * @param defaultValue The value that is returned if the attribute is not + * found in the given set or if the NumberFormatException was thrown + * during the parsing. + */ + public static int getIntegerAttributeValue(AttributeSet set, + HTML.Attribute attribute, + int defaultValue + ) + { + Object v = set.getAttribute(attribute); + + if (v == null) + { + return defaultValue; + } + + try + { + return Integer.parseInt(v.toString().trim()); + } + catch (Exception ex) + { + return defaultValue; + } + } + + /** + * Returns a HTML tag constant for the given HTML attribute name. + * If the tag is unknown, the null is returned. + * @param tagName the tag name, case insensitive + */ + public static Tag getTag(String tagName) + { + if (tagMap == null) + { + // Create the mao on demand. + tagMap = new TreeMap(); + + Tag[] tags = getAllTags(); + + for (int i = 0; i < tags.length; i++) + { + tagMap.put(tags [ i ].toString(), tags [ i ]); + } + } + + return (Tag) tagMap.get(tagName.toLowerCase()); + } } diff --git a/libjava/javax/swing/text/html/HTMLDocument.java b/libjava/javax/swing/text/html/HTMLDocument.java new file mode 100644 index 00000000000..45c87c7b7fd --- /dev/null +++ b/libjava/javax/swing/text/html/HTMLDocument.java @@ -0,0 +1,53 @@ +/* HTMLDocument.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html; + +import javax.swing.text.DefaultStyledDocument; + +/** + * TODO: This class is not yet completetely implemented. + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class HTMLDocument extends DefaultStyledDocument +{ + public void processHTMLFrameHyperlinkEvent(HTMLFrameHyperlinkEvent event) + { + } +} diff --git a/libjava/javax/swing/text/html/HTMLEditorKit.java b/libjava/javax/swing/text/html/HTMLEditorKit.java new file mode 100755 index 00000000000..742eab36eb7 --- /dev/null +++ b/libjava/javax/swing/text/html/HTMLEditorKit.java @@ -0,0 +1,173 @@ +/* HTMLEditorKit.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html; + +import java.io.Reader; + +import javax.swing.text.BadLocationException; +import javax.swing.text.MutableAttributeSet; + +/** + * This class is NOT implemented. This file currently holds only + * declarations of the two enclosing classes, necessary for testing + * the implemented javax.swing.text.html.parser package. + * @author No authorship is taken, implement the class and be! + * TODO: replace this header after implementing the class. + */ +public class HTMLEditorKit +{ + /** + * The abstract HTML parser declaration. + */ + public abstract static class Parser + { + /** + * Parse the HTML text, calling various methods of the provided callback + * in response to the occurence of the corresponding HTML constructions. + * @param reader The reader to read the source HTML from. + * @param callback The callback to receive information about the parsed + * HTML structures + * @param ignoreCharSet If true, the parser ignores all charset information + * that may be present in HTML documents. + * @throws IOException, normally if the reader throws one. + */ + public abstract void parse(Reader reader, ParserCallback callback, + boolean ignoreCharSet + ) + throws java.io.IOException; + } + + /** + * The "hook" that receives all information about the HTML document + * structure while parsing it. The methods are invoked by parser + * and should be normally overridden. + */ + public static class ParserCallback + { + /** + * If the tag does not occurs in the html stream directly, but + * is supposed by parser, the tag attribute set contains this additional + * attribute, having value Boolean.True. + */ + public static final Object IMPLIED = "_implied_"; + + /** + * The parser calls this method after it finishes parsing the document. + */ + public void flush() + throws BadLocationException + { + } + + /** + * Handle HTML comment, present in the given position. + * @param comment the comment + * @position the position of the comment in the text being parsed. + */ + public void handleComment(char[] comment, int position) + { + } + + /** + * Notifies about the character sequences, used to separate lines in + * this document. The parser calls this method after it finishes + * parsing the document, but before flush(). + * @param end_of_line The "end of line sequence", one of: \r or \n or \r\n. + */ + public void handleEndOfLineString(String end_of_line) + { + } + + /** + * The method is called when the HTML closing tag ((like </table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param The tag being handled + * @position the tag position in the text being parsed. + */ + public void handleEndTag(HTML.Tag tag, int position) + { + } + + /** + * Handle the error. + * @param message The message, explaining the error. + * @param position The starting position of the fragment that has caused + * the error in the html document being parsed. + */ + public void handleError(String message, int position) + { + } + + /** + * Handle the tag with no content, like <br>. The method is + * called for the elements that, in accordance with the current DTD, + * has an empty content. + * @param tag The tag being handled. + * @param position The tag position in the text being parsed. + */ + public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, + int position + ) + { + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param tag The tag being handled + * @param position The tag position in the text bein parsed + */ + public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, + int position + ) + { + } + + /** + * Handle the text section. + * @param text A section text. + * @param position The text position in the HTML document text being parsed. + */ + public void handleText(char[] text, int position) + { + } + } +} diff --git a/libjava/javax/swing/text/html/HTMLFrameHyperlinkEvent.java b/libjava/javax/swing/text/html/HTMLFrameHyperlinkEvent.java new file mode 100644 index 00000000000..82de2a80355 --- /dev/null +++ b/libjava/javax/swing/text/html/HTMLFrameHyperlinkEvent.java @@ -0,0 +1,132 @@ +/* HTMLFrameHyperlinkEvent.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html; + +import java.net.URL; + +import javax.swing.event.HyperlinkEvent; +import javax.swing.event.HyperlinkEvent.EventType; +import javax.swing.text.Element; + +/** + * HTMLFrameHyperlinkEvent transfers information about the link that was + * activated in a frame. + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class HTMLFrameHyperlinkEvent + extends HyperlinkEvent +{ + private final String target_frame; + + /** + * Creates a new hypertext link event. + * + * @param source The object this link is associated to. + * @param type The type of event. + * @param url The URL this link pointing too. + * @param element The element in the document representing the anchor. + * @param frame - the Frame to display the document in. + */ + public HTMLFrameHyperlinkEvent(Object source, EventType type, URL url, + Element element, String frame) + { + super(source, type, url, frame, element); + target_frame = frame; + } + + /** + * Creates a new hypertext link event. + * + * @param source The object this link is associated to. + * @param type The type of event. + * @param url The URL this link pointing too. + * @param frame - the Frame to display the document in. + */ + public HTMLFrameHyperlinkEvent(Object source, EventType type, URL url, + String frame) + { + super(source, type, url, frame); + target_frame = frame; + } + + /** + * Creates a new hypertext link event. + * + * @param source The object this link is associated to. + * @param type The type of event. + * @param url The URL this link pointing too. + * @param description The description for this link. + * @param element The element in the document representing the anchor. + * @param frame - the Frame to display the document in. + */ + public HTMLFrameHyperlinkEvent(Object source, EventType type, URL url, + String description, Element element, + String frame) + { + super(source, type, url, description, element); + target_frame = frame; + } + + /** + * Creates a new hypertext link event. + * + * @param source The object this link is associated to. + * @param type The type of event. + * @param url The URL this link pointing too. + * @param description The description for this link. + * @param frame - the Frame to display the document in. + */ + public HTMLFrameHyperlinkEvent(Object source, EventType type, URL url, + String description, String frame) + { + super(source, type, url, description); + target_frame = frame; + } + + /** + * Gets the string, passed as the target frame identifier. + * + * @return the target for the link. + */ + public String getTarget() + { + return target_frame; + } +} diff --git a/libjava/javax/swing/text/html/parser/AttributeList.java b/libjava/javax/swing/text/html/parser/AttributeList.java new file mode 100755 index 00000000000..c74f35953ab --- /dev/null +++ b/libjava/javax/swing/text/html/parser/AttributeList.java @@ -0,0 +1,294 @@ +/* AttributeList.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +import java.io.Serializable; + +import java.util.Enumeration; +import java.util.Vector; + +/** + * <p> + * Stores the attribute information, obtained by parsing SGML (DTD) tag + * <code><!ATTLIST .. ></code></p> + * <p> + * Elements can have a associated named properties (attributes) having the + * assigned values. The element start tag can have any number of attribute + * value pairs, separated by spaces. They can appear in any order. + * SGML requires you to delimit the attribute values using either double (") + * or single (') quotation marks. In HTML, it is possible + * (but not recommended) to specify the value of an attribute without + * quotation marks. Such attribute value may only contain + * letters, digits, hyphens (-) and periods (.) . + * </p> + * <p> + * The <code>AttributeList</code> defines a single attribute that additionally + * has a pointer referencing the possible subsequent attribute. + * The whole structure is just a simple linked list, storing all attributes of + * some <code>Element</code>. + * Use the <code>getNext()</code> method repeatedly to see all attributes in + * the list. + * </p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class AttributeList + implements DTDConstants, Serializable +{ + /** Maps between type names and they string values. */ + private static final gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("CDATA", DTDConstants.CDATA); + add("ENTITY", DTDConstants.ENTITY); + add("ENTITIES", DTDConstants.ENTITIES); + add("ID", DTDConstants.ID); + add("IDREF", DTDConstants.IDREF); + add("IDREFS", DTDConstants.IDREFS); + add("NAME", DTDConstants.NAME); + add("NAMES", DTDConstants.NAMES); + add("NMTOKEN", DTDConstants.NMTOKEN); + add("NMTOKENS", DTDConstants.NMTOKENS); + add("NOTATION", DTDConstants.NOTATION); + add("NUMBER", DTDConstants.NUMBER); + add("NUMBERS", DTDConstants.NUMBERS); + add("NUTOKEN", DTDConstants.NUTOKEN); + add("NUTOKENS", DTDConstants.NUTOKENS); + } + }; + + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -1361214058742015233L; + + /** + * The value of ( = pointer to ) the next attribute in the linked list, + * storing all attributes of some Element. Contains null for the + * last attribute. + */ + public AttributeList next; + + /** + * The name of the attribute. The attribute names are case insensitive. + */ + public String name; + + /** + * The default value of this attribute. Equals to null if no default value + * is specified. + */ + public String value; + + /** + * The explicit set of the allowed values of this attribute. Equals to + * null, if this parameter was not specified. + * Values, defined in DTD, are case insensitive. + */ + public Vector values; + + /** + * The modifier of this attribute. This field contains one of the + * following DTD constants: + * <ul> + * <li> REQUIRED if the attribute value is always required,</li> + * <li> IMPLIED if the user agent must supply the default value itself,</li> + * <li> FIXED if the attribute value is fixed to some value and cannot + * be changed.</li> + * <li> DEFAULT if the attribute default value has been supplied.</li> + * <li> CURRENT the value that at any point in the document is + * the last value supplied for that element. A value is required to be + * supplied for the first* occurrence of an element</li> + * <li> CONREF specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead.</li> + * </ul> + */ + public int modifier; + + /** + * The type of the attribute. The possible values of this field + * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants. + */ + public int type; + + /** + * Creates the attribute with the given name, initializing other fields + * to the default values ( 0 and null ). + * + * @param a_name The name of the attribute. + */ + public AttributeList(String a_name) + { + name = a_name; + } + + /** + * Creates the attribute with the given properties. + * @param a_name The name of the attribute + * @param a_type The type of the attribute. The possible values are defined + * in <code> DTDConstants</code>. + * @param a_modifier The modifier of this attribute. The possible values + * are defined in <code> DTDConstants</code>. + * @param a_default The default value of this attribute + * @param allowed_values The explicit set of the allowed values of + * this attribute + * @param a_next The value of the subsequent instance of the AttributeList, + * representing the next attribute definition for the same element. + * Equals to null for the last attribute definition. + */ + public AttributeList(String a_name, int a_type, int a_modifier, + String a_default, Vector allowed_values, + AttributeList a_next + ) + { + this(a_name); + type = a_type; + modifier = a_modifier; + value = a_default; + values = allowed_values; + next = a_next; + } + + /** + * Get the modifier of this attribute. This field contains one of the + * following DTD constants: + * <ul> + * <li> REQUIRED if the attribute value is always required,</li> + * <li> IMPLIED if the user agent must supply the default value itself,</li> + * <li> FIXED if the attribute value is fixed to some value and cannot + * be changed.</li> + * <li> DEFAULT if the attribute default value has been supplied.</li> + * <li> CURRENT the value that at any point in the document is + * the last value supplied for that element. A value is required to be + * supplied for the first* occurrence of an element</li> + * <li> CONREF specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead.</li> + * </ul> + */ + public int getModifier() + { + return modifier; + } + + /** + * Get the name of the attribute. + * The value is returned as it was supplied to a + * constructor, preserving the character case. + */ + public String getName() + { + return name; + } + + /** + * Get the value of ( = pointer to ) the next attribute in the linked list, + * storing all attributes of some Element. Contains null for the + * last attribute. + */ + public AttributeList getNext() + { + return next; + } + + /** + * Get the type of the attribute. The possible values of this field + * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants. + */ + public int getType() + { + return type; + } + + /** + * Get the default value of this attribute. + */ + public String getValue() + { + return value; + } + + /** + * Get the allowed values of this attribute. + */ + public Enumeration getValues() + { + return values.elements(); + } + + /** + * Converts a string value, representing a valid SGLM attribute type, + * into the corresponding value, defined in DTDConstants. + * @param typeName the name of the type (character case is ignored). + * @return a value from DTDConstants or DTDConstants.ANY if the + * string is not representing a known type. The known attribute types + * in this implementation are CDATA, ENTITY, ENTITIES, ID, IDREF, IDREFS, + * NAME, NAMES, NMTOKEN, NMTOKENS, NOTATION, NUMBER, NUMBERS, NUTOKEN and + * NUTOKENS. + * @throws NullPointerException if the passed parameter is null. + */ + public static int name2type(String typeName) + { + return mapper.get(typeName.toUpperCase()); + } + + /** + * Returns the attribute name. + */ + public String toString() + { + return name; + } + + /** + * Converts a value from DTDConstants into the string representation. + * @param type - an integer value of the public static integer field, + * defined in the DTDConstants class. + * @return a corresponding SGML DTD keyword (UPPERCASE) or null if there + * are no attribute type constant having the given value. + */ + public static String type2name(int type) + { + return mapper.get(type); + } +} diff --git a/libjava/javax/swing/text/html/parser/ContentModel.java b/libjava/javax/swing/text/html/parser/ContentModel.java new file mode 100755 index 00000000000..7c8ff35a250 --- /dev/null +++ b/libjava/javax/swing/text/html/parser/ContentModel.java @@ -0,0 +1,218 @@ +/* ContentModel.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.models.transformer; + +import java.io.Serializable; + +import java.util.Vector; + +/** + * A representation of the element content. The instances of this class + * can be arranged into the linked list, representing a BNF expression. + * The content model is constructed as a branched tree structure in the + * following way: + * <pre> + * a = new ContentModel('+', A, null); // a reprensents A+ + * b = new ContentModel('&', B, a); // b represents B & A+ + * c = new ContentModel('*', b, null); // c represents ( B & A+) * + * d = new ContentModel('|', new ContentModel('*', A, null), + * new ContentModel('?', B, null)); // d represents ( A* | B? ) + * </pre> + * where the valid operations are: + * <ul> + * <li><code>E* </code> E occurs zero or more times</li> + * <li><code>E+ </code> E occurs one or more times</li> + * <li><code>E? </code> E occurs once or not atl all</li> + * <li><code>A,B</code> A occurs before B</li> + * <li><code>A|B</code> both A and B are permitted in any order. + * The '|' alone does not permit the repetetive occurence of A or B + * (use <code>(A|B)*</code>.</li> + * <li><code>A&B</code> both A and B must occur once (in any order)</li> + * </ul> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class ContentModel + implements Serializable +{ + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -1130825523866321257L; + + /** + * The next content model model ( = pointer to the next element of + * the linked list) for the binary expression (',','&' or '|'). Null + * for the last element in the list. + */ + public ContentModel next; + + /** + * The document content, containing either Element or the enclosed + * content model (that would be in the parentheses in BNF expression). + */ + public Object content; + + /** + * Specifies the BNF operation between this node and the node, + * stored in the field <code>next</code> (or for this node, if it is + * an unary operation. + */ + public int type; + + /** Create a content model initializing all fields to default values. */ + public ContentModel() + { + } + + /** + * Create a content model, consisting of the single element. + * Examples: + *<code> + * a = new ContentModel('+', A, null); // a reprensents A+ + * b = new ContentModel('&', B, a); // b represents B & A+ + * c = new ContentModel('*', b, null); // c represents ( B & A+) * + * d = new ContentModel('|', A, + * new ContentModel('?',b, null); + * // d represents + * </code> + */ + public ContentModel(Element a_content) + { + content = a_content; + } + + /** + * Create a content model, involving expression of the given type. + * @param a_type The expression operation type ('*','?' or '+' + * @param a_content The content for that the expression is applied. + */ + public ContentModel(int a_type, ContentModel a_content) + { + content = a_content; + type = a_type; + } + + /** + * Create a content model, involving binary expression of the given type. + * @param a_type The expression operation type ( ',', '|' or '&'). + * @param a_content The content of the left part of the expression. + * @param a_next The content model, representing the right part of the + * expression. + */ + public ContentModel(int a_type, Object a_content, ContentModel a_next) + { + content = a_content; + type = a_type; + next = a_next; + } + + /** + * Adds all list elements to the given vector, ignoring the + * operations between the elements. The old vector values are not + * discarded. + * @param elements - a vector to add the values to. + */ + public void getElements(Vector elements) + { + ContentModel c = this; + + while (c != null) + { + elements.add(c.content); + c = c.next; + } + } + + /** + * Checks if the content model matches an empty input stream. + * The empty content is created using SGML DTD keyword EMPTY. + * The empty model is a model with the content field equal to null. + * + * @return true if the content field is equal to null. + */ + public boolean empty() + { + return content == null; + } + + /** + * Get the element, stored in the <code>next.content</code>. + * The method is programmed as the part of the standard API, but not + * used in this implementation. + * @return the value of the field <code>next</code>. + */ + public Element first() + { + return (Element) next.content; + } + + /** + * Checks if this object can potentially be the first token in the + * ContenModel list. The method is programmed as the part of the + * standard API, but not used in this implementation. + */ + public boolean first(Object token) + { + ContentModel c = this; + while (c.next != null) + { + if (c.content != null && c.content.toString().equals(token.toString()) && + c.type != ',' + ) + + // Agree if the operation with the preceeding element + // is not the comma operation. + return true; + c = c.next; + } + return false; + } + + /** + * Returns a string representation (an expression) of this content model. + * The expression has BNF-like syntax, except the absence of the + * unary operator is additionally indicated by " ' ". It is + * advisable to check the created models for correctness using this + * method. + */ + public String toString() + { + return transformer.transform(this).toString(); + } +} diff --git a/libjava/javax/swing/text/html/parser/DTD.java b/libjava/javax/swing/text/html/parser/DTD.java new file mode 100755 index 00000000000..270c8158f9e --- /dev/null +++ b/libjava/javax/swing/text/html/parser/DTD.java @@ -0,0 +1,607 @@ +/* DTD.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.BitSet; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; + +/** + * <p>Representation or the SGML DTD document. + * Provides basis for describing a syntax of the + * HTML documents. The fields of this class are NOT initialized in + * constructor. You need to do this separately before passing this data + * structure to the HTML parser. The subclasses with the fields, pre- + * initialized, for example, for HTML 4.01, can be available only between + * the implementation specific classes + * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F } + * in this implementation).</p> + * <p> + * If you need more information about SGML DTD documents, + * the author suggests to read SGML tutorial on + * {@link http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html}. + * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>, + * Oxford University Press, 688 p, ISBN: 0198537379. + * </p> + * <p> + * Warning: the html, head and other tag fields will only be automatically + * assigned if the VM has the correctly implemented reflection mechanism. + * As these fields are not used anywhere in the implementation, not + * exception will be thrown in the opposite case. + * </p> + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class DTD + implements DTDConstants +{ + /** + * The version of the persistent data format. + */ + public static int FILE_VERSION = 1; + + /** + * The table of existing available DTDs. + */ + static Hashtable dtdHash = new Hashtable(); + + /** + * The applet element for this DTD. + */ + public Element applet; + + /** + * The base element for this DTD. + */ + public Element base; + + /** + * The body element for this DTD. + */ + public Element body; + + /** + * The head element for this DTD. + */ + public Element head; + + /** + * The html element for this DTD. + */ + public Element html; + + /** + * The isindex element of for this DTD. + */ + public Element isindex; + + /** + * The meta element for this DTD. + */ + public Element meta; + + /** + * The p element for this DTD. + */ + public Element p; + + /** + * The param element for this DTD. + */ + public Element param; + + /** + * The pcdata for this DTD. + */ + public Element pcdata; + + /** + * The title element for this DTD. + */ + public Element title; + + /** + * The element for accessing all DTD elements by name. + */ + public Hashtable elementHash = new Hashtable(); + + /** + * The entity table for accessing all DTD entities by name. + */ + public Hashtable entityHash = new Hashtable(); + + /** + * The name of this DTD. + */ + public String name; + + /** + * Contains all elements in this DTD. The + * javax.swing.text.html.parser.Element#index field of all elements + * in this vector is set to the element position in this vector. + */ + public Vector elements = new Vector(); + + /** Create a new DTD with the specified name. */ + protected DTD(String a_name) + { + name = a_name; + } + + /** Get this DTD by name. The current implementation + * only looks in the internal table of DTD documents. If no corresponding + * entry is found, the new entry is created, placed into + * the table and returned. */ + public static DTD getDTD(String name) + throws IOException + { + DTD d = (DTD) dtdHash.get(name); + + if (d == null) + { + d = new DTD(name); + dtdHash.put(d.name, d); + } + + return d; + } + + /** + * Get the element by the element name. If the element is not yet + * defined, it is newly created and placed into the element table. + * If the element name matches (ingoring case) a public non static + * element field in this class, this field is assigned to the value + * of the newly created element. + */ + public Element getElement(String element_name) + { + return newElement(element_name); + } + + /** + * Get the element by the value of its + * {@link javax.swing.text.html.parser.Element#index} field. + */ + public Element getElement(int index) + { + return (Element) elements.get(index); + } + + /** + * Get the entity with the given identifier. + * @param id that can be returned by + * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)} + * @return The entity from this DTD or null if there is no entity with + * such id or such entity is not present in the table of this instance. + */ + public Entity getEntity(int id) + { + String name = Entity.mapper.get(id); + + if (name != null) + return (Entity) entityHash.get(name); + else + return null; + } + + /** + * Get the named entity by its name. + */ + public Entity getEntity(String entity_name) + { + return (Entity) entityHash.get(entity_name); + } + + /** + * Get the name of this instance of DTD + */ + public String getName() + { + return name; + } + + /** + * Creates, adds into the entity table and returns the + * character entity like <code>&lt;</code> + * (means '<code><</code>' ); + * @param name The entity name (without heading & and closing ;) + * @param type The entity type + * @param character The entity value (single character) + * @return The created entity + */ + public Entity defEntity(String name, int type, int character) + { + Entity e = newEntity(name, type); + e.data = new char[] { (char) character }; + return e; + } + + /** + * Define the attributes for the element with the given name. + * If the element is not exist, it is created. + * @param forElement + * @param attributes + */ + public void defineAttributes(String forElement, AttributeList attributes) + { + Element e = (Element) elementHash.get(forElement.toLowerCase()); + + if (e == null) + e = newElement(forElement); + + e.atts = attributes; + } + + /** + * Defines the element and adds it to the element table. Sets the + * <code>Element.index</code> field to the value, unique for this + * instance of DTD. If the element with the given name already exists, + * replaces all other its settings by the method argument values. + * @param name the name of the element + * @param type the type of the element + * @param headless true if the element needs no starting tag + * (should not occur in HTML). + * @param tailless true if the element needs no ending tag (like + * <code><hr></code> + * @param content the element content + * @param exclusions the set of elements that must not occur inside + * this element. The <code>Element.index</code> value defines which + * bit in this bitset corresponds to that element. + * @param inclusions the set of elements that can occur inside this + * element. the <code>Element.index</code> value defines which + * bit in this bitset corresponds to that element. + * @param attributes the element attributes. + * @return the newly defined element. + */ + public Element defineElement(String name, int type, boolean headless, + boolean tailless, ContentModel content, + BitSet exclusions, BitSet inclusions, + AttributeList attributes + ) + { + Element e = newElement(name); + e.type = type; + e.oStart = headless; + e.oEnd = tailless; + e.content = content; + e.exclusions = exclusions; + e.inclusions = inclusions; + e.atts = attributes; + + return e; + } + + /** + * Creates, intializes and adds to the entity table the new + * entity. + * @param name the name of the entity + * @param type the type of the entity + * @param data the data section of the entity + * @return the created entity + */ + public Entity defineEntity(String name, int type, char[] data) + { + Entity e = newEntity(name, type); + e.data = data; + + return e; + } + + /** Place this DTD into the DTD table. */ + public static void putDTDHash(String name, DTD dtd) + { + dtdHash.put(name, dtd); + } + + /** + * <p>Reads DTD from an archived format. This format is not standardized + * and differs between implementations.</p><p> This implementation + * reads and defines all entities and elements using + * ObjectInputStream. The elements and entities can be written into the + * stream in any order. The objects other than elements and entities + * are ignored.</p> + * @param stream A data stream to read from. + * @throws java.io.IOException If one is thrown by the input stream + */ + public void read(DataInputStream stream) + throws java.io.IOException + { + ObjectInputStream oi = new ObjectInputStream(stream); + Object def; + try + { + while (true) + { + def = oi.readObject(); + if (def instanceof Element) + { + Element e = (Element) def; + elementHash.put(e.name.toLowerCase(), e); + assignField(e); + } + else if (def instanceof Entity) + { + Entity e = (Entity) def; + entityHash.put(e.name, e); + } + } + } + catch (ClassNotFoundException ex) + { + throw new IOException(ex.getMessage()); + } + catch (EOFException ex) + { + // ok EOF + } + } + + /** + * Returns the name of this instance of DTD. + */ + public String toString() + { + return name; + } + + /** + * Creates and returns new attribute (not an attribute list). + * @param name the name of this attribute + * @param type the type of this attribute (FIXED, IMPLIED or + * REQUIRED from <code>DTDConstants</code>). + * @param modifier the modifier of this attribute + * @param default_value the default value of this attribute + * @param allowed_values the allowed values of this attribute. The multiple + * possible values in this parameter are supposed to be separated by + * '|', same as in SGML DTD <code><!ATTLIST </code>tag. This parameter + * can be null if no list of allowed values is specified. + * @param atts the previous attribute of this element. This is + * placed to the field + * {@link javax.swing.text.html.parser.AttributeList#next }, + * creating a linked list. + * @return + */ + protected AttributeList defAttributeList(String name, int type, int modifier, + String default_value, + String allowed_values, + AttributeList atts + ) + { + AttributeList al = new AttributeList(name); + al.modifier = modifier; + al.value = default_value; + al.next = atts; + + if (allowed_values != null) + { + StringTokenizer st = new StringTokenizer(allowed_values, " \t|"); + Vector v = new Vector(st.countTokens()); + + while (st.hasMoreTokens()) + v.add(st.nextToken()); + + al.values = v; + } + + return al; + } + + /** + * Creates a new content model. + * @param type specifies the BNF operation for this content model. + * The valid operations are documented in the + * {@link javax.swing.text.html.parser.ContentModel#type }. + * @param content the content of this content model + * @param next if the content model is specified by BNF-like + * expression, contains the rest of this expression. + * @return The newly created content model. + */ + protected ContentModel defContentModel(int type, Object content, + ContentModel next + ) + { + ContentModel model = new ContentModel(); + model.type = type; + model.next = next; + model.content = content; + + return model; + } + + /** + * Defines a new element and adds it to the element table. + * If the element alredy exists, + * overrides it settings with the specified values. + * @param name the name of the new element + * @param type the type of the element + * @param headless true if the element needs no starting tag + * @param tailless true if the element needs no closing tag + * @param content the element content. + * @param exclusions the elements that must be excluded from the + * content of this element, in all levels of the hierarchy. + * @param inclusions the elements that can be included as the + * content of this element. + * @param attributes the element attributes. + * @return the created or updated element. + */ + protected Element defElement(String name, int type, boolean headless, + boolean tailless, ContentModel content, + String[] exclusions, String[] inclusions, + AttributeList attributes + ) + { + // compute the bit sets + BitSet exclude = bitSet(exclusions); + BitSet include = bitSet(inclusions); + + Element e = + defineElement(name, type, headless, tailless, content, exclude, include, + attributes + ); + + return e; + } + + /** + * Creates, intializes and adds to the entity table the new + * entity. + * @param name the name of the entity + * @param type the type of the entity + * @param data the data section of the entity + * @return the created entity + */ + protected Entity defEntity(String name, int type, String data) + { + Entity e = newEntity(name, type); + e.data = data.toCharArray(); + + return e; + } + + private void assignField(Element e) + { + String element_name = e.name; + try + { + // Assign the field via reflection. + Field f = getClass().getField(element_name.toLowerCase()); + if ((f.getModifiers() & Modifier.PUBLIC) != 0) + if ((f.getModifiers() & Modifier.STATIC) == 0) + if (f.getType().isAssignableFrom(e.getClass())) + f.set(this, e); + } + catch (IllegalAccessException ex) + { + unexpected(ex); + } + catch (NoSuchFieldException ex) + { + // This is ok. + } + + // Some virtual machines may still lack the proper + // implementation of reflection. As the tag fields + // are not used anywhere in this implementation, + // (and this class is also rarely used by the end user), + // it may be better not to crash everything by throwing an error + // for each case when the HTML parsing is required. + catch (Throwable t) + { + // This VM has no reflection mechanism implemented! + if (t instanceof OutOfMemoryError) + throw (Error) t; + } + } + + /** + * Create the bit set for this array of elements. + * The unknown elements are automatically defined and added + * to the element table. + * @param elements + * @return + */ + private BitSet bitSet(String[] elements) + { + BitSet b = new BitSet(); + + for (int i = 0; i < elements.length; i++) + { + Element e = getElement(elements [ i ]); + + if (e == null) + e = newElement(elements [ i ]); + + b.set(e.index); + } + + return b; + } + + /** + * Find the element with the given name in the element table. + * If not find, create a new element with this name and add to the + * table. + * @param name the name of the element + * @return the found or created element. + */ + private Element newElement(String name) + { + Element e = (Element) elementHash.get(name.toLowerCase()); + + if (e == null) + { + e = new Element(); + e.name = name; + e.index = elements.size(); + elements.add(e); + elementHash.put(e.name.toLowerCase(), e); + assignField(e); + } + return e; + } + + /** + * Creates and adds to the element table the entity with an + * unitialized data section. Used internally. + * @param name the name of the entity + * @param type the type of the entity, a bitwise combination + * of GENERAL, PARAMETER, SYSTEM and PUBLIC. + * @throws an error if the parameter is both GENERAL and PARAMETER + * of both PUBLIC and SYSTEM. + * @return the created entity + */ + private Entity newEntity(String name, int type) + { + Entity e = new Entity(name, type, null); + entityHash.put(e.name, e); + return e; + } + + private void unexpected(Exception ex) + { + throw new Error("This should never happen, report a bug", ex); + } +} diff --git a/libjava/javax/swing/text/html/parser/DTDConstants.java b/libjava/javax/swing/text/html/parser/DTDConstants.java new file mode 100755 index 00000000000..29218c25d7e --- /dev/null +++ b/libjava/javax/swing/text/html/parser/DTDConstants.java @@ -0,0 +1,290 @@ +/* DTDConstants.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +/** + * <p>This class defines the SGML basic types, used for describing HTML 4.01 + * at {@link http://www.w3.org/TR/html4/types.html }. Not all constants, + * defined here, are actually used in HTML 4.01 SGML specification. Some others + * are defined just as part of the required implementation. + * </p> + * <p> + * If you need more information about SGML DTD documents, + * the author suggests to read SGML tutorial on + * {@link http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html}. + * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>, + * Oxford University Press, 688 p, ISBN: 0198537379. + * </p> + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public interface DTDConstants +{ + /* ----- The data types, used in HTML 4.01 SGML definition: ---- */ + + /** + * The CDATA (Character data) constant, specifes the content model, + * consisting of characters only. In SGML for HTML 4.01, the character + * entities must be replaced by characters, the line feeds must be + * ignored and any number of the subsequent carriage returns or tabs + * must be replaced by a single space. + */ + int CDATA = 1; + + /** + * The EMPTY constant, means the element with no content. + */ + int EMPTY = 17; + + /** + * The ID constant, means that the token is the unique identifier. + * This identifier can be referenced by attribute with value of IDREF. + * The identifier must begin with letter, followed by any number of + * letters, digits, hyphens, underscores, colons and periods. + */ + int ID = 4; + + /** + * The IDREF constant, specifies reference to a valid ID within + * the document. + */ + int IDREF = 5; + + /** + * The IDREFS constant, a space separated list of IDREFs + */ + int IDREFS = 6; + + /** + * The NAME constant, means the token that + * must begin with letter, followed by any number of + * letters, digits, hyphens, underscores, colons and periods. + */ + int NAME = 7; + + /** + * The NAMES constant, specifies a space separated of NAMEs. + */ + int NAMES = 8; + + /** + * The NMTOKEN constant, specifies the attribute, consisting of + * characters that can be either digits or alphabetic characters). + */ + int NMTOKEN = 9; + + /** + * The NMTOKENS constant, specifies a list of NMTOKENs. + */ + int NMTOKENS = 10; + + /** + * The NOTATION constant, a previously defined data type. + */ + int NOTATION = 11; + + /** + * The NUMBER constant (means that the attribute consists of at least + * one decimal digit). + */ + int NUMBER = 12; + + /** + * The NUMBERS constant, specifies a space separated list of NUMBERs. + */ + int NUMBERS = 13; + + /** + * The NUTOKEN constant. + */ + int NUTOKEN = 14; + + /** + * The NUTOKENS constant. + */ + int NUTOKENS = 15; + + /* ------- + The entity scope constants. + As these four constants are combined with the bitwise OR, + they are defined in the hexadecimal notation. + The reason of setting the two bits at once (for PUBLIC and SYSTEM) + is probably historical. ----- */ + + /** + * The PUBLIC constant, specifies the public entity. The PUBLIC entities + * are assumed to be known to many systems so that a full declaration + * need not be transmitted. For example, + * <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN"> + */ + int PUBLIC = 0xA; + + /** + * The SYSTEM constant, specifies the system entitiy. The system entities + * are assumed to be known but require the clear identifer + * (like the file path), where they can be found in the system. + * For example, <code> + * <DOCTYPE html SYSTEM "/path/to/file.dtd"> </code>. + */ + int SYSTEM = 0x11; + + /** + * The PARAMETER constant, specifies that entity is only valid + * inside SGML DTD scope. + */ + int PARAMETER = 0x40000; + + /** + * The GENERAL constant, specifies theat the entity is valid in the + * whole HTML document scope. + */ + int GENERAL = 0x10000; + + /* ---- The constants, defining if the element attribute is required, + fixed or implied. ---- */ + + /** + * The attribute modifier #REQUIRED constant, indicates that the + * value must be supplied. + */ + int REQUIRED = 2; + + /** + * The attribute modifier #FIXED constant, means that the attribute has + * the fixed value that cannot be changed. + */ + int FIXED = 1; + + /** + * The attribute modifier #IMPLIED constant, + * indicating that for this attribute the user agent must provide + * the value itself. + */ + int IMPLIED = 5; + + /** + * The attribute modifier #CURRENT constant, specifies the value + * that at any point in the document is the last value supplied for + * that element. A value is required to be supplied for the first + * occurrence of an element + */ + int CURRENT = 3; + + /** + * The attribute modifier #CONREF constant, specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead. + */ + int CONREF = 4; + + /* ----- Constants, defining if the element + start and end tags are required. ---- */ + + /** + * The STARTTAG, meaning that the element needs a starting tag. + */ + int STARTTAG = 13; + + /** + * The ENDTAG constant, meaning that the element needs a closing tag. + */ + int ENDTAG = 14; + + /* ----- Other constants: ----- */ + + /** + * The ANY constant, specifies + * an attribute, consisting from arbitrary characters. + */ + int ANY = 19; + + /** + * The DEFAULT constant, specifies the default value. + */ + int DEFAULT = 131072; + + /** + * The ENTITIES constant (list of ENTITYes) + */ + int ENTITIES = 3; + + /** + * The ENTITY constant, meaning the numeric or symbolic name of some + * HTML data. + */ + int ENTITY = 2; + + /** + * The MD constant. + */ + int MD = 16; + + /** + * The MODEL constant. + */ + int MODEL = 18; + + /** + * The MS constant. + */ + int MS = 15; + + /** + * The PI (Processing Instruction) constant, specifies a processing + * instruction. Processing instructions are used to embed information + * intended for specific applications. + */ + int PI = 12; + + /** + * The RCDATA constant (Entity References and Character Data), specifies + * the content model, consisting of characters AND entities. The + * "<" is threated as an ordinary character, but + * "<code>&name;</code>" still means the general entity with + * the given name. + */ + int RCDATA = 16; + + /** + * The SDATA constant. Means that the value contains the entity name + * and the replacement value of a character entity reference. + */ + int SDATA = 11; +} diff --git a/libjava/javax/swing/text/html/parser/DocumentParser.java b/libjava/javax/swing/text/html/parser/DocumentParser.java new file mode 100644 index 00000000000..8f7d213b36a --- /dev/null +++ b/libjava/javax/swing/text/html/parser/DocumentParser.java @@ -0,0 +1,258 @@ +/* DocumentParser.java -- A parser for HTML documents. + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.htmlAttributeSet; +import javax.swing.text.html.parser.Parser; + +import java.io.IOException; +import java.io.Reader; + +import javax.swing.text.BadLocationException; +import javax.swing.text.html.HTMLEditorKit; + +/** + * <p>A simple error-tolerant HTML parser that uses a DTD document + * to access data on the possible tokens, arguments and syntax.</p> + * <p> The parser reads an HTML content from a Reader and calls various + * notifying methods (which should be overridden in a subclass) + * when tags or data are encountered.</p> + * <p>Some HTML elements need no opening or closing tags. The + * task of this parser is to invoke the tag handling methods also when + * the tags are not explicitly specified and must be supposed using + * information, stored in the DTD. + * For example, parsing the document + * <p><table><tr><td>a<td>b<td>c</tr> <br> + * will invoke exactly the handling methods exactly in the same order + * (and with the same parameters) as if parsing the document: <br> + * <em><html><head></head><body><table>< + * tbody></em><tr><td>a<em></td></em><td>b<em> + * </td></em><td>c<em></td></tr></em>< + * <em>/tbody></table></body></html></em></p> + * (supposed tags are given in italics). The parser also supports + * obsolete elements of HTML syntax.<p> + * </p> + * In this implementation, DocumentParser is directly derived from its + * ancestor without changes of functionality. + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class DocumentParser + extends Parser + implements DTDConstants +{ + /** + * The enclosed working parser class. + */ + private class gnuParser + extends gnu.javax.swing.text.html.parser.support.Parser + { + private gnuParser(DTD d) + { + super(d); + } + + protected final void handleComment(char[] comment) + { + parser.handleComment(comment); + callBack.handleComment(comment, hTag.where.startPosition); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + parser.handleEmptyTag(tag); + callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(), + hTag.where.startPosition + ); + } + + protected final void handleEndTag(TagElement tag) + { + parser.handleEndTag(tag); + callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition); + } + + protected final void handleError(int line, String message) + { + parser.handleError(line, message); + callBack.handleError(message, hTag.where.startPosition); + } + + protected final void handleStartTag(TagElement tag) + { + parser.handleStartTag(tag); + htmlAttributeSet attributes = gnu.getAttributes(); + + if (tag.fictional()) + attributes.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED, + Boolean.TRUE + ); + + callBack.handleStartTag(tag.getHTMLTag(), attributes, + hTag.where.startPosition + ); + } + + protected final void handleText(char[] text) + { + parser.handleText(text); + callBack.handleText(text, hTag.where.startPosition); + } + + DTD getDTD() + { + return dtd; + } + } + + /** + * This field is used to access the identically named + * methods of the outer class. + */ + private DocumentParser parser = this; + + /** + * The callback. + */ + private HTMLEditorKit.ParserCallback callBack; + + /** + * The reference to the working class of HTML parser that is + * actually used to parse the document. + */ + private gnuParser gnu; + + /** + * Creates a new parser that uses the given DTD to access data on the + * possible tokens, arguments and syntax. There is no single - step way + * to get a default DTD; you must either refer to the implementation - + * specific packages, write your own DTD or obtain the working instance + * of parser in other way, for example, by calling + * {@link javax.swing.text.html.HTMLEditorKit#getParser() }. + * @param a_dtd a DTD to use. + */ + public DocumentParser(DTD a_dtd) + { + super(a_dtd); + gnu = new gnuParser(a_dtd); + } + + /** + * Parses the HTML document, calling methods of the provided + * callback. This method must be multithread - safe. + * @param reader The reader to read the HTML document from + * @param callback The callback that is notifyed about the presence + * of HTML elements in the document. + * @param ignoreCharSet If thrue, any charset changes during parsing + * are ignored. + * @throws java.io.IOException + */ + public void parse(Reader reader, HTMLEditorKit.ParserCallback a_callback, + boolean ignoreCharSet + ) + throws IOException + { + callBack = a_callback; + gnu.parse(reader); + + callBack.handleEndOfLineString(gnu.getEndOfLineSequence()); + try + { + callBack.flush(); + } + catch (BadLocationException ex) + { + // Convert this into the supported type of exception. + throw new IOException(ex.getMessage()); + } + } + + /** + * Handle HTML comment. The default method returns without action. + * @param comment the comment being handled + */ + protected void handleComment(char[] comment) + { + } + + /** + * Handle the tag with no content, like <br>. The method is + * called for the elements that, in accordance with the current DTD, + * has an empty content. + * @param tag the tag being handled. + * @throws javax.swing.text.ChangedCharSetException + */ + protected void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + } + + /** + * The method is called when the HTML closing tag ((like </table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param The tag being handled + */ + protected void handleEndTag(TagElement tag) + { + } + + /* Handle error that has occured in the given line. */ + protected void handleError(int line, String message) + { + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param The tag being handled + */ + protected void handleStartTag(TagElement tag) + { + } + + /** + * Handle the text section. + * @param text a section text. + */ + protected void handleText(char[] text) + { + } +} diff --git a/libjava/javax/swing/text/html/parser/Element.java b/libjava/javax/swing/text/html/parser/Element.java new file mode 100755 index 00000000000..4f39fbef0ee --- /dev/null +++ b/libjava/javax/swing/text/html/parser/Element.java @@ -0,0 +1,317 @@ +/* Element.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +import java.io.Serializable; + +import java.util.BitSet; + +/** + * <p> + * Stores the element information, obtained by parsing SGML DTD + * tag <code><!ELEMENT .. ></code>. This class has no public + * constructor and can only be instantiated using the + * {@link javax.swing.text.html.parser.DTD } methods</p> + * + * <p>SGML defines elements that represent structures or + * behavior. An element typically consists of a start tag, content, and an + * end tag. Hence the elements are not tags. The HTML 4.0 definition specifies + * that some elements are not required to have the end tags. Also, some + * HTML elements (like <code><hr></code>) have no content. Element names + * are case sensitive.</p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class Element + implements DTDConstants, Serializable +{ + /** + * Package level mapper between type names and they string values. + */ + static final gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("CDATA", DTDConstants.CDATA); + add("RCDATA", DTDConstants.RCDATA); + add("EMPTY", DTDConstants.EMPTY); + add("ANY", DTDConstants.ANY); + } + }; + + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -6717939384601675586L; + + /** + * The element attributes. + */ + public AttributeList atts; + + /** + * Contains refernces to elements that must NOT occur inside this element, + * at any level of hierarchy. + */ + public BitSet exclusions; + + /** + * Contains refernces to elements that must CAN occur inside this element, + * at any level of hierarchy. + */ + public BitSet inclusions; + + /** + * The content model, defining elements, entities and DTD text + * that may/may not occur inside this element. + */ + public ContentModel content; + + /** + * A field to store additional user data for this Element. + */ + public Object data; + + /** + * The element name. + */ + public String name; + + /** + * True is this element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><hr></code>are + * not required to have the end tags. + */ + public boolean oEnd; + + /** + * True is this element need not to have the starting tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><head></code> or + * <code><body></code>) are + * not required to have the start tags. + + */ + public boolean oStart; + + /** + * This field contains the unique integer identifier of this Element, + * used to refer the element (more exactly, the element flag) + * in <code>inclusions</code> and <code>exclusions</code> bit set. + */ + public int index; + + /** + * The element type, containing value, defined in DTDConstants. + * In this implementation, the element type can be + * CDATA, RCDATA, EMPTY or ANY. + */ + public int type; + + /** + * The default constructor must have package level access in this + * class. Use DTD.defineElement(..) to create an element when required. + * @todo MAKE THIS PACKAGE in the final version. Now the Parser needs it! + */ + Element() + { + } + + /** + * Converts the string representation of the element type + * into its unique integer identifier, defined in DTDConstants. + * @param a_type A name of the type + * @return DTDConstants.CDATA, DTDConstants.RCDATA, DTDConstants.EMPTY, + * DTDConstants.ANY or null if the type name is not + * "CDATA", "RCDATA", "EMPTY" or "ANY". This function is case sensitive. + * @throws NullPointerException if <code>a_type</code> is null. + */ + public static int name2type(String a_type) + { + return mapper.get(a_type); + } + + /** + * Get the element attribute by name. + * @param attribute the attribute name, case insensitive. + * @return the correspoding attribute of this element. The class, + * for storing as attribute list, as a single attribute, is used to + * store a single attribute in this case. + * @throws NullPointerException if the attribute name is null. + */ + public AttributeList getAttribute(String attribute) + { + AttributeList a = atts; + + while (a != null && !attribute.equalsIgnoreCase(a.name)) + a = a.next; + + return a; + } + + /** + * Get the element attribute by its value. + * @param a_value the attribute value, case insensitive. + * @return the correspoding attribute of this element. The class, + * for storing as attribute list, as a single attribute, is used to + * store a single attribute in this case. If there are several + * attributes with the same value, there is no garranty, which one + * is returned. + */ + public AttributeList getAttributeByValue(String a_value) + { + AttributeList a = atts; + + if (a_value == null) + { + while (a != null) + { + if (a.value == null) + return a; + + a = a.next; + } + } + else + { + while (a != null) + { + if (a.value != null && a_value.equalsIgnoreCase(a.value)) + return a; + + a = a.next; + } + } + + return null; + } + + /** + * Get all attributes of this document as an attribute list. + * @return + */ + public AttributeList getAttributes() + { + return atts; + } + + /** + * Get the content model, defining elements, entities and DTD text + * that may/may not occur inside this element. + */ + public ContentModel getContent() + { + return content; + } + + /** + * Returns true for the element with no content. + * Empty elements are defined with the SGML DTD keyword "EMPTY". + * @return true if content model field (content) method is equal to + * null or its method empty() returns true. + */ + public boolean isEmpty() + { + return content == null || content.empty(); + } + + /** + * Get the unique integer identifier of this Element, + * used to refer the element (more exactly, the element flag) + * in <code>inclusions</code> and <code>exclusions</code> bit set. + * WARNING: This value may not be the same between different + * implementations. + */ + public int getIndex() + { + return index; + } + + /** + * Get the element name. + */ + public String getName() + { + return name; + } + + /** + * Get the element type. + * @return one of the values, defined DTDConstants. + * In this implementation, the element type can be + * CDATA, RCDATA, EMPTY or ANY. + */ + public int getType() + { + return type; + } + + /** + * True is this element need not to have the starting tag, false + * otherwise.s element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><hr></code>are + * not required to have the end tags. + */ + public boolean omitEnd() + { + return oEnd; + } + + /** + * True is this element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><head></code> or + * <code><body></code>) are + * not required to have the start tags. + */ + public boolean omitStart() + { + return oStart; + } + + /** + * Returns the name of this element. + */ + public String toString() + { + return name; + } +} diff --git a/libjava/javax/swing/text/html/parser/Entity.java b/libjava/javax/swing/text/html/parser/Entity.java new file mode 100644 index 00000000000..ba79552c795 --- /dev/null +++ b/libjava/javax/swing/text/html/parser/Entity.java @@ -0,0 +1,185 @@ +/* Entity.java -- Stores information, obtained by parsing SGML DTL + * <!ENTITY % .. > tag + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +import java.io.Serializable; + +/** + * <p>Stores information, obtained by parsing SGML DTL + * <!ENTITY % .. > tag.</p> + * <p> + * The entity defines some kind of macro that can be used elsewhere in + * the document. + * When the macro is referred to by the name in the DTD, it is expanded into + * a string + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class Entity + implements DTDConstants, Serializable +{ + /** + * Package level mapper between type names and they string values. + */ + final static gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("ANY", DTDConstants.ANY); + add("CDATA", DTDConstants.CDATA); + add("PUBLIC", DTDConstants.PUBLIC); + add("SDATA", DTDConstants.SDATA); + add("PI", DTDConstants.PI); + add("STARTTAG", DTDConstants.STARTTAG); + add("ENDTAG", DTDConstants.ENDTAG); + add("MS", DTDConstants.MS); + add("MD", DTDConstants.MD); + add("SYSTEM", DTDConstants.SYSTEM); + } + }; + + /** + * The entity name. + */ + public String name; + + /** + * The entity data + */ + public char[] data; + + /** + * The entity type. + */ + public int type; + + /** + * String representation of the entity data. + */ + private String sdata; + + /** + * Create a new entity + * @param a_name the entity name + * @param a_type the entity type + * @param a_data the data replacing the entity reference + */ + public Entity(String a_name, int a_type, char[] a_data) + { + name = a_name; + type = a_type; + data = a_data; + } + + /** + * Converts a given string to the corresponding entity type. + * @return a value, defined in DTDConstants (one of + * PUBLIC, CDATA, SDATA, PI, STARTTAG, ENDTAG, MS, MD, SYSTEM) + * or CDATA if the parameter is not a valid entity type. + */ + public static int name2type(String an_entity) + { + int r = mapper.get(an_entity); + return (r == 0) ? DTDConstants.CDATA : r; + } + + /** + * Get the entity data. + */ + public char[] getData() + { + return data; + } + + /** + * Returns true for general entities. Each general entity can be + * referenced as <code>&entity-name;</code>. Such entities are + * defined by the SGML DTD tag + * <code><!ENTITY <i>name</i> "<i>value</i>"></code>. The general + * entities can be used anywhere in the document. + */ + public boolean isGeneral() + { + return (type & DTDConstants.GENERAL) != 0; + } + + /** + * Get the entity name. + */ + public String getName() + { + return name; + } + + /** + * Returns true for parameter entities. Each parameter entity can be + * referenced as <code>&entity-name;</code>. Such entities are + * defined by the SGML DTD tag + * <code><!ENTITY % <i>name</i> "<i>value</i>"></code>. The parameter + * entities can be used only in SGML context. + */ + public boolean isParameter() + { + return (type & DTDConstants.PARAMETER) != 0; + } + + /** + * Returns a data as String + */ + public String getString() + { + if (sdata == null) + sdata = new String(data); + + return sdata; + } + + /** + * Get the entity type. + * @return the value of the {@link #type}. + */ + public int getType() + { + return type; + } + +} diff --git a/libjava/javax/swing/text/html/parser/Parser.java b/libjava/javax/swing/text/html/parser/Parser.java new file mode 100755 index 00000000000..7202f6b5b27 --- /dev/null +++ b/libjava/javax/swing/text/html/parser/Parser.java @@ -0,0 +1,436 @@ +/* Parser.java -- HTML parser + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import java.io.IOException; +import java.io.Reader; + +import javax.swing.text.ChangedCharSetException; +import javax.swing.text.SimpleAttributeSet; + +/* + * FOR DEVELOPERS: To avoid regression, please run the package test + * textsuite/javax.swing.text.html.parser/AllParserTests after your + * modifications. + */ + +/** + * <p>A simple error-tolerant HTML parser that uses a DTD document + * to access data on the possible tokens, arguments and syntax.</p> + * <p> The parser reads an HTML content from a Reader and calls various + * notifying methods (which should be overridden in a subclass) + * when tags or data are encountered.</p> + * <p>Some HTML elements need no opening or closing tags. The + * task of this parser is to invoke the tag handling methods also when + * the tags are not explicitly specified and must be supposed using + * information, stored in the DTD. + * For example, parsing the document + * <p><table><tr><td>a<td>b<td>c</tr> <br> + * will invoke exactly the handling methods exactly in the same order + * (and with the same parameters) as if parsing the document: <br> + * <em><html><head></head><body><table>< + * tbody></em><tr><td>a<em></td></em><td>b<em> + * </td></em><td>c<em></td></tr></em>< + * <em>/tbody></table></body></html></em></p> + * (supposed tags are given in italics). The parser also supports + * obsolete elements of HTML syntax.<p> + * </p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class Parser + implements DTDConstants +{ + /** + * The document template description that will be used to parse the documents. + */ + protected DTD dtd; + + /** + * The value of this field determines whether or not the Parser will be + * strict in enforcing SGML compatibility. The default value is false, + * stating that the parser should do everything to parse and get at least + * some information even from the incorrectly written HTML input. + */ + protected boolean strict; + + /** + * The package level reference to the working HTML parser in this + * implementation. + */ + final gnu.javax.swing.text.html.parser.support.Parser gnu; + + /** + * Creates a new parser that uses the given DTD to access data on the + * possible tokens, arguments and syntax. There is no single - step way + * to get a default DTD; you must either refer to the implementation - + * specific packages, write your own DTD or obtain the working instance + * of parser in other way, for example, by calling + * {@link javax.swing.text.html.HTMLEditorKit#getParser() }. + * @param a_dtd A DTD to use. + */ + public Parser(DTD a_dtd) + { + dtd = a_dtd; + + final Parser j = this; + + gnu = + new gnu.javax.swing.text.html.parser.support.Parser(dtd) + { + protected final void handleComment(char[] comment) + { + j.handleComment(comment); + } + + protected final void handleEOFInComment() + { + j.handleEOFInComment(); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + j.handleEmptyTag(tag); + } + + protected final void handleStartTag(TagElement tag) + { + j.handleStartTag(tag); + } + + protected final void handleEndTag(TagElement tag) + { + j.handleEndTag(tag); + } + + protected final void handleError(int line, String message) + { + j.handleError(line, message); + } + + protected final void handleText(char[] text) + { + j.handleText(text); + } + + protected final void handleTitle(char[] title) + { + j.handleTitle(title); + } + + protected final void markFirstTime(Element element) + { + j.markFirstTime(element); + } + + protected final void startTag(TagElement tag) + throws ChangedCharSetException + { + j.startTag(tag); + } + + protected final void endTag(boolean omitted) + { + j.endTag(omitted); + } + + protected TagElement makeTag(Element element) + { + return j.makeTag(element); + } + + protected TagElement makeTag(Element element, boolean isSupposed) + { + return j.makeTag(element, isSupposed); + } + }; + } + + /** + * Parse the HTML text, calling various methods in response to the + * occurence of the corresponding HTML constructions. + * @param reader The reader to read the source HTML from. + * @throws IOException If the reader throws one. + */ + public synchronized void parse(Reader reader) + throws IOException + { + gnu.parse(reader); + } + + /** + * Parses DTD markup declaration. Currently returns without action. + * @return null. + * @throws java.io.IOException + */ + public String parseDTDMarkup() + throws IOException + { + return gnu.parseDTDMarkup(); + } + + /** + * Parse DTD document declarations. Currently only parses the document + * type declaration markup. + * @param strBuff + * @return true if this is a valid DTD markup declaration. + * @throws IOException + */ + protected boolean parseMarkupDeclarations(StringBuffer strBuff) + throws IOException + { + return gnu.parseMarkupDeclarations(strBuff); + } + + /** + * Get the attributes of the current tag. + * @return The attribute set, representing the attributes of the current tag. + */ + protected SimpleAttributeSet getAttributes() + { + return gnu.getAttributes(); + } + + /** + * Get the number of the document line being parsed. + * @return The current line. + */ + protected int getCurrentLine() + { + return gnu.hTag.where.beginLine; + } + + /** + * Get the current position in the document being parsed. + * @return The current position. + */ + protected int getCurrentPos() + { + return gnu.hTag.where.startPosition; + } + + /** + * The method is called when the HTML end (closing) tag is found or if + * the parser concludes that the one should be present in the + * current position. The method is called immediatly + * before calling the handleEndTag(). + * @param omitted True if the tag is no actually present in the document, + * but is supposed by the parser (like </html> at the end of the + * document). + */ + protected void endTag(boolean omitted) + { + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to handleError, also providing the number of the + * current line. + */ + protected void error(String msg) + { + gnu.error(msg); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error (msg+": '"+invalid+"'"). + */ + protected void error(String msg, String invalid) + { + gnu.error(msg, invalid); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error (parm1+" "+ parm2+" "+ parm3). + */ + protected void error(String parm1, String parm2, String parm3) + { + gnu.error(parm1, parm2, parm3); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error + * (parm1+" "+ parm2+" "+ parm3+" "+ parm4). + */ + protected void error(String parm1, String parm2, String parm3, String parm4) + { + gnu.error(parm1, parm2, parm3, parm4); + } + + /** + * In this implementation, this is never called and returns without action. + */ + protected void flushAttributes() + { + gnu.flushAttributes(); + } + + /** + * Handle HTML comment. The default method returns without action. + * @param comment The comment being handled + */ + protected void handleComment(char[] comment) + { + } + + /** + * This is additionally called in when the HTML content terminates + * without closing the HTML comment. This can only happen if the + * HTML document contains errors (for example, the closing --;gt is + * missing. The default method calls the error handler. + */ + protected void handleEOFInComment() + { + gnu.error("Unclosed comment"); + } + + /** + * Handle the tag with no content, like <br>. The method is + * called for the elements that, in accordance with the current DTD, + * has an empty content. + * @param The tag being handled. + * @throws javax.swing.text.ChangedCharSetException + */ + protected void handleEmptyTag(TagElement tag) + throws ChangedCharSetException + { + } + + /** + * The method is called when the HTML closing tag ((like </table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param The tag being handled + */ + protected void handleEndTag(TagElement tag) + { + } + + /* Handle error that has occured in the given line. */ + protected void handleError(int line, String message) + { + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param The tag being handled + */ + protected void handleStartTag(TagElement tag) + { + } + + /** + * Handle the text section. + * <p> For non-preformatted section, the parser replaces + * \t, \r and \n by spaces and then multiple spaces + * by a single space. Additionaly, all whitespace around + * tags is discarded. + * </p> + * <p> For pre-formatted text (inside TEXAREA and PRE), the parser preserves + * all tabs and spaces, but removes <b>one</b> bounding \r, \n or \r\n, + * if it is present. Additionally, it replaces each occurence of \r or \r\n + * by a single \n.</p> + * + * @param text A section text. + */ + protected void handleText(char[] text) + { + } + + /** + * Handle HTML <title> tag. This method is invoked when + * both title starting and closing tags are already behind. + * The passed argument contains the concatenation of all + * title text sections. + * @param The title text. + */ + protected void handleTitle(char[] title) + { + } + + /** + * Constructs the tag from the given element. In this implementation, + * this is defined, but never called. + * @param element the base element of the tag. + * @return the tag + */ + protected TagElement makeTag(Element element) + { + return makeTag(element, false); + } + + /** + * Constructs the tag from the given element. + * @param the tag base {@link javax.swing.text.html.parser.Element} + * @param isSupposed true if the tag is not actually present in the + * html input, but the parser supposes that it should to occur in + * the current location. + * @return the tag + */ + protected TagElement makeTag(Element element, boolean isSupposed) + { + return new TagElement(element, isSupposed); + } + + /** + * This is called when the tag, representing the given element, + * occurs first time in the document. + * @param element + */ + protected void markFirstTime(Element element) + { + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. The method is called immediately before + * calling the handleStartTag. + * @param The tag + */ + protected void startTag(TagElement tag) + throws ChangedCharSetException + { + } +} diff --git a/libjava/javax/swing/text/html/parser/ParserDelegator.java b/libjava/javax/swing/text/html/parser/ParserDelegator.java index 58c2bc37b76..798170630b7 100644 --- a/libjava/javax/swing/text/html/parser/ParserDelegator.java +++ b/libjava/javax/swing/text/html/parser/ParserDelegator.java @@ -1,5 +1,5 @@ -/* ParserDelegator - Delegator for ParserDocument. - Copyright (C) 2002 Free Software Foundation, Inc. +/* ParserDelegator.java -- Delegator for ParserDocument. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -37,10 +37,170 @@ exception statement from your version. */ package javax.swing.text.html.parser; +import gnu.javax.swing.text.html.parser.HTML_401F; +import gnu.javax.swing.text.html.parser.htmlAttributeSet; + +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; + +import javax.swing.text.BadLocationException; +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.HTMLEditorKit.ParserCallback; + /** - * Stub implementeation to get gjdoc working. + * This class instantiates and starts the working instance of + * html parser, being responsible for providing the default DTD. + * + * TODO Later this class must be derived from the totally abstract class + * HTMLEditorKit.Parser. HTMLEditorKit that does not yet exist. + * + * @author Audrius Meskauskas (AudriusA@Bioinformatics.org) */ -//public class ParserDelegator extends HTMLEditorKit.Parser public class ParserDelegator + extends javax.swing.text.html.HTMLEditorKit.Parser + implements Serializable { + private class gnuParser + extends gnu.javax.swing.text.html.parser.support.Parser + { + private static final long serialVersionUID = 1; + + private gnuParser(DTD d) + { + super(d); + } + + protected final void handleComment(char[] comment) + { + callBack.handleComment(comment, hTag.where.startPosition); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(), + hTag.where.startPosition + ); + } + + protected final void handleEndTag(TagElement tag) + { + callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition); + } + + protected final void handleError(int line, String message) + { + callBack.handleError(message, hTag.where.startPosition); + } + + protected final void handleStartTag(TagElement tag) + { + htmlAttributeSet attributes = gnu.getAttributes(); + + if (tag.fictional()) + attributes.addAttribute(ParserCallback.IMPLIED, Boolean.TRUE); + + callBack.handleStartTag(tag.getHTMLTag(), attributes, + hTag.where.startPosition + ); + } + + protected final void handleText(char[] text) + { + callBack.handleText(text, hTag.where.startPosition); + } + + DTD getDTD() + { + return dtd; + } + } + + /** + * Use serialVersionUID for interoperability. + */ + private static final long serialVersionUID = -1276686502624777206L; + + private static DTD dtd = HTML_401F.getInstance(); + + /** + * The callback. + */ + private HTMLEditorKit.ParserCallback callBack; + + /** + * The reference to the working class of HTML parser that is + * actually used to parse the document. + */ + private gnuParser gnu; + + /** + * Parses the HTML document, calling methods of the provided + * callback. This method must be multithread - safe. + * @param reader The reader to read the HTML document from + * @param callback The callback that is notifyed about the presence + * of HTML elements in the document. + * @param ignoreCharSet If thrue, any charset changes during parsing + * are ignored. + * @throws java.io.IOException + */ + public void parse(Reader reader, HTMLEditorKit.ParserCallback a_callback, + boolean ignoreCharSet + ) + throws IOException + { + callBack = a_callback; + + if (gnu == null || !dtd.equals(gnu.getDTD())) + { + gnu = new gnuParser(dtd); + } + + gnu.parse(reader); + + callBack.handleEndOfLineString(gnu.getEndOfLineSequence()); + try + { + callBack.flush(); + } + catch (BadLocationException ex) + { + // Convert this into the supported type of exception. + throw new IOException(ex.getMessage()); + } + } + + /** + * Calling this method instructs that, if not specified directly, + * the documents will be parsed using the default + * DTD of the implementation. + */ + protected static void setDefaultDTD() + { + dtd = HTML_401F.getInstance(); + } + + /** + * Registers the user - written DTD under the given name, also + * making it default for the subsequent parsings. This has effect on + * all subsequent calls to the parse(...) . If you need to specify + * your DTD locally, simply {@link javax.swing.text.html.parser.Parser} + * instead. + * @param dtd The DTD that will be used to parse documents by this class. + * @param name The name of this DTD. + * @return No standard is specified on which instance of DTD must be + * returned by this method, and it is recommended to leave the returned + * value without consideration. This implementation returns the DTD + * that was previously set as the default DTD, or the implementations + * default DTD if none was set. + */ + protected static DTD createDTD(DTD a_dtd, String name) + { + DTD.putDTDHash(name, a_dtd); + + DTD dtd_prev = dtd; + dtd = a_dtd; + return dtd_prev; + } } diff --git a/libjava/javax/swing/text/html/parser/TagElement.java b/libjava/javax/swing/text/html/parser/TagElement.java new file mode 100755 index 00000000000..ab9465261df --- /dev/null +++ b/libjava/javax/swing/text/html/parser/TagElement.java @@ -0,0 +1,142 @@ +/* TagElement.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import javax.swing.text.html.HTML; + +/** + * The SGML element, defining a single html tag. + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class TagElement +{ + /** + * The Element the tag was constructed from. + */ + private final Element element; + + /** + * The coresponding HTML tag, assigned once in constructor. + */ + private final HTML.Tag tag; + + /** + * The 'fictional' flag. + */ + private final boolean fictional; + + /** + * Creates the html tag element from the defintion, stored in the + * given element. Sets the flag 'fictional' to false. + * @param an_element + */ + public TagElement(Element an_element) + { + this(an_element, false); + } + + /** + * Creates the html tag element from the defintion, stored in the + * given element, setting the flag 'fictional' to the given value. + */ + public TagElement(Element an_element, boolean is_fictional) + { + element = an_element; + fictional = is_fictional; + + HTML.Tag t = HTML.getTag(element.getName()); + + if (t != null) + tag = t; + else + tag = new HTML.UnknownTag(element.getName()); + } + + /** + * Get the element from that the tag was constructed. + */ + public Element getElement() + { + return element; + } + + /** + * Get the corresponding HTML tag. This is either one of the + * pre-defined HTML tags or the instance of the UnknownTag with the + * element name. + */ + public HTML.Tag getHTMLTag() + { + return tag; + } + + /** + * Calls isPreformatted() for the corresponding html tag and returns + * the obtained value. + */ + public boolean isPreformatted() + { + return tag.isPreformatted(); + } + + /** + * Calls breaksFlow() for the corresponding html tag and returns + * the obtained value. + */ + public boolean breaksFlow() + { + return tag.breaksFlow(); + } + + /** + * Get the value of the flag 'fictional'. + */ + public boolean fictional() + { + return fictional; + } + + /** + * Returns string representation of this object. + */ + public String toString() + { + return getElement() + (fictional ? "?" : ""); + } +} |