summaryrefslogtreecommitdiff
path: root/tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java
diff options
context:
space:
mode:
Diffstat (limited to 'tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java')
-rw-r--r--tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java809
1 files changed, 404 insertions, 405 deletions
diff --git a/tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java b/tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java
index 175d4e864..582a45b32 100644
--- a/tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java
+++ b/tools/gnu/classpath/tools/doclets/xmldoclet/HtmlRepairer.java
@@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
-
+
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@@ -16,7 +16,7 @@ General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-02111-1307 USA.
+02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
@@ -55,17 +55,17 @@ public final class HtmlRepairer {
private Set parentTags = new HashSet();
public TagInfo(String parentTag) {
- this.parentTags.add(parentTag);
+ this.parentTags.add(parentTag);
}
public TagInfo(String[] parentTagArr) {
- for (int i=0; i<parentTagArr.length; ++i) {
- this.parentTags.add(parentTagArr[i]);
- }
+ for (int i=0; i<parentTagArr.length; ++i) {
+ this.parentTags.add(parentTagArr[i]);
+ }
}
public boolean isLegalParentTag(String tag) {
- return this.parentTags.contains(tag);
+ return this.parentTags.contains(tag);
}
}
@@ -94,7 +94,7 @@ public final class HtmlRepairer {
tagInfoMap.put("param", new TagInfo(new String[] { "applet" }));
String[] noTextParentTagArr = {
- "area", "base", "body", "br", "dd", "dt", "head", "hr", "html",
+ "area", "base", "body", "br", "dd", "dt", "head", "hr", "html",
"img", "input", "link", "map", "meta", "ol", "optgroup", "param",
"select", "table", "tbody", "tfoot", "thead", "tr", "ul",
};
@@ -105,9 +105,9 @@ public final class HtmlRepairer {
}
}
- public HtmlRepairer(DocErrorReporter warningReporter,
- boolean noWarn, boolean noEmailWarn,
- ClassDoc contextClass, MemberDoc contextMember,
+ public HtmlRepairer(DocErrorReporter warningReporter,
+ boolean noWarn, boolean noEmailWarn,
+ ClassDoc contextClass, MemberDoc contextMember,
boolean throwAwayLeadingPara) {
this.warningReporter = warningReporter;
this.noWarn = noWarn;
@@ -116,16 +116,16 @@ public final class HtmlRepairer {
this.contextMember = contextMember;
this.throwAwayLeadingPara = throwAwayLeadingPara;
}
-
+
private static String replaceStr(String haystack, String needle, String replacement) {
int ndx=haystack.indexOf(needle);
- if (ndx<0)
- return haystack;
- else
- return haystack.substring(0, ndx)+replacement
- + replaceStr(haystack.substring(ndx+needle.length()), needle, replacement);
+ if (ndx<0)
+ return haystack;
+ else
+ return haystack.substring(0, ndx)+replacement
+ + replaceStr(haystack.substring(ndx+needle.length()), needle, replacement);
}
-
+
private void haveText(String text) {
if (isLeadingTag && throwAwayLeadingPara) {
@@ -163,9 +163,9 @@ public final class HtmlRepairer {
boolean isAtomTag = tag.endsWith("/");
if (isEndTag && isAtomTag) {
- // got something like '</a/>' which is invalid.
- // suppose a close tag was intended.
- tag = tag.substring(0, tag.length()-1);
+ // got something like '</a/>' which is invalid.
+ // suppose a close tag was intended.
+ tag = tag.substring(0, tag.length()-1);
}
if (tag.length() < 1) {
@@ -177,19 +177,19 @@ public final class HtmlRepairer {
String tagAttributes = "";
for (int i=0; i<tagName.length(); ++i) {
- if (" \t\r\n".indexOf(tagName.charAt(i))>=0) {
- tagAttributes = tagName.substring(i).trim();
- tagName = tagName.substring(0, i);
- break;
- }
+ if (" \t\r\n".indexOf(tagName.charAt(i))>=0) {
+ tagAttributes = tagName.substring(i).trim();
+ tagName = tagName.substring(0, i);
+ break;
+ }
}
if (!isEndTag && tagName.indexOf('@')>0) {
- if (!noEmailWarn) {
- printWarning("Tag looks like email address: <"+tagName+">");
- }
- output.append("&lt;"+tag+"&gt;");
- return;
+ if (!noEmailWarn) {
+ printWarning("Tag looks like email address: <"+tagName+">");
+ }
+ output.append("&lt;"+tag+"&gt;");
+ return;
}
tagName = tagName.toLowerCase();
@@ -199,189 +199,189 @@ public final class HtmlRepairer {
}
if ("p".equals(tagName) || "br".equals(tagName) || "hr".equals(tagName)) {
- // throw away </p> and </br>
- if (isEndTag) {
- return;
- }
- // make sure every <p> is a <p/> and every <br> is a <br/>
- else if (!isAtomTag) {
- tag += "/";
- isAtomTag = true;
- }
+ // throw away </p> and </br>
+ if (isEndTag) {
+ return;
+ }
+ // make sure every <p> is a <p/> and every <br> is a <br/>
+ else if (!isAtomTag) {
+ tag += "/";
+ isAtomTag = true;
+ }
}
if (isEndTag) {
- // check whether this close tag is on the stack
- // if yes, close all tags up to this tag
- if (tagStack.contains(tagName)) {
- String popped;
- do {
- popped = (String)tagStack.pop();
- if (!popped.equals(tagName))
- printWarning("Inserting '</"+popped+">");
- output.append("</"+popped+">");
- }
- while (!popped.equals(tagName));
- }
- // if not, just throw it away
- else {
- printWarning("Deleting <"+tag+">");
- }
+ // check whether this close tag is on the stack
+ // if yes, close all tags up to this tag
+ if (tagStack.contains(tagName)) {
+ String popped;
+ do {
+ popped = (String)tagStack.pop();
+ if (!popped.equals(tagName))
+ printWarning("Inserting '</"+popped+">");
+ output.append("</"+popped+">");
+ }
+ while (!popped.equals(tagName));
+ }
+ // if not, just throw it away
+ else {
+ printWarning("Deleting <"+tag+">");
+ }
}
else {
- final int STATE_INITIAL = 1;
- final int STATE_EXPECT_ATTRIBUTENAME = 2;
- final int STATE_UNQUOTED_ATTRIBUTEVALUE = 3;
- final int STATE_SINGLEQUOTE_ATTRIBUTEVALUE = 4;
- final int STATE_DOUBLEQUOTE_ATTRIBUTEVALUE = 5;
- final int STATE_EXPECT_ATTRIBUTEVALUE = 6;
- final int STATE_EXPECT_EQUALSIGN = 7;
-
- int state = STATE_INITIAL;
-
- String newAttributes = "";
- String attributeName = null;
- StringBuffer buf = new StringBuffer();
-
- char[] attrsAsChars = tagAttributes.toCharArray();
- for (int i=0, ilim=attrsAsChars.length+1; i<ilim; ++i) {
- int c;
- if (i<attrsAsChars.length)
- c = (int)attrsAsChars[i];
- else
- c = -1;
-
- switch (state) {
-
- case STATE_INITIAL:
- if (" \t\r\n".indexOf(c)>=0){
- continue;
- }
- else if (-1==c) {
- continue;
- }
- else {
- state = STATE_EXPECT_ATTRIBUTENAME;
- buf.append((char)c);
- }
- break;
-
- case STATE_EXPECT_ATTRIBUTENAME:
- if ('='==c) {
- attributeName = buf.toString();
- buf.setLength(0);
- state = STATE_EXPECT_ATTRIBUTEVALUE;
- }
- else if (-1==c) {
- attributeName = buf.toString();
- buf.setLength(0);
- printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
- }
- else if (" \t\r\n".indexOf(c)>=0) {
- state = STATE_EXPECT_EQUALSIGN;
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_EXPECT_EQUALSIGN:
- if (" \t\r\n".indexOf(c)>=0){
- continue;
- }
- else if ('='==c) {
- state = STATE_EXPECT_ATTRIBUTEVALUE;
- attributeName = buf.toString();
- buf.setLength(0);
- }
- else {
- attributeName = buf.toString();
- buf.setLength(0);
- printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
- newAttributes += " "+attributeName+"=\""+attributeName+"\"";
- buf.append((char)c);
- state = STATE_EXPECT_ATTRIBUTENAME;
- }
- break;
-
- case STATE_EXPECT_ATTRIBUTEVALUE:
- if (" \t\r\n".indexOf(c)>=0){
- continue;
- }
- else if ('\"'==c) {
- state = STATE_DOUBLEQUOTE_ATTRIBUTEVALUE;
- }
- else if ('\''==c) {
- state = STATE_SINGLEQUOTE_ATTRIBUTEVALUE;
- }
- else {
- state = STATE_UNQUOTED_ATTRIBUTEVALUE;
- buf.append((char)c);
- }
- break;
-
- case STATE_UNQUOTED_ATTRIBUTEVALUE:
- if (-1==c || " \t\r\n".indexOf(c)>=0){
- state = STATE_INITIAL;
- newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
- buf.setLength(0);
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_SINGLEQUOTE_ATTRIBUTEVALUE:
- if ('\''==c) {
- state = STATE_INITIAL;
- newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
- buf.setLength(0);
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_DOUBLEQUOTE_ATTRIBUTEVALUE:
- if ('\"'==c) {
- state = STATE_INITIAL;
- newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
- buf.setLength(0);
- }
- else {
- buf.append((char)c);
- }
- break;
- }
- }
-
-
- if (!isAtomTag) {
-
- // check whether this open tag is equal to the topmost
- // entry on the stack; if yes, emit a close tag first
-
- // corrects stuff like '<tr><td>...<td>...');
- if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
- printWarning("Inserting </"+tagName+">");
- output.append("</"+tagName+">");
- tagStack.pop();
- }
- else {
- processKnownChildTags(tagName, tagStack, output);
- }
-
- // otherwise, we assume there are no close tags required
- // before this open tag.
- tagStack.push(tagName);
-
- output.append("<"+tagName+newAttributes+">");
- }
- else {
- output.append("<"+tagName+newAttributes+"/>");
- }
+ final int STATE_INITIAL = 1;
+ final int STATE_EXPECT_ATTRIBUTENAME = 2;
+ final int STATE_UNQUOTED_ATTRIBUTEVALUE = 3;
+ final int STATE_SINGLEQUOTE_ATTRIBUTEVALUE = 4;
+ final int STATE_DOUBLEQUOTE_ATTRIBUTEVALUE = 5;
+ final int STATE_EXPECT_ATTRIBUTEVALUE = 6;
+ final int STATE_EXPECT_EQUALSIGN = 7;
+
+ int state = STATE_INITIAL;
+
+ String newAttributes = "";
+ String attributeName = null;
+ StringBuffer buf = new StringBuffer();
+
+ char[] attrsAsChars = tagAttributes.toCharArray();
+ for (int i=0, ilim=attrsAsChars.length+1; i<ilim; ++i) {
+ int c;
+ if (i<attrsAsChars.length)
+ c = (int)attrsAsChars[i];
+ else
+ c = -1;
+
+ switch (state) {
+
+ case STATE_INITIAL:
+ if (" \t\r\n".indexOf(c)>=0){
+ continue;
+ }
+ else if (-1==c) {
+ continue;
+ }
+ else {
+ state = STATE_EXPECT_ATTRIBUTENAME;
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_EXPECT_ATTRIBUTENAME:
+ if ('='==c) {
+ attributeName = buf.toString();
+ buf.setLength(0);
+ state = STATE_EXPECT_ATTRIBUTEVALUE;
+ }
+ else if (-1==c) {
+ attributeName = buf.toString();
+ buf.setLength(0);
+ printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
+ }
+ else if (" \t\r\n".indexOf(c)>=0) {
+ state = STATE_EXPECT_EQUALSIGN;
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_EXPECT_EQUALSIGN:
+ if (" \t\r\n".indexOf(c)>=0){
+ continue;
+ }
+ else if ('='==c) {
+ state = STATE_EXPECT_ATTRIBUTEVALUE;
+ attributeName = buf.toString();
+ buf.setLength(0);
+ }
+ else {
+ attributeName = buf.toString();
+ buf.setLength(0);
+ printWarning("In Tag '"+tag+"':\nAttribute name without a value, inserting value =\""+attributeName+"\"");
+ newAttributes += " "+attributeName+"=\""+attributeName+"\"";
+ buf.append((char)c);
+ state = STATE_EXPECT_ATTRIBUTENAME;
+ }
+ break;
+
+ case STATE_EXPECT_ATTRIBUTEVALUE:
+ if (" \t\r\n".indexOf(c)>=0){
+ continue;
+ }
+ else if ('\"'==c) {
+ state = STATE_DOUBLEQUOTE_ATTRIBUTEVALUE;
+ }
+ else if ('\''==c) {
+ state = STATE_SINGLEQUOTE_ATTRIBUTEVALUE;
+ }
+ else {
+ state = STATE_UNQUOTED_ATTRIBUTEVALUE;
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_UNQUOTED_ATTRIBUTEVALUE:
+ if (-1==c || " \t\r\n".indexOf(c)>=0){
+ state = STATE_INITIAL;
+ newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
+ buf.setLength(0);
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_SINGLEQUOTE_ATTRIBUTEVALUE:
+ if ('\''==c) {
+ state = STATE_INITIAL;
+ newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
+ buf.setLength(0);
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_DOUBLEQUOTE_ATTRIBUTEVALUE:
+ if ('\"'==c) {
+ state = STATE_INITIAL;
+ newAttributes += " "+attributeName + "=\"" + buf.toString() + "\"";
+ buf.setLength(0);
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+ }
+ }
+
+
+ if (!isAtomTag) {
+
+ // check whether this open tag is equal to the topmost
+ // entry on the stack; if yes, emit a close tag first
+
+ // corrects stuff like '<tr><td>...<td>...');
+ if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
+ printWarning("Inserting </"+tagName+">");
+ output.append("</"+tagName+">");
+ tagStack.pop();
+ }
+ else {
+ processKnownChildTags(tagName, tagStack, output);
+ }
+
+ // otherwise, we assume there are no close tags required
+ // before this open tag.
+ tagStack.push(tagName);
+
+ output.append("<"+tagName+newAttributes+">");
+ }
+ else {
+ output.append("<"+tagName+newAttributes+"/>");
+ }
}
}
@@ -390,33 +390,33 @@ public final class HtmlRepairer {
TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName);
if (null != tagInfo) {
- String parentTag = null;
- for (Enumeration en = tagStack.elements(); en.hasMoreElements(); ) {
- String tag = (String)en.nextElement();
- if (tagInfo.isLegalParentTag(tag)) {
- parentTag = tag;
- break;
- }
- }
- if (parentTag != null) {
- while (((String)tagStack.peek()) != parentTag) {
- String poppedTagName = (String)tagStack.pop();
- output.append("</"+poppedTagName+">");
- printWarning("Inserting </"+poppedTagName+">");
- }
- return true;
- }
+ String parentTag = null;
+ for (Enumeration en = tagStack.elements(); en.hasMoreElements(); ) {
+ String tag = (String)en.nextElement();
+ if (tagInfo.isLegalParentTag(tag)) {
+ parentTag = tag;
+ break;
+ }
+ }
+ if (parentTag != null) {
+ while (((String)tagStack.peek()) != parentTag) {
+ String poppedTagName = (String)tagStack.pop();
+ output.append("</"+poppedTagName+">");
+ printWarning("Inserting </"+poppedTagName+">");
+ }
+ return true;
+ }
}
return false;
}
private void flush() {
-
+
// close all pending tags
while (!tagStack.isEmpty()) {
- String tagName = (String)tagStack.pop();
- printWarning("Inserting </"+tagName+">");
- output.append("</"+tagName+">");
+ String tagName = (String)tagStack.pop();
+ printWarning("Inserting </"+tagName+">");
+ output.append("</"+tagName+">");
}
}
@@ -429,16 +429,16 @@ public final class HtmlRepairer {
*
* Currently, the returned String is not guaranteed to be
* well-formed. In particular there are no checks on the tag
- * names, attribute names and entity names.
+ * names, attribute names and entity names.
*/
public String getWellformedHTML(String text) {
- final int STATE_INITIAL = 1;
- final int STATE_TAG_START = 2;
- final int STATE_TAG = 3;
- final int STATE_TAG_DOUBLEQUOTE = 4;
- final int STATE_TAG_SINGLEQUOTE = 5;
- final int STATE_AMP = 6;
+ final int STATE_INITIAL = 1;
+ final int STATE_TAG_START = 2;
+ final int STATE_TAG = 3;
+ final int STATE_TAG_DOUBLEQUOTE = 4;
+ final int STATE_TAG_SINGLEQUOTE = 5;
+ final int STATE_AMP = 6;
int state = STATE_INITIAL;
output.setLength(0);
@@ -446,74 +446,74 @@ public final class HtmlRepairer {
StringBuffer buf = new StringBuffer();
char[] textAsChars = text.toCharArray();
-
+
outer_loop:
for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) {
- int c;
-
- if (i<textAsChars.length) {
- c = textAsChars[i];
- }
- else {
- c = -1;
- }
-
- switch (state) {
-
- case STATE_INITIAL:
- if ('<'==c) {
- state = STATE_TAG_START;
- if (buf.length()>0) {
- haveText(buf.toString());
- buf.setLength(0);
- }
- }
- else if ('>'==c) {
- // assume this is a greater-than sign
- buf.append("&gt;");
- }
- else if ('&'==c) {
- state = STATE_AMP;
- }
- else if (-1==c) {
- if (buf.length()>0) {
- haveText(buf.toString());
- buf.setLength(0);
- }
- continue;
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_AMP:
- if ('<'==c) {
- buf.append("&amp;");
- state = STATE_TAG_START;
- if (buf.length()>0) {
- haveText(buf.toString());
- buf.setLength(0);
- }
- }
- else if ('>'==c) {
- // assume this is a greater-than sign
- buf.append("&amp;");
- buf.append("&gt;");
- state = STATE_INITIAL;
- }
- else if ('&'==c) {
- buf.append("&amp;");
- buf.append("&amp;");
- state = STATE_INITIAL;
- }
- else if (-1==c) {
- buf.append("&amp;");
- haveText(buf.toString());
- buf.setLength(0);
- state = STATE_INITIAL;
- continue;
- }
+ int c;
+
+ if (i<textAsChars.length) {
+ c = textAsChars[i];
+ }
+ else {
+ c = -1;
+ }
+
+ switch (state) {
+
+ case STATE_INITIAL:
+ if ('<'==c) {
+ state = STATE_TAG_START;
+ if (buf.length()>0) {
+ haveText(buf.toString());
+ buf.setLength(0);
+ }
+ }
+ else if ('>'==c) {
+ // assume this is a greater-than sign
+ buf.append("&gt;");
+ }
+ else if ('&'==c) {
+ state = STATE_AMP;
+ }
+ else if (-1==c) {
+ if (buf.length()>0) {
+ haveText(buf.toString());
+ buf.setLength(0);
+ }
+ continue;
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_AMP:
+ if ('<'==c) {
+ buf.append("&amp;");
+ state = STATE_TAG_START;
+ if (buf.length()>0) {
+ haveText(buf.toString());
+ buf.setLength(0);
+ }
+ }
+ else if ('>'==c) {
+ // assume this is a greater-than sign
+ buf.append("&amp;");
+ buf.append("&gt;");
+ state = STATE_INITIAL;
+ }
+ else if ('&'==c) {
+ buf.append("&amp;");
+ buf.append("&amp;");
+ state = STATE_INITIAL;
+ }
+ else if (-1==c) {
+ buf.append("&amp;");
+ haveText(buf.toString());
+ buf.setLength(0);
+ state = STATE_INITIAL;
+ continue;
+ }
else {
// peek forward and see whether this is a valid entity.
if ('#'==c) {
@@ -543,114 +543,114 @@ public final class HtmlRepairer {
}
buf.append("&amp;");
buf.append((char)c);
- state = STATE_INITIAL;
+ state = STATE_INITIAL;
}
/*
- else if ('#'==c || Character.isLetter((char)c)) {
- buf.append("&");
- buf.append((char)c);
- state = STATE_INITIAL;
- }
- else {
- buf.append("&amp;");
- buf.append((char)c);
- state = STATE_INITIAL;
- }
+ else if ('#'==c || Character.isLetter((char)c)) {
+ buf.append("&");
+ buf.append((char)c);
+ state = STATE_INITIAL;
+ }
+ else {
+ buf.append("&amp;");
+ buf.append((char)c);
+ state = STATE_INITIAL;
+ }
*/
- break;
-
- case STATE_TAG_START:
- if (" \t\r\n".indexOf(c)>=0) {
- //continue;
-
- // new: assume this is a less-sign
- haveText("&lt;"+c);
- state = STATE_INITIAL;
- }
- else if ('/'==c) {
- buf.append((char)c);
- state = STATE_TAG;
- }
- else if ('<'==c) {
- // assume this is a less-sign
- haveText("&lt;&lt;");
- state = STATE_INITIAL;
- }
- else if ('>'==c) {
- // assume this is a less-sign
- haveText("&lt;&gt;");
- state = STATE_INITIAL;
- }
- //else if ('-'==c || '+'==c || '='==c || '\''==c || "0123456789".indexOf(c)>=0) {
- else if (!Character.isLetter((char)c)) {
- // assume this is a less-sign
- haveText("&lt;"+(char)c);
- state = STATE_INITIAL;
- }
- else {
- buf.append((char)c);
- state = STATE_TAG;
- }
- break;
-
- case STATE_TAG:
- if ('\"'==c) {
- buf.append((char)c);
- state = STATE_TAG_DOUBLEQUOTE;
- }
- else if ('\''==c) {
- buf.append((char)c);
- state = STATE_TAG_SINGLEQUOTE;
- }
- else if ('>'==c) {
- state = STATE_INITIAL;
- haveStartOrEndTag(buf.toString());
- buf.setLength(0);
- }
- else if ('<'==c) {
- // notify user, missing greater-than sign
- haveStartOrEndTag(buf.toString());
- buf.setLength(0);
- }
- else if (-1==c) {
- printWarning("Unclosed tag at end-of-comment: <"+buf);
- haveStartOrEndTag(buf.toString());
- buf.setLength(0);
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_TAG_DOUBLEQUOTE:
- if ('\"'==c) {
- buf.append((char)c);
- state = STATE_TAG;
- }
- else if (-1==c) {
- printWarning("Unclosed attribute value at end-of-comment.");
- haveStartOrEndTag(buf.toString()+"\"");
- }
- else {
- buf.append((char)c);
- }
- break;
-
- case STATE_TAG_SINGLEQUOTE:
- if ('\''==c) {
- buf.append((char)c);
- state = STATE_TAG;
- }
- else if (-1==c) {
- printWarning("Unclosed attribute value at end-of-comment.");
- haveStartOrEndTag(buf.toString()+"'");
- }
- else {
- buf.append((char)c);
- }
- break;
- }
+ break;
+
+ case STATE_TAG_START:
+ if (" \t\r\n".indexOf(c)>=0) {
+ //continue;
+
+ // new: assume this is a less-sign
+ haveText("&lt;"+c);
+ state = STATE_INITIAL;
+ }
+ else if ('/'==c) {
+ buf.append((char)c);
+ state = STATE_TAG;
+ }
+ else if ('<'==c) {
+ // assume this is a less-sign
+ haveText("&lt;&lt;");
+ state = STATE_INITIAL;
+ }
+ else if ('>'==c) {
+ // assume this is a less-sign
+ haveText("&lt;&gt;");
+ state = STATE_INITIAL;
+ }
+ //else if ('-'==c || '+'==c || '='==c || '\''==c || "0123456789".indexOf(c)>=0) {
+ else if (!Character.isLetter((char)c)) {
+ // assume this is a less-sign
+ haveText("&lt;"+(char)c);
+ state = STATE_INITIAL;
+ }
+ else {
+ buf.append((char)c);
+ state = STATE_TAG;
+ }
+ break;
+
+ case STATE_TAG:
+ if ('\"'==c) {
+ buf.append((char)c);
+ state = STATE_TAG_DOUBLEQUOTE;
+ }
+ else if ('\''==c) {
+ buf.append((char)c);
+ state = STATE_TAG_SINGLEQUOTE;
+ }
+ else if ('>'==c) {
+ state = STATE_INITIAL;
+ haveStartOrEndTag(buf.toString());
+ buf.setLength(0);
+ }
+ else if ('<'==c) {
+ // notify user, missing greater-than sign
+ haveStartOrEndTag(buf.toString());
+ buf.setLength(0);
+ }
+ else if (-1==c) {
+ printWarning("Unclosed tag at end-of-comment: <"+buf);
+ haveStartOrEndTag(buf.toString());
+ buf.setLength(0);
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_TAG_DOUBLEQUOTE:
+ if ('\"'==c) {
+ buf.append((char)c);
+ state = STATE_TAG;
+ }
+ else if (-1==c) {
+ printWarning("Unclosed attribute value at end-of-comment.");
+ haveStartOrEndTag(buf.toString()+"\"");
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+
+ case STATE_TAG_SINGLEQUOTE:
+ if ('\''==c) {
+ buf.append((char)c);
+ state = STATE_TAG;
+ }
+ else if (-1==c) {
+ printWarning("Unclosed attribute value at end-of-comment.");
+ haveStartOrEndTag(buf.toString()+"'");
+ }
+ else {
+ buf.append((char)c);
+ }
+ break;
+ }
}
return output.toString();
@@ -688,4 +688,3 @@ public final class HtmlRepairer {
return output.toString();
}
}
-