diff options
author | Ito Kazumitsu <kaz@maczuka.gcd.org> | 2006-08-20 14:47:30 +0000 |
---|---|---|
committer | Ito Kazumitsu <kaz@maczuka.gcd.org> | 2006-08-20 14:47:30 +0000 |
commit | cc4b3d34f0b1854867f160727dcfc78bcb90442c (patch) | |
tree | 0e92990411754ff5bab246005a48417fff21b683 /gnu | |
parent | 94103ccfadf1a006fbd18e4751e5a8394403aed4 (diff) | |
download | classpath-cc4b3d34f0b1854867f160727dcfc78bcb90442c.tar.gz |
2006-08-20 Ito Kazumitsu <kaz@maczuka.gcd.org>
Fixes bug #28412
* gnu/java/util/regex/CharIndexed.java(move1, setHitEnd, hitEnd):
New methods.
* gnu/java/util/regex/CharIndexedCharSequence.java,
gnu/java/util/regex/CharIndexedInputStream.java: Implemented the
new methods above.
* gnu/java/util/regex/RE.java(REG_FIX_STARTING_POSITION): New flag,
(match): Call the new method setHitEnd of the input,
(getMatchImpl): Handle the new flag REG_FIX_STARTING_POSITION,
Commented out the minimumLength optimization, Use CharIndexed#move1 instead of move.
* gnu/java/util/regex/REMatch.java: Made some debugging methods public.
* gnu/java/util/regex/REToken.java(match): The method body has been
moved to an internal private method, (matchFake): New method,
(setHitEnd): New method.
* gnu/java/util/regex/RETokenChar.java(matchThis): Call setHitEnd
if the match is not complete, (matchOneString): Count the number of
characters which matched the pattern.
* gnu/java/util/regex/RETokenEnd.java(fake): New field,
(setFake): New method, (match): Call super.match or super.matchFake.
* gnu/java/util/regex/RETokenEndSub.java(setHitEnd): New method.
* gnu/java/util/regex/RETokenOneOf.java(match): Call the new method
setHitEnd of the input.
* gnu/java/util/regex/RETokenRepeated.java(match): Likewise.
* java/util/regex/Matcher.java(lookingAt, match): Use the new flag
RE.REG_FIX_STARTING_POSITION, (hitEnd, toString): New methods.
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/java/util/regex/CharIndexed.java | 17 | ||||
-rw-r--r-- | gnu/java/util/regex/CharIndexedCharSequence.java | 13 | ||||
-rw-r--r-- | gnu/java/util/regex/CharIndexedInputStream.java | 14 | ||||
-rw-r--r-- | gnu/java/util/regex/RE.java | 26 | ||||
-rw-r--r-- | gnu/java/util/regex/REMatch.java | 4 | ||||
-rw-r--r-- | gnu/java/util/regex/REToken.java | 15 | ||||
-rw-r--r-- | gnu/java/util/regex/RETokenChar.java | 12 | ||||
-rw-r--r-- | gnu/java/util/regex/RETokenEnd.java | 15 | ||||
-rw-r--r-- | gnu/java/util/regex/RETokenEndSub.java | 4 | ||||
-rw-r--r-- | gnu/java/util/regex/RETokenOneOf.java | 1 | ||||
-rw-r--r-- | gnu/java/util/regex/RETokenRepeated.java | 1 |
11 files changed, 114 insertions, 8 deletions
diff --git a/gnu/java/util/regex/CharIndexed.java b/gnu/java/util/regex/CharIndexed.java index 6cd857e3b..27e07b2f8 100644 --- a/gnu/java/util/regex/CharIndexed.java +++ b/gnu/java/util/regex/CharIndexed.java @@ -77,6 +77,13 @@ public interface CharIndexed { boolean move(int index); /** + * Shifts the input buffer by a given number of positions. Returns + * true if the new cursor position is valid or cursor position is at + * the end of input. + */ + boolean move1(int index); // I cannot think of a better name for this. + + /** * Returns true if the most recent move() operation placed the cursor * position at a valid position in the input. */ @@ -105,6 +112,16 @@ public interface CharIndexed { REMatch getLastMatch(); /** + * Sets the information used for hitEnd(). + */ + void setHitEnd(REMatch match); + + /** + * Returns whether the matcher has hit the end of input. + */ + boolean hitEnd(); + + /** * Returns the anchor. */ int getAnchor(); diff --git a/gnu/java/util/regex/CharIndexedCharSequence.java b/gnu/java/util/regex/CharIndexedCharSequence.java index 2eb753b0f..8a0578eb8 100644 --- a/gnu/java/util/regex/CharIndexedCharSequence.java +++ b/gnu/java/util/regex/CharIndexedCharSequence.java @@ -62,6 +62,10 @@ class CharIndexedCharSequence implements CharIndexed, Serializable { return ((anchor += index) < len); } + public boolean move1(int index) { + return ((anchor += index) <= len); + } + public CharIndexed lookBehind(int index, int length) { if (length > (anchor + index)) length = anchor + index; return new CharIndexedCharSequence(s, anchor + index - length); @@ -77,6 +81,15 @@ class CharIndexedCharSequence implements CharIndexed, Serializable { lastMatch.anchor = anchor; } public REMatch getLastMatch() { return lastMatch; } + + private int rightmostTriedPosition = 0; + public void setHitEnd(REMatch match) { + int pos = anchor + match.index; + if (pos > rightmostTriedPosition) rightmostTriedPosition = pos; + } + public boolean hitEnd() { return 
rightmostTriedPosition >= len; } + public int getAnchor() { return anchor; } public void setAnchor(int anchor) { this.anchor = anchor; } + } diff --git a/gnu/java/util/regex/CharIndexedInputStream.java b/gnu/java/util/regex/CharIndexedInputStream.java index 77cd1abd5..844fada51 100644 --- a/gnu/java/util/regex/CharIndexedInputStream.java +++ b/gnu/java/util/regex/CharIndexedInputStream.java @@ -166,6 +166,16 @@ class CharIndexedInputStream implements CharIndexed { "difficult to support getLastMatch for an input stream"); } + public void setHitEnd(REMatch match) { + throw new UnsupportedOperationException( + "difficult to support setHitEnd for an input stream"); + } + + public boolean hitEnd() { + throw new UnsupportedOperationException( + "difficult to support hitEnd for an input stream"); + } + public int getAnchor() { throw new UnsupportedOperationException( "difficult to support getAnchor for an input stream"); @@ -176,6 +186,10 @@ class CharIndexedInputStream implements CharIndexed { "difficult to support setAnchor for an input stream"); } + public boolean move1(int index) { + throw new UnsupportedOperationException( + "difficult to support move1 for an input stream"); + } } diff --git a/gnu/java/util/regex/RE.java b/gnu/java/util/regex/RE.java index 1aab3b781..94aa0142c 100644 --- a/gnu/java/util/regex/RE.java +++ b/gnu/java/util/regex/RE.java @@ -252,6 +252,13 @@ public class RE extends REToken { */ public static final int REG_ICASE_USASCII = 0x0800; + /** + * Execution flag. + * Do not move the position at which the search begins. If not set, + * the starting position will be moved until a match is found. + */ + public static final int REG_FIX_STARTING_POSITION = 0x1000; + /** Returns a string representing the version of the gnu.regexp package. 
*/ public static final String version() { return VERSION; @@ -1643,6 +1650,7 @@ public class RE extends REToken { /* Implements abstract method REToken.match() */ boolean match(CharIndexed input, REMatch mymatch) { + input.setHitEnd(mymatch); if (firstToken == null) { return next(input, mymatch); } @@ -1720,15 +1728,23 @@ public class RE extends REToken { REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) { boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0); + boolean doMove = ((eflags & REG_FIX_STARTING_POSITION) == 0); RE re = (tryEntireMatch ? (RE) this.clone() : this); if (tryEntireMatch) { - re.chain(new RETokenEnd(0, null)); + RETokenEnd reEnd = new RETokenEnd(0, null); + reEnd.setFake(true); + re.chain(reEnd); } // Create a new REMatch to hold results REMatch mymatch = new REMatch(numSubs, anchor, eflags); do { + /* The following potimization is commented out because + the matching should be tried even if the length of + input is obviously too short in order that + java.util.regex.Matcher#hitEnd() may work correctly. // Optimization: check if anchor + minimumLength > length if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) { + */ if (re.match(input, mymatch)) { REMatch best = mymatch; // We assume that the match that coms first is the best. @@ -1749,13 +1765,17 @@ public class RE extends REToken { input.setLastMatch(best); return best; } - } + /* End of the optimization commented out + } + */ mymatch.clear(++anchor); // Append character to buffer if needed if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) { buffer.append(input.charAt(0)); } - } while (input.move(1)); + // java.util.regex.Matcher#hitEnd() requires that the search should + // be tried at the end of input, so we use move1(1) instead of move(1) + } while (doMove && input.move1(1)); // Special handling at end of input for e.g. 
"$" if (minimumLength == 0) { diff --git a/gnu/java/util/regex/REMatch.java b/gnu/java/util/regex/REMatch.java index 3ff5ad794..d89948293 100644 --- a/gnu/java/util/regex/REMatch.java +++ b/gnu/java/util/regex/REMatch.java @@ -307,12 +307,12 @@ public final class REMatch implements Serializable, Cloneable { } /* The following are used for debugging purpose - static String d(REMatch m) { + public static String d(REMatch m) { if (m == null) return "null"; else return "[" + m.index + "]"; } - String substringUptoIndex(CharIndexed input) { + public String substringUptoIndex(CharIndexed input) { StringBuffer sb = new StringBuffer(); for (int i = 0; i < index; i++) { sb.append(input.charAt(i)); diff --git a/gnu/java/util/regex/REToken.java b/gnu/java/util/regex/REToken.java index 155c01878..9affd4ee3 100644 --- a/gnu/java/util/regex/REToken.java +++ b/gnu/java/util/regex/REToken.java @@ -72,6 +72,16 @@ abstract class REToken implements Serializable, Cloneable { /** Returns true if the match succeeded, false if it failed. */ boolean match(CharIndexed input, REMatch mymatch) { + return match(input, mymatch, false); + } + boolean matchFake(CharIndexed input, REMatch mymatch) { + return match(input, mymatch, true); + } + + private boolean match(CharIndexed input, REMatch mymatch, boolean fake) { + if (!fake) { + setHitEnd(input, mymatch); + } REMatch m = matchThis(input, mymatch); if (m == null) return false; if (next(input, m)) { @@ -81,6 +91,11 @@ abstract class REToken implements Serializable, Cloneable { return false; } + /** Sets whether the matching occurs at the end of input */ + void setHitEnd(CharIndexed input, REMatch mymatch) { + input.setHitEnd(mymatch); + } + /** Returns true if the match succeeded, false if it failed. * The matching is done against this REToken only. Chained * tokens are not checked. 
diff --git a/gnu/java/util/regex/RETokenChar.java b/gnu/java/util/regex/RETokenChar.java index 92d3efcf8..b70e6b1d8 100644 --- a/gnu/java/util/regex/RETokenChar.java +++ b/gnu/java/util/regex/RETokenChar.java @@ -58,15 +58,20 @@ final class RETokenChar extends REToken { } REMatch matchThis(CharIndexed input, REMatch mymatch) { - int z = ch.length; if (matchOneString(input, mymatch.index)) { - mymatch.index += z; + mymatch.index += matchedLength; return mymatch; } + // java.util.regex.Matcher#hitEnd() requires that the length of + // partial match be counted. + mymatch.index += matchedLength; + input.setHitEnd(mymatch); return null; } - boolean matchOneString(CharIndexed input, int index) { + private int matchedLength; + private boolean matchOneString(CharIndexed input, int index) { + matchedLength = 0; int z = ch.length; char c; for (int i=0; i<z; i++) { @@ -74,6 +79,7 @@ final class RETokenChar extends REToken { if (! charEquals(c, ch[i])) { return false; } + ++matchedLength; } return true; } diff --git a/gnu/java/util/regex/RETokenEnd.java b/gnu/java/util/regex/RETokenEnd.java index 00efdb6a7..294e32085 100644 --- a/gnu/java/util/regex/RETokenEnd.java +++ b/gnu/java/util/regex/RETokenEnd.java @@ -45,6 +45,12 @@ final class RETokenEnd extends REToken { private String newline; private boolean check_java_line_terminators; + /** + * Indicates whether this token is a real one generated at compile time, + * or a fake one temporarily added by RE#getMatchImpl. 
+ */ + private boolean fake = false; + RETokenEnd(int subIndex,String newline) { super(subIndex); this.newline = newline; @@ -57,10 +63,19 @@ final class RETokenEnd extends REToken { this.check_java_line_terminators = b; } + void setFake(boolean fake) { + this.fake = fake; + } + int getMaximumLength() { return 0; } + boolean match(CharIndexed input, REMatch mymatch) { + if (!fake) return super.match(input, mymatch); + return super.matchFake(input, mymatch); + } + REMatch matchThis(CharIndexed input, REMatch mymatch) { char ch = input.charAt(mymatch.index); if (ch == CharIndexed.OUT_OF_BOUNDS) diff --git a/gnu/java/util/regex/RETokenEndSub.java b/gnu/java/util/regex/RETokenEndSub.java index 57a146d03..b3a28a3e8 100644 --- a/gnu/java/util/regex/RETokenEndSub.java +++ b/gnu/java/util/regex/RETokenEndSub.java @@ -58,6 +58,10 @@ final class RETokenEndSub extends REToken { return super.findMatch(input, mymatch); } + void setHitEnd(CharIndexed input, REMatch mymatch) { + // Do nothing + } + void dump(StringBuffer os) { // handled by RE // But add something for debugging. diff --git a/gnu/java/util/regex/RETokenOneOf.java b/gnu/java/util/regex/RETokenOneOf.java index bccc78311..239c2201c 100644 --- a/gnu/java/util/regex/RETokenOneOf.java +++ b/gnu/java/util/regex/RETokenOneOf.java @@ -120,6 +120,7 @@ final class RETokenOneOf extends REToken { } boolean match(CharIndexed input, REMatch mymatch) { + setHitEnd(input, mymatch); if (matchesOneChar) return matchOneChar(input, mymatch); else return matchOneRE(input, mymatch); } diff --git a/gnu/java/util/regex/RETokenRepeated.java b/gnu/java/util/regex/RETokenRepeated.java index 531c4a311..b32a316c4 100644 --- a/gnu/java/util/regex/RETokenRepeated.java +++ b/gnu/java/util/regex/RETokenRepeated.java @@ -318,6 +318,7 @@ final class RETokenRepeated extends REToken { } boolean match(CharIndexed input, REMatch mymatch) { + setHitEnd(input, mymatch); REMatch m1 = findMatch(input, mymatch); if (m1 != null) { mymatch.assignFrom(m1); |