summaryrefslogtreecommitdiff
path: root/gnu/java/util/regex
diff options
context:
space:
mode:
authorAndrew John Hughes <gnu_andrew@member.fsf.org>2006-09-05 20:47:38 +0000
committerAndrew John Hughes <gnu_andrew@member.fsf.org>2006-09-05 20:47:38 +0000
commit228fc33f661314b6723b691635085ee4ff6a69b2 (patch)
tree50e78696593f8df21fe9330258dff98537d39f65 /gnu/java/util/regex
parentf090c35d5776db64813ccf19d8327cf0ae756ac7 (diff)
downloadclasspath-228fc33f661314b6723b691635085ee4ff6a69b2.tar.gz
2006-09-05 Andrew John Hughes <gnu_andrew@member.fsf.org>
* Merge of HEAD-->generics from 2006/08/12 to 2006/09/03.
Diffstat (limited to 'gnu/java/util/regex')
-rw-r--r--gnu/java/util/regex/CharIndexed.java17
-rw-r--r--gnu/java/util/regex/CharIndexedCharSequence.java13
-rw-r--r--gnu/java/util/regex/CharIndexedInputStream.java14
-rw-r--r--gnu/java/util/regex/RE.java26
-rw-r--r--gnu/java/util/regex/REMatch.java4
-rw-r--r--gnu/java/util/regex/REToken.java15
-rw-r--r--gnu/java/util/regex/RETokenChar.java12
-rw-r--r--gnu/java/util/regex/RETokenEnd.java15
-rw-r--r--gnu/java/util/regex/RETokenEndSub.java4
-rw-r--r--gnu/java/util/regex/RETokenOneOf.java1
-rw-r--r--gnu/java/util/regex/RETokenRepeated.java1
11 files changed, 114 insertions, 8 deletions
diff --git a/gnu/java/util/regex/CharIndexed.java b/gnu/java/util/regex/CharIndexed.java
index 6cd857e3b..27e07b2f8 100644
--- a/gnu/java/util/regex/CharIndexed.java
+++ b/gnu/java/util/regex/CharIndexed.java
@@ -77,6 +77,13 @@ public interface CharIndexed {
boolean move(int index);
/**
+ * Shifts the input buffer by a given number of positions. Returns
+ * true if the new cursor position is valid or cursor position is at
+ * the end of input.
+ */
+ boolean move1(int index); // I cannot think of a better name for this.
+
+ /**
* Returns true if the most recent move() operation placed the cursor
* position at a valid position in the input.
*/
@@ -105,6 +112,16 @@ public interface CharIndexed {
REMatch getLastMatch();
/**
+ * Sets the information used for hitEnd().
+ */
+ void setHitEnd(REMatch match);
+
+ /**
+ * Returns whether the matcher has hit the end of input.
+ */
+ boolean hitEnd();
+
+ /**
* Returns the anchor.
*/
int getAnchor();
diff --git a/gnu/java/util/regex/CharIndexedCharSequence.java b/gnu/java/util/regex/CharIndexedCharSequence.java
index 2eb753b0f..8a0578eb8 100644
--- a/gnu/java/util/regex/CharIndexedCharSequence.java
+++ b/gnu/java/util/regex/CharIndexedCharSequence.java
@@ -62,6 +62,10 @@ class CharIndexedCharSequence implements CharIndexed, Serializable {
return ((anchor += index) < len);
}
+ public boolean move1(int index) {
+ return ((anchor += index) <= len);
+ }
+
public CharIndexed lookBehind(int index, int length) {
if (length > (anchor + index)) length = anchor + index;
return new CharIndexedCharSequence(s, anchor + index - length);
@@ -77,6 +81,15 @@ class CharIndexedCharSequence implements CharIndexed, Serializable {
lastMatch.anchor = anchor;
}
public REMatch getLastMatch() { return lastMatch; }
+
+ private int rightmostTriedPosition = 0;
+ public void setHitEnd(REMatch match) {
+ int pos = anchor + match.index;
+ if (pos > rightmostTriedPosition) rightmostTriedPosition = pos;
+ }
+ public boolean hitEnd() { return rightmostTriedPosition >= len; }
+
public int getAnchor() { return anchor; }
public void setAnchor(int anchor) { this.anchor = anchor; }
+
}
diff --git a/gnu/java/util/regex/CharIndexedInputStream.java b/gnu/java/util/regex/CharIndexedInputStream.java
index 77cd1abd5..844fada51 100644
--- a/gnu/java/util/regex/CharIndexedInputStream.java
+++ b/gnu/java/util/regex/CharIndexedInputStream.java
@@ -166,6 +166,16 @@ class CharIndexedInputStream implements CharIndexed {
"difficult to support getLastMatch for an input stream");
}
+ public void setHitEnd(REMatch match) {
+ throw new UnsupportedOperationException(
+ "difficult to support setHitEnd for an input stream");
+ }
+
+ public boolean hitEnd() {
+ throw new UnsupportedOperationException(
+ "difficult to support hitEnd for an input stream");
+ }
+
public int getAnchor() {
throw new UnsupportedOperationException(
"difficult to support getAnchor for an input stream");
@@ -176,6 +186,10 @@ class CharIndexedInputStream implements CharIndexed {
"difficult to support setAnchor for an input stream");
}
+ public boolean move1(int index) {
+ throw new UnsupportedOperationException(
+ "difficult to support move1 for an input stream");
+ }
}
diff --git a/gnu/java/util/regex/RE.java b/gnu/java/util/regex/RE.java
index 1aab3b781..94aa0142c 100644
--- a/gnu/java/util/regex/RE.java
+++ b/gnu/java/util/regex/RE.java
@@ -252,6 +252,13 @@ public class RE extends REToken {
*/
public static final int REG_ICASE_USASCII = 0x0800;
+ /**
+ * Execution flag.
+ * Do not move the position at which the search begins. If not set,
+ * the starting position will be moved until a match is found.
+ */
+ public static final int REG_FIX_STARTING_POSITION = 0x1000;
+
/** Returns a string representing the version of the gnu.regexp package. */
public static final String version() {
return VERSION;
@@ -1643,6 +1650,7 @@ public class RE extends REToken {
/* Implements abstract method REToken.match() */
boolean match(CharIndexed input, REMatch mymatch) {
+ input.setHitEnd(mymatch);
if (firstToken == null) {
return next(input, mymatch);
}
@@ -1720,15 +1728,23 @@ public class RE extends REToken {
REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0);
+ boolean doMove = ((eflags & REG_FIX_STARTING_POSITION) == 0);
RE re = (tryEntireMatch ? (RE) this.clone() : this);
if (tryEntireMatch) {
- re.chain(new RETokenEnd(0, null));
+ RETokenEnd reEnd = new RETokenEnd(0, null);
+ reEnd.setFake(true);
+ re.chain(reEnd);
}
// Create a new REMatch to hold results
REMatch mymatch = new REMatch(numSubs, anchor, eflags);
do {
/* The following optimization is commented out because
+ the matching should be tried even if the length of
+ input is obviously too short in order that
+ java.util.regex.Matcher#hitEnd() may work correctly.
// Optimization: check if anchor + minimumLength > length
if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
+ */
if (re.match(input, mymatch)) {
REMatch best = mymatch;
// We assume that the match that comes first is the best.
@@ -1749,13 +1765,17 @@ public class RE extends REToken {
input.setLastMatch(best);
return best;
}
- }
+ /* End of the optimization commented out
+ }
+ */
mymatch.clear(++anchor);
// Append character to buffer if needed
if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
buffer.append(input.charAt(0));
}
- } while (input.move(1));
+ // java.util.regex.Matcher#hitEnd() requires that the search should
+ // be tried at the end of input, so we use move1(1) instead of move(1)
+ } while (doMove && input.move1(1));
// Special handling at end of input for e.g. "$"
if (minimumLength == 0) {
diff --git a/gnu/java/util/regex/REMatch.java b/gnu/java/util/regex/REMatch.java
index 3ff5ad794..d89948293 100644
--- a/gnu/java/util/regex/REMatch.java
+++ b/gnu/java/util/regex/REMatch.java
@@ -307,12 +307,12 @@ public final class REMatch implements Serializable, Cloneable {
}
/* The following are used for debugging purposes
- static String d(REMatch m) {
+ public static String d(REMatch m) {
if (m == null) return "null";
else return "[" + m.index + "]";
}
- String substringUptoIndex(CharIndexed input) {
+ public String substringUptoIndex(CharIndexed input) {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < index; i++) {
sb.append(input.charAt(i));
diff --git a/gnu/java/util/regex/REToken.java b/gnu/java/util/regex/REToken.java
index 155c01878..9affd4ee3 100644
--- a/gnu/java/util/regex/REToken.java
+++ b/gnu/java/util/regex/REToken.java
@@ -72,6 +72,16 @@ abstract class REToken implements Serializable, Cloneable {
/** Returns true if the match succeeded, false if it failed. */
boolean match(CharIndexed input, REMatch mymatch) {
+ return match(input, mymatch, false);
+ }
+ boolean matchFake(CharIndexed input, REMatch mymatch) {
+ return match(input, mymatch, true);
+ }
+
+ private boolean match(CharIndexed input, REMatch mymatch, boolean fake) {
+ if (!fake) {
+ setHitEnd(input, mymatch);
+ }
REMatch m = matchThis(input, mymatch);
if (m == null) return false;
if (next(input, m)) {
@@ -81,6 +91,11 @@ abstract class REToken implements Serializable, Cloneable {
return false;
}
+ /** Sets whether the matching occurs at the end of input */
+ void setHitEnd(CharIndexed input, REMatch mymatch) {
+ input.setHitEnd(mymatch);
+ }
+
/** Returns true if the match succeeded, false if it failed.
* The matching is done against this REToken only. Chained
* tokens are not checked.
diff --git a/gnu/java/util/regex/RETokenChar.java b/gnu/java/util/regex/RETokenChar.java
index 92d3efcf8..b70e6b1d8 100644
--- a/gnu/java/util/regex/RETokenChar.java
+++ b/gnu/java/util/regex/RETokenChar.java
@@ -58,15 +58,20 @@ final class RETokenChar extends REToken {
}
REMatch matchThis(CharIndexed input, REMatch mymatch) {
- int z = ch.length;
if (matchOneString(input, mymatch.index)) {
- mymatch.index += z;
+ mymatch.index += matchedLength;
return mymatch;
}
+ // java.util.regex.Matcher#hitEnd() requires that the length of
+ // partial match be counted.
+ mymatch.index += matchedLength;
+ input.setHitEnd(mymatch);
return null;
}
- boolean matchOneString(CharIndexed input, int index) {
+ private int matchedLength;
+ private boolean matchOneString(CharIndexed input, int index) {
+ matchedLength = 0;
int z = ch.length;
char c;
for (int i=0; i<z; i++) {
@@ -74,6 +79,7 @@ final class RETokenChar extends REToken {
if (! charEquals(c, ch[i])) {
return false;
}
+ ++matchedLength;
}
return true;
}
diff --git a/gnu/java/util/regex/RETokenEnd.java b/gnu/java/util/regex/RETokenEnd.java
index 00efdb6a7..294e32085 100644
--- a/gnu/java/util/regex/RETokenEnd.java
+++ b/gnu/java/util/regex/RETokenEnd.java
@@ -45,6 +45,12 @@ final class RETokenEnd extends REToken {
private String newline;
private boolean check_java_line_terminators;
+ /**
+ * Indicates whether this token is a real one generated at compile time,
+ * or a fake one temporarily added by RE#getMatchImpl.
+ */
+ private boolean fake = false;
+
RETokenEnd(int subIndex,String newline) {
super(subIndex);
this.newline = newline;
@@ -57,10 +63,19 @@ final class RETokenEnd extends REToken {
this.check_java_line_terminators = b;
}
+ void setFake(boolean fake) {
+ this.fake = fake;
+ }
+
int getMaximumLength() {
return 0;
}
+ boolean match(CharIndexed input, REMatch mymatch) {
+ if (!fake) return super.match(input, mymatch);
+ return super.matchFake(input, mymatch);
+ }
+
REMatch matchThis(CharIndexed input, REMatch mymatch) {
char ch = input.charAt(mymatch.index);
if (ch == CharIndexed.OUT_OF_BOUNDS)
diff --git a/gnu/java/util/regex/RETokenEndSub.java b/gnu/java/util/regex/RETokenEndSub.java
index 57a146d03..b3a28a3e8 100644
--- a/gnu/java/util/regex/RETokenEndSub.java
+++ b/gnu/java/util/regex/RETokenEndSub.java
@@ -58,6 +58,10 @@ final class RETokenEndSub extends REToken {
return super.findMatch(input, mymatch);
}
+ void setHitEnd(CharIndexed input, REMatch mymatch) {
+ // Do nothing
+ }
+
void dump(StringBuffer os) {
// handled by RE
// But add something for debugging.
diff --git a/gnu/java/util/regex/RETokenOneOf.java b/gnu/java/util/regex/RETokenOneOf.java
index bccc78311..239c2201c 100644
--- a/gnu/java/util/regex/RETokenOneOf.java
+++ b/gnu/java/util/regex/RETokenOneOf.java
@@ -120,6 +120,7 @@ final class RETokenOneOf extends REToken {
}
boolean match(CharIndexed input, REMatch mymatch) {
+ setHitEnd(input, mymatch);
if (matchesOneChar) return matchOneChar(input, mymatch);
else return matchOneRE(input, mymatch);
}
diff --git a/gnu/java/util/regex/RETokenRepeated.java b/gnu/java/util/regex/RETokenRepeated.java
index 531c4a311..b32a316c4 100644
--- a/gnu/java/util/regex/RETokenRepeated.java
+++ b/gnu/java/util/regex/RETokenRepeated.java
@@ -318,6 +318,7 @@ final class RETokenRepeated extends REToken {
}
boolean match(CharIndexed input, REMatch mymatch) {
+ setHitEnd(input, mymatch);
REMatch m1 = findMatch(input, mymatch);
if (m1 != null) {
mymatch.assignFrom(m1);