summaryrefslogtreecommitdiff
path: root/libjava/classpath/java/util/regex/Matcher.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/java/util/regex/Matcher.java')
-rw-r--r--libjava/classpath/java/util/regex/Matcher.java289
1 files changed, 280 insertions, 9 deletions
diff --git a/libjava/classpath/java/util/regex/Matcher.java b/libjava/classpath/java/util/regex/Matcher.java
index bf833673b0e..50cb065f287 100644
--- a/libjava/classpath/java/util/regex/Matcher.java
+++ b/libjava/classpath/java/util/regex/Matcher.java
@@ -38,6 +38,8 @@ exception statement from your version. */
package java.util.regex;
+import gnu.java.lang.CPStringBuilder;
+
import gnu.java.util.regex.CharIndexed;
import gnu.java.util.regex.RE;
import gnu.java.util.regex.REMatch;
@@ -59,11 +61,45 @@ public final class Matcher implements MatchResult
private int appendPosition;
private REMatch match;
+ /**
+ * The start of the region of the input on which to match.
+ */
+ private int regionStart;
+
+ /**
+ * The end of the region of the input on which to match.
+ */
+ private int regionEnd;
+
+ /**
+ * True if the match process should look beyond the
+ * region marked by regionStart to regionEnd when
+ * performing lookAhead, lookBehind and boundary
+ * matching.
+ */
+ private boolean transparentBounds;
+
+ /**
+ * The flags that affect the anchoring bounds.
+ * If {@link #hasAnchoringBounds()} is {@code true},
+ * the match process will honour the
+ * anchoring bounds: ^, \A, \Z, \z and $. If
+ * {@link #hasAnchoringBounds()} is {@code false},
+ * the anchors are ignored and appropriate flags,
+ * stored in this variable, are used to provide this
+ * behaviour.
+ */
+ private int anchoringBounds;
+
Matcher(Pattern pattern, CharSequence input)
{
this.pattern = pattern;
this.input = input;
this.inputCharIndexed = RE.makeCharIndexed(input, 0);
+ regionStart = 0; // the region initially begins at index 0 ...
+ regionEnd = input.length(); // ... and ends at the length of the input
+ transparentBounds = false; // region bounds start out opaque
+ anchoringBounds = 0; // zero is the value for which hasAnchoringBounds() reports true
}
/**
@@ -125,13 +161,17 @@ public final class Matcher implements MatchResult
public boolean find ()
{
boolean first = (match == null);
- match = pattern.getRE().getMatch(inputCharIndexed, position);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+ position, anchoringBounds);
if (match != null)
{
int endIndex = match.getEndIndex();
// Are we stuck at the same position?
if (!first && endIndex == position)
- {
+ {
match = null;
// Not at the end of the input yet?
if (position < input.length() - 1)
@@ -156,7 +196,11 @@ public final class Matcher implements MatchResult
*/
public boolean find (int start)
{
- match = pattern.getRE().getMatch(inputCharIndexed, start);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+ start, anchoringBounds);
if (match != null)
{
position = match.getEndIndex();
@@ -218,7 +262,12 @@ public final class Matcher implements MatchResult
public boolean lookingAt ()
{
- match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_FIX_STARTING_POSITION, null);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+ anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+ anchoringBounds|RE.REG_FIX_STARTING_POSITION);
if (match != null)
{
if (match.getStartIndex() == 0)
@@ -243,7 +292,12 @@ public final class Matcher implements MatchResult
*/
public boolean matches ()
{
- match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION, null);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+ anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+ anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
if (match != null)
{
if (match.getStartIndex() == 0)
@@ -265,15 +319,40 @@ public final class Matcher implements MatchResult
return pattern;
}
+ /**
+ * Resets the internal state of the matcher, including
+ * resetting the region to its default state of encompassing
+ * the whole input. The settings reported by {@link #hasTransparentBounds()}
+ * and {@link #hasAnchoringBounds()} are unaffected.
+ *
+ * @return a reference to this matcher.
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @see #hasTransparentBounds()
+ * @see #hasAnchoringBounds()
+ */
public Matcher reset ()
{
position = 0; // index handed to the next find() call
match = null; // forget the result of any previous match attempt
+ regionStart = 0; // widen the region back to the whole input ...
+ regionEnd = input.length(); // ... ending at the current input length
+ appendPosition = 0; // the append position is rewound as well
return this;
}
/**
- * @param input The new input character sequence
+ * Resets the internal state of the matcher, including
+ * resetting the region to its default state of encompassing
+ * the whole input. The settings reported by {@link #hasTransparentBounds()}
+ * and {@link #hasAnchoringBounds()} are unaffected.
+ *
+ * @param input The new input character sequence.
+ * @return a reference to this matcher.
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @see #hasTransparentBounds()
+ * @see #hasAnchoringBounds()
*/
public Matcher reset (CharSequence input)
{
@@ -283,7 +362,7 @@ public final class Matcher implements MatchResult
}
/**
- * @returns the index of a capturing group in this matcher's pattern
+ * @return the index of a capturing group in this matcher's pattern
*
* @exception IllegalStateException If no match has yet been attempted,
* or if the previous match operation failed
@@ -312,6 +391,7 @@ public final class Matcher implements MatchResult
/**
* @return True if and only if the matcher hit the end of input.
+ * @since 1.5
*/
public boolean hitEnd()
{
@@ -323,10 +403,12 @@ public final class Matcher implements MatchResult
*/
public String toString()
{
- StringBuilder sb = new StringBuilder();
+ CPStringBuilder sb = new CPStringBuilder();
sb.append(this.getClass().getName())
.append("[pattern=").append(pattern.pattern())
- .append(" region=").append("0").append(",").append(input.length())
+ .append(" region=").append(regionStart).append(",").append(regionEnd)
+ .append(" anchoringBounds=").append(anchoringBounds == 0)
+ .append(" transparentBounds=").append(transparentBounds)
.append(" lastmatch=").append(match == null ? "" : match.toString())
.append("]");
return sb.toString();
@@ -336,4 +418,193 @@ public final class Matcher implements MatchResult
{
if (match == null) throw new IllegalStateException();
}
+
+ /**
+  * <p>
+  * Defines the region of the input on which to match.
+  * By default, the {@link Matcher} attempts to match
+  * the whole string (from 0 to the length of the input),
+  * but a region between {@code start} (inclusive) and
+  * {@code end} (exclusive) on which to match may instead
+  * be defined using this method. Calling this method also
+  * resets the matcher (see {@link #reset()}) before the new
+  * region is installed.
+  * </p>
+  * <p>
+  * The behaviour of region matching is further affected
+  * by the use of transparent or opaque bounds (see
+  * {@link #useTransparentBounds(boolean)}) and whether or not
+  * anchors ({@code ^} and {@code $}) are in use
+  * (see {@link #useAnchoringBounds(boolean)}). With transparent
+  * bounds, the matcher is aware of input outside the bounds
+  * set by this method, whereas, with opaque bounds (the default)
+  * only the input within the bounds is used. The use of
+  * anchors is affected by this setting; with transparent
+  * bounds, anchors will match the beginning of the real input,
+  * while with opaque bounds they match the beginning of the
+  * region. {@link #useAnchoringBounds(boolean)} can be used
+  * to turn on or off the matching of anchors.
+  * </p>
+  * <p>
+  * A {@code start} equal to the length of the input is permitted;
+  * together with an equal {@code end} it denotes an empty region
+  * at the very end of the input.
+  * </p>
+  *
+  * @param start the start of the region (inclusive).
+  * @param end the end of the region (exclusive).
+  * @return a reference to this matcher.
+  * @throws IndexOutOfBoundsException if either {@code start} or
+  *                                   {@code end} is less than zero,
+  *                                   if either {@code start} or
+  *                                   {@code end} is greater than the
+  *                                   length of the input, or if
+  *                                   {@code start} is greater than
+  *                                   {@code end}.
+  * @see #regionStart()
+  * @see #regionEnd()
+  * @see #hasTransparentBounds()
+  * @see #useTransparentBounds(boolean)
+  * @see #hasAnchoringBounds()
+  * @see #useAnchoringBounds(boolean)
+  * @since 1.5
+  */
+ public Matcher region(int start, int end)
+ {
+   int length = input.length();
+   if (start < 0)
+     throw new IndexOutOfBoundsException("The start position was less than zero.");
+   // start == length is legal: it is the only valid start for an empty
+   // input and describes an empty region at the end of any other input.
+   if (start > length)
+     throw new IndexOutOfBoundsException("The start position is after the end of the input.");
+   if (end < 0)
+     throw new IndexOutOfBoundsException("The end position was less than zero.");
+   if (end > length)
+     throw new IndexOutOfBoundsException("The end position is after the end of the input.");
+   if (start > end)
+     throw new IndexOutOfBoundsException("The start position is after the end position.");
+   // reset() restores the default region, so the new bounds must be
+   // assigned only after it has run.
+   reset();
+   regionStart = start;
+   regionEnd = end;
+   return this;
+ }
+
+ /**
+  * Reports the index at which the matching region begins. The
+  * index is inclusive.
+  *
+  * @return the start index of the region.
+  * @see #region(int,int)
+  * @see #regionEnd()
+  * @since 1.5
+  */
+ public int regionStart()
+ {
+   return this.regionStart;
+ }
+
+ /**
+  * Reports the index at which the matching region finishes. The
+  * index is exclusive.
+  *
+  * @return the end index of the region.
+  * @see #region(int,int)
+  * @see #regionStart()
+  * @since 1.5
+  */
+ public int regionEnd()
+ {
+   return this.regionEnd;
+ }
+
+ /**
+  * Tells whether the bounds of the region delimited by
+  * {@link #regionStart()} and {@link #regionEnd()} are
+  * transparent. Transparent bounds allow the matching
+  * process to look past the region when performing
+  * lookahead, lookbehind and boundary matching operations.
+  * The bounds are opaque unless changed with
+  * {@link #useTransparentBounds(boolean)}.
+  *
+  * @return true if the bounds of the matching region are
+  *         transparent.
+  * @see #useTransparentBounds(boolean)
+  * @see #region(int,int)
+  * @see #regionStart()
+  * @see #regionEnd()
+  * @since 1.5
+  */
+ public boolean hasTransparentBounds()
+ {
+   return this.transparentBounds;
+ }
+
+ /**
+  * Chooses between transparent and opaque bounds for the region
+  * delimited by {@link #regionStart()} and {@link #regionEnd()}.
+  * Passing {@code true} makes the bounds transparent, letting the
+  * matcher see beyond them for lookahead, lookbehind and boundary
+  * matching operations. Passing {@code false} (the default state)
+  * makes the bounds opaque, which restricts the match to the
+  * input region denoted by {@link #regionStart()} and
+  * {@link #regionEnd()}.
+  *
+  * @param transparent true if the bounds should be transparent.
+  * @return a reference to this matcher.
+  * @see #hasTransparentBounds()
+  * @see #region(int,int)
+  * @see #regionStart()
+  * @see #regionEnd()
+  * @since 1.5
+  */
+ public Matcher useTransparentBounds(boolean transparent)
+ {
+   this.transparentBounds = transparent;
+   return this;
+ }
+
+ /**
+  * Tells whether the matcher honours the anchoring bounds:
+  * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and {@code $}.
+  * The anchors are used by default. Their effect also depends
+  * on {@link #hasTransparentBounds()}.
+  *
+  * @return true if the matcher will attempt to match
+  *         the anchoring bounds.
+  * @see #useAnchoringBounds(boolean)
+  * @see #hasTransparentBounds()
+  * @since 1.5
+  */
+ public boolean hasAnchoringBounds()
+ {
+   // Anchoring is in effect exactly when no suppressing flags are stored.
+   final boolean noSuppressingFlags = (anchoringBounds == 0);
+   return noSuppressingFlags;
+ }
+
+ /**
+ * Enables or disables the use of the anchoring bounds:
+ * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
+ * {@code $}. By default, their use is enabled. When
+ * disabled, the matcher will not attempt to match
+ * the anchors.
+ *
+ * @param useAnchors true if anchoring bounds should be used.
+ * @return a reference to this matcher.
+ * @since 1.5
+ * @see #hasAnchoringBounds()
+ */
+ public Matcher useAnchoringBounds(boolean useAnchors)
+ {
+ if (useAnchors)
+ anchoringBounds = 0;
+ else
+ anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
+ return this;
+ }
+
+ /**
+  * Returns a read-only snapshot of the current state of
+  * the {@link Matcher} as a {@link MatchResult}. Any
+  * subsequent changes to this instance are not reflected
+  * in the returned {@link MatchResult}.
+  * <p>
+  * If no match has been attempted yet, or the previous match
+  * operation failed, the snapshot simply carries no match
+  * rather than this method failing.
+  * </p>
+  *
+  * @return a {@link MatchResult} instance representing the
+  *         current state of the {@link Matcher}.
+  */
+ public MatchResult toMatchResult()
+ {
+   Matcher snapshot = new Matcher(pattern, input);
+   // Copy the match only when there is one; cloning a null match
+   // would raise a NullPointerException.
+   if (match != null)
+     snapshot.match = (REMatch) match.clone();
+   return snapshot;
+ }
+
}