summaryrefslogtreecommitdiff
path: root/java
diff options
context:
space:
mode:
authorAndrew John Hughes <gnu_andrew@member.fsf.org>2008-05-12 20:43:52 +0000
committerAndrew John Hughes <gnu_andrew@member.fsf.org>2008-05-12 20:43:52 +0000
commite3e1dcdb5d0f824dff35d185ac94e87a7fb0660e (patch)
tree472150cf2029f03bc2668945573d2bd5ba841234 /java
parentb81a06c356c75e29d45a61d12fd1d0e48f20c8eb (diff)
downloadclasspath-e3e1dcdb5d0f824dff35d185ac94e87a7fb0660e.tar.gz
2008-05-11 Andrew John Hughes <gnu_andrew@member.fsf.org>
* java/util/regex/Matcher.java: (regionStart): New variable. (regionEnd): Likewise. (transparentBounds): Likewise. (anchoringBounds): Likewise. (Matcher()): Initialise new variables. (find()): Alter to use new settings. (find(int)): Likewise. (lookingAt()): Likewise. (matches()): Likewise. (reset()): Reset region. (reset(CharSequence)): Documented. (toString()): Include new variables. (region(int,int)): Implemented. (regionStart()): Likewise. (regionEnd()): Likewise. (hasTransparentBounds()): Likewise. (useTransparentBounds(boolean)): Likewise. (hasAnchoringBounds()): Likewise. (useAnchoringBounds(boolean)): Likewise.
Diffstat (limited to 'java')
-rw-r--r--java/util/regex/Matcher.java266
1 files changed, 259 insertions, 7 deletions
diff --git a/java/util/regex/Matcher.java b/java/util/regex/Matcher.java
index f6c36e01f..cc2a2e5e9 100644
--- a/java/util/regex/Matcher.java
+++ b/java/util/regex/Matcher.java
@@ -61,11 +61,45 @@ public final class Matcher implements MatchResult
private int appendPosition;
private REMatch match;
+ /**
+ * The start of the region of the input on which to match.
+ */
+ private int regionStart;
+
+ /**
+ * The end of the region of the input on which to match.
+ */
+ private int regionEnd;
+
+ /**
+ * True if the match process should look beyond the
+ * region marked by regionStart to regionEnd when
+ * performing lookAhead, lookBehind and boundary
+ * matching.
+ */
+ private boolean transparentBounds;
+
+ /**
+ * The flags that affect the anchoring bounds.
+ * If {@link #hasAnchoringBounds()} is {@code true},
+ * the match process will honour the
+ * anchoring bounds: ^, \A, \Z, \z and $. If
+ * {@link #hasAnchoringBounds()} is {@code false},
+ * the anchors are ignored and appropriate flags,
+ * stored in this variable, are used to provide this
+ * behaviour.
+ */
+ private int anchoringBounds;
+
Matcher(Pattern pattern, CharSequence input)
{
this.pattern = pattern;
this.input = input;
this.inputCharIndexed = RE.makeCharIndexed(input, 0);
+ regionStart = 0;
+ regionEnd = input.length();
+ transparentBounds = false;
+ anchoringBounds = 0;
}
/**
@@ -127,7 +161,11 @@ public final class Matcher implements MatchResult
public boolean find ()
{
boolean first = (match == null);
- match = pattern.getRE().getMatch(inputCharIndexed, position);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+ position, anchoringBounds);
if (match != null)
{
int endIndex = match.getEndIndex();
@@ -158,7 +196,11 @@ public final class Matcher implements MatchResult
*/
public boolean find (int start)
{
- match = pattern.getRE().getMatch(inputCharIndexed, start);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+ start, anchoringBounds);
if (match != null)
{
position = match.getEndIndex();
@@ -220,7 +262,12 @@ public final class Matcher implements MatchResult
public boolean lookingAt ()
{
- match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_FIX_STARTING_POSITION, null);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+ anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+ anchoringBounds|RE.REG_FIX_STARTING_POSITION);
if (match != null)
{
if (match.getStartIndex() == 0)
@@ -245,7 +292,12 @@ public final class Matcher implements MatchResult
*/
public boolean matches ()
{
- match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION, null);
+ if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+ match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+ anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+ else
+ match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+ anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
if (match != null)
{
if (match.getStartIndex() == 0)
@@ -267,15 +319,39 @@ public final class Matcher implements MatchResult
return pattern;
}
+ /**
+ * Resets the internal state of the matcher, including
+ * resetting the region to its default state of encompassing
+ * the whole input. The state of {@link #hasTransparentBounds()}
+ * and {@link #hasAnchoringBounds()} are unaffected.
+ *
+ * @return a reference to this matcher.
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @see #hasTransparentBounds()
+ * @see #hasAnchoringBounds()
+ */
public Matcher reset ()
{
position = 0;
match = null;
+ regionStart = 0;
+ regionEnd = input.length();
return this;
}
/**
- * @param input The new input character sequence
+ * Resets the internal state of the matcher, including
+ * resetting the region to its default state of encompassing
+ * the whole input. The state of {@link #hasTransparentBounds()}
+ * and {@link #hasAnchoringBounds()} are unaffected.
+ *
+ * @param input The new input character sequence.
+ * @return a reference to this matcher.
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @see #hasTransparentBounds()
+ * @see #hasAnchoringBounds()
*/
public Matcher reset (CharSequence input)
{
@@ -285,7 +361,7 @@ public final class Matcher implements MatchResult
}
/**
- * @returns the index of a capturing group in this matcher's pattern
+ * @return the index of a capturing group in this matcher's pattern
*
* @exception IllegalStateException If no match has yet been attempted,
* or if the previous match operation failed
@@ -314,6 +390,7 @@ public final class Matcher implements MatchResult
/**
* @return True if and only if the matcher hit the end of input.
+ * @since 1.5
*/
public boolean hitEnd()
{
@@ -328,7 +405,9 @@ public final class Matcher implements MatchResult
CPStringBuilder sb = new CPStringBuilder();
sb.append(this.getClass().getName())
.append("[pattern=").append(pattern.pattern())
- .append(" region=").append("0").append(",").append(input.length())
+ .append(" region=").append(regionStart).append(",").append(regionEnd)
+ .append(" anchoringBounds=").append(anchoringBounds == 0)
+ .append(" transparentBounds=").append(transparentBounds)
.append(" lastmatch=").append(match == null ? "" : match.toString())
.append("]");
return sb.toString();
@@ -338,4 +417,177 @@ public final class Matcher implements MatchResult
{
if (match == null) throw new IllegalStateException();
}
+
+ /**
+ * <p>
+ * Defines the region of the input on which to match.
+ * By default, the {@link Matcher} attempts to match
+ * the whole string (from 0 to the length of the input),
+ * but a region between {@code start} (inclusive) and
+ * {@code end} (exclusive) on which to match may instead
+ * be defined using this method.
+ * </p>
+ * <p>
+ * The behaviour of region matching is further affected
+ * by the use of transparent or opaque bounds (see
+ * {@link #useTransparentBounds(boolean)}) and whether or not
+ * anchors ({@code ^} and {@code $}) are in use
+ * (see {@link #useAnchoringBounds(boolean)}). With transparent
+ * bounds, the matcher is aware of input outside the bounds
+ * set by this method, whereas, with opaque bounds (the default)
+ * only the input within the bounds is used. The use of
+ * anchors is affected by this setting; with transparent
+ * bounds, anchors will match the beginning of the real input,
+ * while with opaque bounds they match the beginning of the
+ * region. {@link #useAnchoringBounds(boolean)} can be used
+ * to turn on or off the matching of anchors.
+ * </p>
+ *
+ * @param start the start of the region (inclusive).
+ * @param end the end of the region (exclusive).
+ * @return a reference to this matcher.
+ * @throws IndexOutOfBoundsException if either {@code start} or
+ * {@code end} is less than zero,
+ * if either {@code start} or
+ * {@code end} is greater than the
+ * length of the input, or if
+ * {@code start} is greater than
+ * {@code end}.
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @see #hasTransparentBounds()
+ * @see #useTransparentBounds(boolean)
+ * @see #hasAnchoringBounds()
+ * @see #useAnchoringBounds(boolean)
+ * @since 1.5
+ */
+ public Matcher region(int start, int end)
+ {
+ int length = input.length();
+ if (start < 0)
+ throw new IndexOutOfBoundsException("The start position was less than zero.");
+ if (start > length) // start == length is legal: it denotes an empty region at the end of the input
+ throw new IndexOutOfBoundsException("The start position is after the end of the input.");
+ if (end < 0)
+ throw new IndexOutOfBoundsException("The end position was less than zero.");
+ if (end > length)
+ throw new IndexOutOfBoundsException("The end position is after the end of the input.");
+ if (start > end)
+ throw new IndexOutOfBoundsException("The start position is after the end position.");
+ reset(); // clears the match state and restores the default region, so the new bounds are assigned afterwards
+ regionStart = start;
+ regionEnd = end;
+ return this;
+ }
+
+ /**
+ * The start of the region on which to perform matches (inclusive).
+ *
+ * @return the start index of the region.
+ * @see #region(int,int)
+ * @see #regionEnd()
+ * @since 1.5
+ */
+ public int regionStart()
+ {
+ return regionStart;
+ }
+
+ /**
+ * The end of the region on which to perform matches (exclusive).
+ *
+ * @return the end index of the region.
+ * @see #region(int,int)
+ * @see #regionStart()
+ * @since 1.5
+ */
+ public int regionEnd()
+ {
+ return regionEnd;
+ }
+
+ /**
+ * Returns true if the bounds of the region marked by
+ * {@link #regionStart()} and {@link #regionEnd()} are
+ * transparent. When these bounds are transparent, the
+ * matching process can look beyond them to perform
+ * lookahead, lookbehind and boundary matching operations.
+ * By default, the bounds are opaque.
+ *
+ * @return true if the bounds of the matching region are
+ * transparent.
+ * @see #useTransparentBounds(boolean)
+ * @see #region(int,int)
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @since 1.5
+ */
+ public boolean hasTransparentBounds()
+ {
+ return transparentBounds;
+ }
+
+ /**
+ * Sets the transparency of the bounds of the region
+ * marked by {@link #regionStart()} and {@link #regionEnd()}.
+ * A value of {@code true} makes the bounds transparent,
+ * so the matcher can see beyond them to perform lookahead,
+ * lookbehind and boundary matching operations. A value
+ * of {@code false} (the default) makes the bounds opaque,
+ * restricting the match to the input region denoted
+ * by {@link #regionStart()} and {@link #regionEnd()}.
+ *
+ * @param transparent true if the bounds should be transparent.
+ * @return a reference to this matcher.
+ * @see #hasTransparentBounds()
+ * @see #region(int,int)
+ * @see #regionStart()
+ * @see #regionEnd()
+ * @since 1.5
+ */
+ public Matcher useTransparentBounds(boolean transparent)
+ {
+ transparentBounds = transparent;
+ return this;
+ }
+
+ /**
+ * Returns true if the matcher will honour the use of
+ * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z},
+ * {@code \z} and {@code $}. By default, the anchors
+ * are used. Note that the effect of the anchors is
+ * also affected by {@link #hasTransparentBounds()}.
+ *
+ * @return true if the matcher will attempt to match
+ * the anchoring bounds.
+ * @see #useAnchoringBounds(boolean)
+ * @see #hasTransparentBounds()
+ * @since 1.5
+ */
+ public boolean hasAnchoringBounds()
+ {
+ return anchoringBounds == 0;
+ }
+
+ /**
+ * Enables or disables the use of the anchoring bounds:
+ * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
+ * {@code $}. By default, their use is enabled. When
+ * disabled, the matcher will not attempt to match
+ * the anchors.
+ *
+ * @param useAnchors true if anchoring bounds should be used.
+ * @return a reference to this matcher.
+ * @since 1.5
+ * @see #hasAnchoringBounds()
+ */
+ public Matcher useAnchoringBounds(boolean useAnchors)
+ {
+ if (useAnchors)
+ anchoringBounds = 0;
+ else
+ anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
+ return this;
+ }
+
}