diff options
Diffstat (limited to 'libjava/classpath/java/util/regex/Matcher.java')
-rw-r--r-- | libjava/classpath/java/util/regex/Matcher.java | 289 |
1 files changed, 280 insertions, 9 deletions
diff --git a/libjava/classpath/java/util/regex/Matcher.java b/libjava/classpath/java/util/regex/Matcher.java index bf833673b0e..50cb065f287 100644 --- a/libjava/classpath/java/util/regex/Matcher.java +++ b/libjava/classpath/java/util/regex/Matcher.java @@ -38,6 +38,8 @@ exception statement from your version. */ package java.util.regex; +import gnu.java.lang.CPStringBuilder; + import gnu.java.util.regex.CharIndexed; import gnu.java.util.regex.RE; import gnu.java.util.regex.REMatch; @@ -59,11 +61,45 @@ public final class Matcher implements MatchResult private int appendPosition; private REMatch match; + /** + * The start of the region of the input on which to match. + */ + private int regionStart; + + /** + * The end of the region of the input on which to match. + */ + private int regionEnd; + + /** + * True if the match process should look beyond the + * region marked by regionStart to regionEnd when + * performing lookAhead, lookBehind and boundary + * matching. + */ + private boolean transparentBounds; + + /** + * The flags that affect the anchoring bounds. + * If {@link #hasAnchoringBounds()} is {@code true}, + * the match process will honour the + * anchoring bounds: ^, \A, \Z, \z and $. If + * {@link #hasAnchoringBounds()} is {@code false}, + * the anchors are ignored and appropriate flags, + * stored in this variable, are used to provide this + * behaviour. 
+ */ + private int anchoringBounds; + Matcher(Pattern pattern, CharSequence input) { this.pattern = pattern; this.input = input; this.inputCharIndexed = RE.makeCharIndexed(input, 0); + regionStart = 0; + regionEnd = input.length(); + transparentBounds = false; + anchoringBounds = 0; } /** @@ -125,13 +161,17 @@ public final class Matcher implements MatchResult public boolean find () { boolean first = (match == null); - match = pattern.getRE().getMatch(inputCharIndexed, position); + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), + position, anchoringBounds); if (match != null) { int endIndex = match.getEndIndex(); // Are we stuck at the same position? if (!first && endIndex == position) - { + { match = null; // Not at the end of the input yet? if (position < input.length() - 1) @@ -156,7 +196,11 @@ public final class Matcher implements MatchResult */ public boolean find (int start) { - match = pattern.getRE().getMatch(inputCharIndexed, start); + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), + start, anchoringBounds); if (match != null) { position = match.getEndIndex(); @@ -218,7 +262,12 @@ public final class Matcher implements MatchResult public boolean lookingAt () { - match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_FIX_STARTING_POSITION, null); + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, regionStart, + anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, + 
anchoringBounds|RE.REG_FIX_STARTING_POSITION); if (match != null) { if (match.getStartIndex() == 0) @@ -243,7 +292,12 @@ public final class Matcher implements MatchResult */ public boolean matches () { - match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION, null); + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, regionStart, + anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, + anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION); if (match != null) { if (match.getStartIndex() == 0) @@ -265,15 +319,40 @@ public final class Matcher implements MatchResult return pattern; } + /** + * Resets the internal state of the matcher, including + * resetting the region to its default state of encompassing + * the whole input. The state of {@link #hasTransparentBounds()} + * and {@link #hasAnchoringBounds()} are unaffected. + * + * @return a reference to this matcher. + * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #hasAnchoringBounds() + */ public Matcher reset () { position = 0; match = null; + regionStart = 0; + regionEnd = input.length(); + appendPosition = 0; return this; } /** - * @param input The new input character sequence + * Resets the internal state of the matcher, including + * resetting the region to its default state of encompassing + * the whole input. The state of {@link #hasTransparentBounds()} + * and {@link #hasAnchoringBounds()} are unaffected. + * + * @param input The new input character sequence. + * @return a reference to this matcher. 
+ * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #hasAnchoringBounds() */ public Matcher reset (CharSequence input) { @@ -283,7 +362,7 @@ public final class Matcher implements MatchResult } /** - * @returns the index of a capturing group in this matcher's pattern + * @return the index of a capturing group in this matcher's pattern * * @exception IllegalStateException If no match has yet been attempted, * or if the previous match operation failed @@ -312,6 +391,7 @@ public final class Matcher implements MatchResult /** * @return True if and only if the matcher hit the end of input. + * @since 1.5 */ public boolean hitEnd() { @@ -323,10 +403,12 @@ public final class Matcher implements MatchResult */ public String toString() { - StringBuilder sb = new StringBuilder(); + CPStringBuilder sb = new CPStringBuilder(); sb.append(this.getClass().getName()) .append("[pattern=").append(pattern.pattern()) - .append(" region=").append("0").append(",").append(input.length()) + .append(" region=").append(regionStart).append(",").append(regionEnd) + .append(" anchoringBounds=").append(anchoringBounds == 0) + .append(" transparentBounds=").append(transparentBounds) .append(" lastmatch=").append(match == null ? "" : match.toString()) .append("]"); return sb.toString(); @@ -336,4 +418,193 @@ public final class Matcher implements MatchResult { if (match == null) throw new IllegalStateException(); } + + /** + * <p> + * Defines the region of the input on which to match. + * By default, the {@link Matcher} attempts to match + * the whole string (from 0 to the length of the input), + * but a region between {@code start} (inclusive) and + * {@code end} (exclusive) on which to match may instead + * be defined using this method. 
+ * </p> + * <p> + * The behaviour of region matching is further affected + * by the use of transparent or opaque bounds (see + * {@link #useTransparentBounds(boolean)}) and whether or not + * anchors ({@code ^} and {@code $}) are in use + * (see {@link #useAnchoringBounds(boolean)}). With transparent + * bounds, the matcher is aware of input outside the bounds + * set by this method, whereas, with opaque bounds (the default), + * only the input within the bounds is used. The use of + * anchors is affected by this setting; with transparent + * bounds, anchors will match the beginning of the real input, + * while with opaque bounds they match the beginning of the + * region. {@link #useAnchoringBounds(boolean)} can be used + * to turn on or off the matching of anchors. + * </p> + * + * @param start the start of the region (inclusive). + * @param end the end of the region (exclusive). + * @return a reference to this matcher. + * @throws IndexOutOfBoundsException if either {@code start} or + * {@code end} is less than zero, + * if either {@code start} or + * {@code end} is greater than the + * length of the input, or if + * {@code start} is greater than + * {@code end}. 
+ * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #useTransparentBounds(boolean) + * @see #hasAnchoringBounds() + * @see #useAnchoringBounds(boolean) + * @since 1.5 + */ + public Matcher region(int start, int end) + { + int length = input.length(); + if (start < 0) + throw new IndexOutOfBoundsException("The start position was less than zero."); + /* start == length is legal: it denotes an empty region at the end of the input. */ + if (start > length) + throw new IndexOutOfBoundsException("The start position is after the end of the input."); + if (end < 0) + throw new IndexOutOfBoundsException("The end position was less than zero."); + if (end > length) + throw new IndexOutOfBoundsException("The end position is after the end of the input."); + if (start > end) + throw new IndexOutOfBoundsException("The start position is after the end position."); + reset(); + regionStart = start; + regionEnd = end; + return this; + } + + /** + * The start of the region on which to perform matches (inclusive). + * + * @return the start index of the region. + * @see #region(int,int) + * @see #regionEnd() + * @since 1.5 + */ + public int regionStart() + { + return regionStart; + } + + /** + * The end of the region on which to perform matches (exclusive). + * + * @return the end index of the region. + * @see #region(int,int) + * @see #regionStart() + * @since 1.5 + */ + public int regionEnd() + { + return regionEnd; + } + + /** + * Returns true if the bounds of the region marked by + * {@link #regionStart()} and {@link #regionEnd()} are + * transparent. When these bounds are transparent, the + * matching process can look beyond them to perform + * lookahead, lookbehind and boundary matching operations. + * By default, the bounds are opaque. + * + * @return true if the bounds of the matching region are + * transparent. 
+ * @see #useTransparentBounds(boolean) + * @see #region(int,int) + * @see #regionStart() + * @see #regionEnd() + * @since 1.5 + */ + public boolean hasTransparentBounds() + { + return transparentBounds; + } + + /** + * Sets the transparency of the bounds of the region + * marked by {@link #regionStart()} and {@link #regionEnd()}. + * A value of {@code true} makes the bounds transparent, + * so the matcher can see beyond them to perform lookahead, + * lookbehind and boundary matching operations. A value + * of {@code false} (the default) makes the bounds opaque, + * restricting the match to the input region denoted + * by {@link #regionStart()} and {@link #regionEnd()}. + * + * @param transparent true if the bounds should be transparent. + * @return a reference to this matcher. + * @see #hasTransparentBounds() + * @see #region(int,int) + * @see #regionStart() + * @see #regionEnd() + * @since 1.5 + */ + public Matcher useTransparentBounds(boolean transparent) + { + transparentBounds = transparent; + return this; + } + + /** + * Returns true if the matcher will honour the use of + * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z}, + * {@code \z} and {@code $}. By default, the anchors + * are used. Note that the effect of the anchors is + * also affected by {@link #hasTransparentBounds()}. + * + * @return true if the matcher will attempt to match + * the anchoring bounds. + * @see #useAnchoringBounds(boolean) + * @see #hasTransparentBounds() + * @since 1.5 + */ + public boolean hasAnchoringBounds() + { + return anchoringBounds == 0; + } + + /** + * Enables or disables the use of the anchoring bounds: + * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and + * {@code $}. By default, their use is enabled. When + * disabled, the matcher will not attempt to match + * the anchors. + * + * @param useAnchors true if anchoring bounds should be used. + * @return a reference to this matcher. 
+ * @since 1.5 + * @see #hasAnchoringBounds() + */ + public Matcher useAnchoringBounds(boolean useAnchors) + { + if (useAnchors) + anchoringBounds = 0; + else + anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL; + return this; + } + + /** + * Returns a read-only snapshot of the current state of + * the {@link Matcher} as a {@link MatchResult}. Any + * subsequent changes to this instance are not reflected + * in the returned {@link MatchResult}. If this matcher + * currently holds no match, neither does the snapshot. + * + * @return a {@link MatchResult} instance representing the + * current state of the {@link Matcher}. + */ + public MatchResult toMatchResult() + { + Matcher snapshot = new Matcher(pattern, input); + /* match is null before the first attempt, after reset() and after a failed find; cloning it unguarded would throw NullPointerException. */ + if (match != null) + snapshot.match = (REMatch) match.clone(); + return snapshot; + } + } |