summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wielaard <mark@klomp.org>2004-03-07 23:58:53 +0000
committerMark Wielaard <mark@klomp.org>2004-03-07 23:58:53 +0000
commitf64c1c1904d02d26783cfe5593ec2b530a825fb0 (patch)
treed4ea374258ee904871d49edcf8cee68add212458
parent25b46270ed5a0771d1bc47f1ec16bd7f2268c8ab (diff)
downloadclasspath-f64c1c1904d02d26783cfe5593ec2b530a825fb0.tar.gz
* configure.ac: Add gnu/regexp/Makefile and
resource/gnu/regexp/Makefile * gnu/Makefile.am (SUBDIRS): Add regexp. * gnu/regexp/Makefile.am: New file. * gnu/regexp/CharIndexed.java: Imported. * gnu/regexp/CharIndexedCharArray.java: Likewise. * gnu/regexp/CharIndexedInputStream.java: Likewise. * gnu/regexp/CharIndexedString.java: Likewise. * gnu/regexp/CharIndexedStringBuffer.java: Likewise. * gnu/regexp/RE.java: Likewise. * gnu/regexp/REException.java: Likewise. * gnu/regexp/REFilterInputStream.java: Likewise. * gnu/regexp/REMatch.java: Likewise. * gnu/regexp/REMatchEnumeration.java: Likewise. * gnu/regexp/RESyntax.java: Likewise. * gnu/regexp/REToken.java: Likewise. * gnu/regexp/RETokenAny.java: Likewise. * gnu/regexp/RETokenBackRef.java: Likewise. * gnu/regexp/RETokenChar.java: Likewise. * gnu/regexp/RETokenEnd.java: Likewise. * gnu/regexp/RETokenEndSub.java: Likewise. * gnu/regexp/RETokenOneOf.java: Likewise. * gnu/regexp/RETokenPOSIX.java: Likewise. * gnu/regexp/RETokenRange.java: Likewise. * gnu/regexp/RETokenRepeated.java: Likewise. * gnu/regexp/RETokenStart.java: Likewise. * gnu/regexp/RETokenWordBoundary.java: Likewise. * gnu/regexp/UncheckedRE.java: Likewise. * java/util/regex/Matcher.java: Add gnu.regexp wrappers. * java/util/regex/Pattern.java: Likewise. * resource/gnu/Makefile.am (SUBDIRS): Add regexp. * resource/gnu/regexp/Makefile.am: New file. * resource/gnu/regexp/MessagesBundle.properties: Imported. * resource/gnu/regexp/MessagesBundle_fr.properties: Likewise.
-rw-r--r--ChangeLog37
-rw-r--r--configure.ac2
-rw-r--r--gnu/Makefile.am2
-rw-r--r--gnu/regexp/CharIndexed.java84
-rw-r--r--gnu/regexp/CharIndexedCharArray.java62
-rw-r--r--gnu/regexp/CharIndexedInputStream.java149
-rw-r--r--gnu/regexp/CharIndexedString.java64
-rw-r--r--gnu/regexp/CharIndexedStringBuffer.java62
-rw-r--r--gnu/regexp/Makefile.am27
-rw-r--r--gnu/regexp/RE.java1350
-rw-r--r--gnu/regexp/REException.java182
-rw-r--r--gnu/regexp/REFilterInputStream.java140
-rw-r--r--gnu/regexp/REMatch.java263
-rw-r--r--gnu/regexp/REMatchEnumeration.java135
-rw-r--r--gnu/regexp/RESyntax.java521
-rw-r--r--gnu/regexp/REToken.java86
-rw-r--r--gnu/regexp/RETokenAny.java73
-rw-r--r--gnu/regexp/RETokenBackRef.java72
-rw-r--r--gnu/regexp/RETokenChar.java91
-rw-r--r--gnu/regexp/RETokenEnd.java75
-rw-r--r--gnu/regexp/RETokenEndSub.java53
-rw-r--r--gnu/regexp/RETokenOneOf.java130
-rw-r--r--gnu/regexp/RETokenPOSIX.java144
-rw-r--r--gnu/regexp/RETokenRange.java69
-rw-r--r--gnu/regexp/RETokenRepeated.java227
-rw-r--r--gnu/regexp/RETokenStart.java87
-rw-r--r--gnu/regexp/RETokenWordBoundary.java104
-rw-r--r--gnu/regexp/UncheckedRE.java109
-rw-r--r--java/util/regex/Matcher.java115
-rw-r--r--java/util/regex/Pattern.java129
-rw-r--r--resource/gnu/Makefile.am2
-rw-r--r--resource/gnu/regexp/Makefile.am3
-rw-r--r--resource/gnu/regexp/MessagesBundle.properties22
-rw-r--r--resource/gnu/regexp/MessagesBundle_fr.properties22
34 files changed, 4656 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index b8e2a484c..ccc10bffd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,42 @@
2004-03-07 Mark Wielaard <mark@klomp.org>
+ * configure.ac: Add gnu/regexp/Makefile and
+ resource/gnu/regexp/Makefile
+ * gnu/Makefile.am (SUBDIRS): Add regexp.
+ * gnu/regexp/Makefile.am: New file.
+ * gnu/regexp/CharIndexed.java: Imported.
+ * gnu/regexp/CharIndexedCharArray.java: Likewise.
+ * gnu/regexp/CharIndexedInputStream.java: Likewise.
+ * gnu/regexp/CharIndexedString.java: Likewise.
+ * gnu/regexp/CharIndexedStringBuffer.java: Likewise.
+ * gnu/regexp/RE.java: Likewise.
+ * gnu/regexp/REException.java: Likewise.
+ * gnu/regexp/REFilterInputStream.java: Likewise.
+ * gnu/regexp/REMatch.java: Likewise.
+ * gnu/regexp/REMatchEnumeration.java: Likewise.
+ * gnu/regexp/RESyntax.java: Likewise.
+ * gnu/regexp/REToken.java: Likewise.
+ * gnu/regexp/RETokenAny.java: Likewise.
+ * gnu/regexp/RETokenBackRef.java: Likewise.
+ * gnu/regexp/RETokenChar.java: Likewise.
+ * gnu/regexp/RETokenEnd.java: Likewise.
+ * gnu/regexp/RETokenEndSub.java: Likewise.
+ * gnu/regexp/RETokenOneOf.java: Likewise.
+ * gnu/regexp/RETokenPOSIX.java: Likewise.
+ * gnu/regexp/RETokenRange.java: Likewise.
+ * gnu/regexp/RETokenRepeated.java: Likewise.
+ * gnu/regexp/RETokenStart.java: Likewise.
+ * gnu/regexp/RETokenWordBoundary.java: Likewise.
+ * gnu/regexp/UncheckedRE.java: Likewise.
+ * java/util/regex/Matcher.java: Add gnu.regexp wrappers.
+ * java/util/regex/Pattern.java: Likewise.
+ * resource/gnu/Makefile.am (SUBDIRS): Add regexp.
+ * resource/gnu/regexp/Makefile.am: New file.
+ * resource/gnu/regexp/MessagesBundle.properties: Imported.
+ * resource/gnu/regexp/MessagesBundle_fr.properties: Likewise.
+
+2004-03-07 Mark Wielaard <mark@klomp.org>
+
* lib/Makefile.am (propertydirs, propertyfiles): New variables.
(resources): Create dirs and install property files.
diff --git a/configure.ac b/configure.ac
index df68f3274..cc323a8c4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -327,6 +327,7 @@ gnu/java/util/prefs/Makefile
gnu/javax/Makefile
gnu/javax/rmi/Makefile
gnu/javax/rmi/CORBA/Makefile
+gnu/regexp/Makefile
include/Makefile
include/jni.h
java/Makefile
@@ -428,6 +429,7 @@ resource/gnu/java/Makefile
resource/gnu/java/awt/Makefile
resource/gnu/java/awt/peer/Makefile
resource/gnu/java/awt/peer/gtk/Makefile
+resource/gnu/regexp/Makefile
resource/java/Makefile
resource/java/util/Makefile
resource/java/security/Makefile
diff --git a/gnu/Makefile.am b/gnu/Makefile.am
index 4ad169a74..98110b961 100644
--- a/gnu/Makefile.am
+++ b/gnu/Makefile.am
@@ -1,4 +1,4 @@
## Input file for automake to generate the Makefile.in used by configure
-SUBDIRS = classpath java javax
+SUBDIRS = classpath java javax regexp
diff --git a/gnu/regexp/CharIndexed.java b/gnu/regexp/CharIndexed.java
new file mode 100644
index 000000000..eb1be13fd
--- /dev/null
+++ b/gnu/regexp/CharIndexed.java
@@ -0,0 +1,84 @@
+/* gnu/regexp/CharIndexed.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+/**
+ * Defines the interface used internally so that different types of source
+ * text can be accessed in the same way. Built-in concrete classes provide
+ * support for String, StringBuffer, InputStream and char[] types.
+ * A class that is CharIndexed supports the notion of a cursor within a
+ * block of text. The cursor must be able to be advanced via the move()
+ * method. The charAt() method returns the character at the cursor position
+ * plus a given offset.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ */
+public interface CharIndexed {
+ /**
+ * Defines a constant (0xFFFF was somewhat arbitrarily chosen)
+ * that can be returned by the charAt() function indicating that
+ * the specified index is out of range.
+ */
+ char OUT_OF_BOUNDS = '\uFFFF';
+
+ /**
+ * Returns the character at the given offset past the current cursor
+ * position in the input. The index of the current position is zero.
+ * It is possible for this method to be called with a negative index.
+ * This happens when using the '^' operator in multiline matching mode
+ * or the '\b' or '\<' word boundary operators. In any case, the lower
+ * bound is currently fixed at -2 (for '^' with a two-character newline).
+ *
+ * @param index the offset position in the character field to examine
+ * @return the character at the specified index, or the OUT_OF_BOUNDS
+ * character defined by this interface.
+ */
+ char charAt(int index);
+
+ /**
+ * Shifts the input buffer by a given number of positions. Returns
+ * true if the new cursor position is valid.
+ */
+ boolean move(int index);
+
+ /**
+ * Returns true if the most recent move() operation placed the cursor
+ * position at a valid position in the input.
+ */
+ boolean isValid();
+}
diff --git a/gnu/regexp/CharIndexedCharArray.java b/gnu/regexp/CharIndexedCharArray.java
new file mode 100644
index 000000000..dc488ba44
--- /dev/null
+++ b/gnu/regexp/CharIndexedCharArray.java
@@ -0,0 +1,62 @@
+/* gnu/regexp/CharIndexedCharArray.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+class CharIndexedCharArray implements CharIndexed, Serializable {
+ private char[] s;
+ private int anchor;
+
+ CharIndexedCharArray(char[] str, int index) {
+ s = str;
+ anchor = index;
+ }
+
+ public char charAt(int index) {
+ int pos = anchor + index;
+ return ((pos < s.length) && (pos >= 0)) ? s[pos] : OUT_OF_BOUNDS;
+ }
+
+ public boolean isValid() {
+ return (anchor < s.length);
+ }
+
+ public boolean move(int index) {
+ return ((anchor += index) < s.length);
+ }
+}
diff --git a/gnu/regexp/CharIndexedInputStream.java b/gnu/regexp/CharIndexedInputStream.java
new file mode 100644
index 000000000..776f533ca
--- /dev/null
+++ b/gnu/regexp/CharIndexedInputStream.java
@@ -0,0 +1,149 @@
+/* gnu/regexp/CharIndexedInputStream.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.InputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+// TODO: move(x) shouldn't rely on calling next() x times
+
+class CharIndexedInputStream implements CharIndexed {
+ private static final int BUFFER_INCREMENT = 1024;
+ private static final int UNKNOWN = Integer.MAX_VALUE; // value for end
+
+ private BufferedInputStream br;
+
+ // so that we don't try to reset() right away
+ private int index = -1;
+
+ private int bufsize = BUFFER_INCREMENT;
+
+ private int end = UNKNOWN;
+
+ private char cached = OUT_OF_BOUNDS;
+
+ // Big enough for a \r\n pair
+ // lookBehind[0] = most recent
+ // lookBehind[1] = second most recent
+ private char[] lookBehind = new char[] { OUT_OF_BOUNDS, OUT_OF_BOUNDS };
+
+ CharIndexedInputStream(InputStream str, int index) {
+ if (str instanceof BufferedInputStream) br = (BufferedInputStream) str;
+ else br = new BufferedInputStream(str,BUFFER_INCREMENT);
+ next();
+ if (index > 0) move(index);
+ }
+
+ private boolean next() {
+ if (end == 1) return false;
+ end--; // closer to end
+
+ try {
+ if (index != -1) {
+ br.reset();
+ }
+ int i = br.read();
+ br.mark(bufsize);
+ if (i == -1) {
+ end = 1;
+ cached = OUT_OF_BOUNDS;
+ return false;
+ }
+ cached = (char) i;
+ index = 1;
+ } catch (IOException e) {
+ e.printStackTrace();
+ cached = OUT_OF_BOUNDS;
+ return false;
+ }
+ return true;
+ }
+
+ public char charAt(int index) {
+ if (index == 0) {
+ return cached;
+ } else if (index >= end) {
+ return OUT_OF_BOUNDS;
+ } else if (index == -1) {
+ return lookBehind[0];
+ } else if (index == -2) {
+ return lookBehind[1];
+ } else if (index < -2) {
+ return OUT_OF_BOUNDS;
+ } else if (index >= bufsize) {
+ // Allocate more space in the buffer.
+ try {
+ while (bufsize <= index) bufsize += BUFFER_INCREMENT;
+ br.reset();
+ br.mark(bufsize);
+ br.skip(index-1);
+ } catch (IOException e) { }
+ } else if (this.index != index) {
+ try {
+ br.reset();
+ br.skip(index-1);
+ } catch (IOException e) { }
+ }
+ char ch = OUT_OF_BOUNDS;
+
+ try {
+ int i = br.read();
+ this.index = index+1; // this.index is index of next pos relative to charAt(0)
+ if (i == -1) {
+ // set flag that next should fail next time?
+ end = index;
+ return ch;
+ }
+ ch = (char) i;
+ } catch (IOException ie) { }
+
+ return ch;
+ }
+
+ public boolean move(int index) {
+ // move read position [index] clicks from 'charAt(0)'
+ boolean retval = true;
+ while (retval && (index-- > 0)) retval = next();
+ return retval;
+ }
+
+ public boolean isValid() {
+ return (cached != OUT_OF_BOUNDS);
+ }
+}
+
diff --git a/gnu/regexp/CharIndexedString.java b/gnu/regexp/CharIndexedString.java
new file mode 100644
index 000000000..adff7ac71
--- /dev/null
+++ b/gnu/regexp/CharIndexedString.java
@@ -0,0 +1,64 @@
+/* gnu/regexp/CharIndexedString.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+class CharIndexedString implements CharIndexed, Serializable {
+ private String s;
+ private int anchor;
+ private int len;
+
+ CharIndexedString(String str, int index) {
+ s = str;
+ len = s.length();
+ anchor = index;
+ }
+
+ public char charAt(int index) {
+ int pos = anchor + index;
+ return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
+ }
+
+ public boolean isValid() {
+ return (anchor < len);
+ }
+
+ public boolean move(int index) {
+ return ((anchor += index) < len);
+ }
+}
diff --git a/gnu/regexp/CharIndexedStringBuffer.java b/gnu/regexp/CharIndexedStringBuffer.java
new file mode 100644
index 000000000..2eb8c23f3
--- /dev/null
+++ b/gnu/regexp/CharIndexedStringBuffer.java
@@ -0,0 +1,62 @@
+/* gnu/regexp/CharIndexedStringBuffer.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+class CharIndexedStringBuffer implements CharIndexed, Serializable {
+ private StringBuffer s;
+ private int anchor;
+
+ CharIndexedStringBuffer(StringBuffer str, int index) {
+ s = str;
+ anchor = index;
+ }
+
+ public char charAt(int index) {
+ int pos = anchor + index;
+ return ((pos < s.length()) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
+ }
+
+ public boolean isValid() {
+ return (anchor < s.length());
+ }
+
+ public boolean move(int index) {
+ return ((anchor += index) < s.length());
+ }
+}
diff --git a/gnu/regexp/Makefile.am b/gnu/regexp/Makefile.am
new file mode 100644
index 000000000..a0fa7a4c5
--- /dev/null
+++ b/gnu/regexp/Makefile.am
@@ -0,0 +1,27 @@
+# automake uses this file to generate Makefile.in
+
+EXTRA_DIST = \
+ CharIndexed.java \
+ CharIndexedCharArray.java \
+ CharIndexedInputStream.java \
+ CharIndexedString.java \
+ CharIndexedStringBuffer.java \
+ RE.java \
+ REException.java \
+ REFilterInputStream.java \
+ REMatch.java \
+ REMatchEnumeration.java \
+ RESyntax.java \
+ REToken.java \
+ RETokenAny.java \
+ RETokenBackRef.java \
+ RETokenChar.java \
+ RETokenEnd.java \
+ RETokenEndSub.java \
+ RETokenOneOf.java \
+ RETokenPOSIX.java \
+ RETokenRange.java \
+ RETokenRepeated.java \
+ RETokenStart.java \
+ RETokenWordBoundary.java \
+ UncheckedRE.java
diff --git a/gnu/regexp/RE.java b/gnu/regexp/RE.java
new file mode 100644
index 000000000..fdc00feb3
--- /dev/null
+++ b/gnu/regexp/RE.java
@@ -0,0 +1,1350 @@
+/* gnu/regexp/RE.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Locale;
+import java.util.PropertyResourceBundle;
+import java.util.ResourceBundle;
+import java.util.Vector;
+
+class IntPair implements Serializable {
+ public int first, second;
+}
+
+class CharUnit implements Serializable {
+ public char ch;
+ public boolean bk;
+}
+
+/**
+ * RE provides the user interface for compiling and matching regular
+ * expressions.
+ * <P>
+ * A regular expression object (class RE) is compiled by constructing it
+ * from a String, StringBuffer or character array, with optional
+ * compilation flags (below)
+ * and an optional syntax specification (see RESyntax; if not specified,
+ * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
+ * <P>
+ * Once compiled, a regular expression object is reusable as well as
+ * threadsafe: multiple threads can use the RE instance simultaneously
+ * to match against different input text.
+ * <P>
+ * Various methods attempt to match input text against a compiled
+ * regular expression. These methods are:
+ * <LI><code>isMatch</code>: returns true if the input text in its
+ * entirety matches the regular expression pattern.
+ * <LI><code>getMatch</code>: returns the first match found in the
+ * input text, or null if no match is found.
+ * <LI><code>getAllMatches</code>: returns an array of all
+ * non-overlapping matches found in the input text. If no matches are
+ * found, the array is zero-length.
+ * <LI><code>substitute</code>: substitute the first occurence of the
+ * pattern in the input text with a replacement string (which may
+ * include metacharacters $0-$9, see REMatch.substituteInto).
+ * <LI><code>substituteAll</code>: same as above, but repeat for each
+ * match before returning.
+ * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration
+ * object that allows iteration over the matches (see
+ * REMatchEnumeration for some reasons why you may want to do this
+ * instead of using <code>getAllMatches</code>.
+ * <P>
+ *
+ * These methods all have similar argument lists. The input can be a
+ * String, a character array, a StringBuffer, or an
+ * InputStream of some sort. Note that when using an
+ * InputStream, the stream read position cannot be guaranteed after
+ * attempting a match (this is not a bug, but a consequence of the way
+ * regular expressions work). Using an REMatchEnumeration can
+ * eliminate most positioning problems.
+ *
+ * <P>
+ *
+ * The optional index argument specifies the offset from the beginning
+ * of the text at which the search should start (see the descriptions
+ * of some of the execution flags for how this can affect positional
+ * pattern operators). For an InputStream, this means an
+ * offset from the current read position, so subsequent calls with the
+ * same index argument on an InputStream will not
+ * necessarily access the same position on the stream, whereas
+ * repeated searches at a given index in a fixed string will return
+ * consistent results.
+ *
+ * <P>
+ * You can optionally affect the execution environment by using a
+ * combination of execution flags (constants listed below).
+ *
+ * <P>
+ * All operations on a regular expression are performed in a
+ * thread-safe manner.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ * @version 1.1.5-dev, to be released
+ */
+
+public class RE extends REToken {
+ // This String will be returned by getVersion()
+ private static final String VERSION = "1.1.5-dev";
+
+ // The localized strings are kept in a separate file
+ private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
+
+ // These are, respectively, the first and last tokens in our linked list
+ // If there is only one token, firstToken == lastToken
+ private REToken firstToken, lastToken;
+
+ // This is the number of subexpressions in this regular expression,
+ // with a minimum value of zero. Returned by getNumSubs()
+ private int numSubs;
+
+ /** Minimum length, in characters, of any possible match. */
+ private int minimumLength;
+
+ /**
+ * Compilation flag. Do not differentiate case. Subsequent
+ * searches using this RE will be case insensitive.
+ */
+ public static final int REG_ICASE = 2;
+
+ /**
+ * Compilation flag. The match-any-character operator (dot)
+ * will match a newline character. When set this overrides the syntax
+ * bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to
+ * the "/s" operator in Perl.
+ */
+ public static final int REG_DOT_NEWLINE = 4;
+
+ /**
+ * Compilation flag. Use multiline mode. In this mode, the ^ and $
+ * anchors will match based on newlines within the input. This is
+ * equivalent to the "/m" operator in Perl.
+ */
+ public static final int REG_MULTILINE = 8;
+
+ /**
+ * Execution flag.
+ * The match-beginning operator (^) will not match at the beginning
+ * of the input string. Useful for matching on a substring when you
+ * know the context of the input is such that position zero of the
+ * input to the match test is not actually position zero of the text.
+ * <P>
+ * This example demonstrates the results of various ways of matching on
+ * a substring.
+ * <P>
+ * <CODE>
+ * String s = "food bar fool";<BR>
+ * RE exp = new RE("^foo.");<BR>
+ * REMatch m0 = exp.getMatch(s);<BR>
+ * REMatch m1 = exp.getMatch(s.substring(8));<BR>
+ * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
+ * REMatch m3 = exp.getMatch(s,8); <BR>
+ * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX); <BR>
+ * <P>
+ * // Results:<BR>
+ * // m0.toString(): "food"<BR>
+ * // m1.toString(): "fool"<BR>
+ * // m2.toString(): null<BR>
+ * // m3.toString(): null<BR>
+ * // m4.toString(): "fool"<BR>
+ * </CODE>
+ */
+ public static final int REG_NOTBOL = 16;
+
+ /**
+ * Execution flag.
+ * The match-end operator ($) does not match at the end
+ * of the input string. Useful for matching on substrings.
+ */
+ public static final int REG_NOTEOL = 32;
+
+ /**
+ * Execution flag.
+ * When a match method is invoked that starts matching at a non-zero
+ * index into the input, treat the input as if it begins at the index
+ * given. The effect of this flag is that the engine does not "see"
+ * any text in the input before the given index. This is useful so
+ * that the match-beginning operator (^) matches not at position 0
+ * in the input string, but at the position the search started at
+ * (based on the index input given to the getMatch function). See
+ * the example under REG_NOTBOL. It also affects the use of the \&lt;
+ * and \b operators.
+ */
+ public static final int REG_ANCHORINDEX = 64;
+
+ /**
+ * Execution flag.
+ * The substitute and substituteAll methods will not attempt to
+ * interpolate occurrences of $1-$9 in the replacement text with
+ * the corresponding subexpressions. For example, you may want to
+ * replace all matches of "one dollar" with "$1".
+ */
+ public static final int REG_NO_INTERPOLATE = 128;
+
+ /** Returns a string representing the version of the gnu.regexp package. */
+ public static final String version() {
+ return VERSION;
+ }
+
+ // Retrieves a message from the ResourceBundle
+ static final String getLocalizedMessage(String key) {
+ return messages.getString(key);
+ }
+
+ /**
+ * Constructs a regular expression pattern buffer without any compilation
+ * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @exception REException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public RE(Object pattern) throws REException {
+ this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
+ }
+
+ /**
+ * Constructs a regular expression pattern buffer using the specified
+ * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer, or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @param cflags The logical OR of any combination of the compilation flags listed above.
+ * @exception REException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public RE(Object pattern, int cflags) throws REException {
+ this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
+ }
+
+ /**
+ * Constructs a regular expression pattern buffer using the specified
+ * compilation flags and regular expression syntax.
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer, or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @param cflags The logical OR of any combination of the compilation flags listed above.
+ * @param syntax The type of regular expression syntax to use.
+ * @exception REException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
+ this(pattern,cflags,syntax,0,0);
+ }
+
+ // internal constructor used for alternation
+ private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
+ super(subIndex);
+ firstToken = first;
+ lastToken = last;
+ numSubs = subs;
+ minimumLength = minLength;
+ addToken(new RETokenEndSub(subIndex));
+ }
+
+ private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
+ super(myIndex); // Subexpression index of this token.
+ initialize(patternObj, cflags, syntax, myIndex, nextSub);
+ }
+
+ // For use by subclasses
+ protected RE() { super(0); }
+
+ // The meat of construction
+ protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
+ char[] pattern;
+ if (patternObj instanceof String) {
+ pattern = ((String) patternObj).toCharArray();
+ } else if (patternObj instanceof char[]) {
+ pattern = (char[]) patternObj;
+ } else if (patternObj instanceof StringBuffer) {
+ pattern = new char [((StringBuffer) patternObj).length()];
+ ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
+ } else {
+ pattern = patternObj.toString().toCharArray();
+ }
+
+ int pLength = pattern.length;
+
+ numSubs = 0; // Number of subexpressions in this token.
+ Vector branches = null;
+
+ // linked list of tokens (sort of -- some closed loops can exist)
+ firstToken = lastToken = null;
+
+ // Precalculate these so we don't pay for the math every time we
+ // need to access them.
+ boolean insens = ((cflags & REG_ICASE) > 0);
+
+ // Parse pattern into tokens. Does anyone know if it's more efficient
+ // to use char[] than a String.charAt()? I'm assuming so.
+
+ // index tracks the position in the char array
+ int index = 0;
+
+ // this will be the current parse character (pattern[index])
+ CharUnit unit = new CharUnit();
+
+ // This is used for {x,y} calculations
+ IntPair minMax = new IntPair();
+
+ // Buffer a token so we can create a TokenRepeated, etc.
+ REToken currentToken = null;
+ char ch;
+
+ while (index < pLength) {
+ // read the next character unit (including backslash escapes)
+ index = getCharUnit(pattern,index,unit);
+
+ // ALTERNATION OPERATOR
+ // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
+ // not available if RE_LIMITED_OPS is set
+
+ // TODO: the '\n' literal here should be a test against REToken.newline,
+ // which unfortunately may be more than a single character.
+ if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
+ || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
+ && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
+ // make everything up to here be a branch. create vector if nec.
+ addToken(currentToken);
+ RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
+ minimumLength = 0;
+ if (branches == null) {
+ branches = new Vector();
+ }
+ branches.addElement(theBranch);
+ firstToken = lastToken = currentToken = null;
+ }
+
+ // INTERVAL OPERATOR:
+ // {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES)
+ // \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
+ //
+ // OPEN QUESTION:
+ // what is proper interpretation of '{' at start of string?
+
+ else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
+ int newIndex = getMinMax(pattern,index,minMax,syntax);
+ if (newIndex > index) {
+ if (minMax.first > minMax.second)
+ throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
+ if (currentToken == null)
+ throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
+ if (currentToken instanceof RETokenRepeated)
+ throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
+ if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+ throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
+ if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
+ throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
+ index = newIndex;
+ currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
+ }
+ else {
+ addToken(currentToken);
+ currentToken = new RETokenChar(subIndex,unit.ch,insens);
+ }
+ }
+
+ // LIST OPERATOR:
+ // [...] | [^...]
+
+ else if ((unit.ch == '[') && !unit.bk) {
+ Vector options = new Vector();
+ boolean negative = false;
+ char lastChar = 0;
+ if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
+
+ // Check for initial caret, negation
+ if ((ch = pattern[index]) == '^') {
+ negative = true;
+ if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+ ch = pattern[index];
+ }
+
+ // Check for leading right bracket literal
+ if (ch == ']') {
+ lastChar = ch;
+ if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+ }
+
+ while ((ch = pattern[index++]) != ']') {
+ if ((ch == '-') && (lastChar != 0)) {
+ if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+ if ((ch = pattern[index]) == ']') {
+ options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ lastChar = '-';
+ } else {
+ options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
+ lastChar = 0;
+ index++;
+ }
+ } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
+ if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+ int posixID = -1;
+ boolean negate = false;
+ char asciiEsc = 0;
+ if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
+ switch (pattern[index]) {
+ case 'D':
+ negate = true;
+ case 'd':
+ posixID = RETokenPOSIX.DIGIT;
+ break;
+ case 'S':
+ negate = true;
+ case 's':
+ posixID = RETokenPOSIX.SPACE;
+ break;
+ case 'W':
+ negate = true;
+ case 'w':
+ posixID = RETokenPOSIX.ALNUM;
+ break;
+ }
+ }
+ else if ("nrt".indexOf(pattern[index]) != -1) {
+ switch (pattern[index]) {
+ case 'n':
+ asciiEsc = '\n';
+ break;
+ case 't':
+ asciiEsc = '\t';
+ break;
+ case 'r':
+ asciiEsc = '\r';
+ break;
+ }
+ }
+ if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+
+ if (posixID != -1) {
+ options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
+ } else if (asciiEsc != 0) {
+ lastChar = asciiEsc;
+ } else {
+ lastChar = pattern[index];
+ }
+ ++index;
+ } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
+ StringBuffer posixSet = new StringBuffer();
+ index = getPosixSet(pattern,index+1,posixSet);
+ int posixId = RETokenPOSIX.intValue(posixSet.toString());
+ if (posixId != -1)
+ options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
+ } else {
+ if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ lastChar = ch;
+ }
+ if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+ } // while in list
+ // Out of list, index is one past ']'
+
+ if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+
+ // Create a new RETokenOneOf
+ addToken(currentToken);
+ options.trimToSize();
+ currentToken = new RETokenOneOf(subIndex,options,negative);
+ }
+
+ // SUBEXPRESSIONS
+ // (...) | \(...\) depending on RE_NO_BK_PARENS
+
+ else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
+ boolean pure = false;
+ boolean comment = false;
+ if ((index+1 < pLength) && (pattern[index] == '?')) {
+ switch (pattern[index+1]) {
+ case ':':
+ if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
+ pure = true;
+ index += 2;
+ }
+ break;
+ case '#':
+ if (syntax.get(RESyntax.RE_COMMENTS)) {
+ comment = true;
+ }
+ break;
+ default:
+ throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
+ }
+ }
+
+ if (index >= pLength) {
+ throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
+ }
+
+ // find end of subexpression
+ int endIndex = index;
+ int nextIndex = index;
+ int nested = 0;
+
+ while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
+ && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
+ if ((endIndex = nextIndex) >= pLength)
+ throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
+ else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ nested++;
+ else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ nested--;
+
+ // endIndex is now position at a ')','\)'
+ // nextIndex is end of string or position after ')' or '\)'
+
+ if (comment) index = nextIndex;
+ else { // not a comment
+ // create RE subexpression as token.
+ addToken(currentToken);
+ if (!pure) {
+ numSubs++;
+ }
+
+ int useIndex = (pure) ? 0 : nextSub + numSubs;
+ currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
+ numSubs += ((RE) currentToken).getNumSubs();
+
+ index = nextIndex;
+ } // not a comment
+ } // subexpression
+
+ // UNMATCHED RIGHT PAREN
+ // ) or \) throw exception if
+ // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
+ else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
+ throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
+ }
+
+ // START OF LINE OPERATOR
+ // ^
+
+ else if ((unit.ch == '^') && !unit.bk) {
+ addToken(currentToken);
+ currentToken = null;
+ addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
+ }
+
+ // END OF LINE OPERATOR
+ // $
+
+ else if ((unit.ch == '$') && !unit.bk) {
+ addToken(currentToken);
+ currentToken = null;
+ addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
+ }
+
+ // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
+ // .
+
+ else if ((unit.ch == '.') && !unit.bk) {
+ addToken(currentToken);
+ currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
+ }
+
+ // ZERO-OR-MORE REPEAT OPERATOR
+ // *
+
+ else if ((unit.ch == '*') && !unit.bk) {
+ if (currentToken == null)
+ throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
+ if (currentToken instanceof RETokenRepeated)
+ throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+ if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+ throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
+ if (currentToken.getMinimumLength() == 0)
+ throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
+ currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
+ }
+
+ // ONE-OR-MORE REPEAT OPERATOR
+ // + | \+ depending on RE_BK_PLUS_QM
+ // not available if RE_LIMITED_OPS is set
+
+ else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ if (currentToken == null)
+ throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
+ if (currentToken instanceof RETokenRepeated)
+ throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+ if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+ throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
+ if (currentToken.getMinimumLength() == 0)
+ throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
+ currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
+ }
+
+ // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
+ // ? | \? depending on RE_BK_PLUS_QM
+ // not available if RE_LIMITED_OPS is set
+ // stingy matching if RE_STINGY_OPS is set and it follows a quantifier
+
+ else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
+
+ // Check for stingy matching on RETokenRepeated
+ if (currentToken instanceof RETokenRepeated) {
+ if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
+ ((RETokenRepeated)currentToken).makeStingy();
+ else
+ throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+ }
+ else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+ throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
+ else
+ currentToken = setRepeated(currentToken,0,1,index);
+ }
+
+ // BACKREFERENCE OPERATOR
+ // \1 \2 ... \9
+ // not available if RE_NO_BK_REFS is set
+
+ else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
+ addToken(currentToken);
+ currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
+ }
+
+ // START OF STRING OPERATOR
+ // \A if RE_STRING_ANCHORS is set
+
+ else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ addToken(currentToken);
+ currentToken = new RETokenStart(subIndex,null);
+ }
+
+ // WORD BREAK OPERATOR
+ // \b if ????
+
+ else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ addToken(currentToken);
+ currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
+ }
+
+ // WORD BEGIN OPERATOR
+ // \< if ????
+ else if (unit.bk && (unit.ch == '<')) {
+ addToken(currentToken);
+ currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
+ }
+
+ // WORD END OPERATOR
+ // \> if ????
+ else if (unit.bk && (unit.ch == '>')) {
+ addToken(currentToken);
+ currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
+ }
+
+ // NON-WORD BREAK OPERATOR
+ // \B if ????
+
+ else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ addToken(currentToken);
+ currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
+ }
+
+
+ // DIGIT OPERATOR
+ // \d if RE_CHAR_CLASS_ESCAPES is set
+
+ else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
+ }
+
+ // NON-DIGIT OPERATOR
+ // \D
+
+ else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
+ }
+
+ // NEWLINE ESCAPE
+ // \n
+
+ else if (unit.bk && (unit.ch == 'n')) {
+ addToken(currentToken);
+ currentToken = new RETokenChar(subIndex,'\n',false);
+ }
+
+ // RETURN ESCAPE
+ // \r
+
+ else if (unit.bk && (unit.ch == 'r')) {
+ addToken(currentToken);
+ currentToken = new RETokenChar(subIndex,'\r',false);
+ }
+
+ // WHITESPACE OPERATOR
+ // \s if RE_CHAR_CLASS_ESCAPES is set
+
+ else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
+ }
+
+ // NON-WHITESPACE OPERATOR
+ // \S
+
+ else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
+ }
+
+ // TAB ESCAPE
+ // \t
+
+ else if (unit.bk && (unit.ch == 't')) {
+ addToken(currentToken);
+ currentToken = new RETokenChar(subIndex,'\t',false);
+ }
+
+ // ALPHANUMERIC OPERATOR
+ // \w
+
+ else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
+ }
+
+ // NON-ALPHANUMERIC OPERATOR
+ // \W
+
+ else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
+ addToken(currentToken);
+ currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
+ }
+
+ // END OF STRING OPERATOR
+ // \Z
+
+ else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ addToken(currentToken);
+ currentToken = new RETokenEnd(subIndex,null);
+ }
+
+ // NON-SPECIAL CHARACTER (or escape to make literal)
+ // c | \* for example
+
+ else { // not a special character
+ addToken(currentToken);
+ currentToken = new RETokenChar(subIndex,unit.ch,insens);
+ }
+ } // end while
+
+ // Add final buffered token and an EndSub marker
+ addToken(currentToken);
+
+ if (branches != null) {
+ branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
+ branches.trimToSize(); // compact the Vector
+ minimumLength = 0;
+ firstToken = lastToken = null;
+ addToken(new RETokenOneOf(subIndex,branches,false));
+ }
+ else addToken(new RETokenEndSub(subIndex));
+
+ }
+
+ private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
+ unit.ch = input[index++];
+ if (unit.bk = (unit.ch == '\\'))
+ if (index < input.length)
+ unit.ch = input[index++];
+ else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
+ return index;
+ }
+
+ /**
+ * Checks if the regular expression matches the input in its entirety.
+ *
+ * @param input The input text.
+ */
+ public boolean isMatch(Object input) {
+ return isMatch(input,0,0);
+ }
+
+ /**
+ * Checks if the input string, starting from index, is an exact match of
+ * this regular expression.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ */
+ public boolean isMatch(Object input,int index) {
+ return isMatch(input,index,0);
+ }
+
+
+ /**
+ * Checks if the input, starting from index and using the specified
+ * execution flags, is an exact match of this regular expression.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ */
+ public boolean isMatch(Object input,int index,int eflags) {
+ return isMatchImpl(makeCharIndexed(input,index),index,eflags);
+ }
+
+ private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
+ if (firstToken == null) // Trivial case
+ return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
+ REMatch m = new REMatch(numSubs, index, eflags);
+ if (firstToken.match(input, m)) {
+ while (m != null) {
+ if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
+ return true;
+ }
+ m = m.next;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns the maximum number of subexpressions in this regular expression.
+ * If the expression contains branches, the value returned will be the
+ * maximum subexpressions in any of the branches.
+ */
+ public int getNumSubs() {
+ return numSubs;
+ }
+
+ // Overrides REToken.setUncle
+ void setUncle(REToken uncle) {
+ if (lastToken != null) {
+ lastToken.setUncle(uncle);
+ } else super.setUncle(uncle); // to deal with empty subexpressions
+ }
+
+ // Overrides REToken.chain
+
+ boolean chain(REToken next) {
+ super.chain(next);
+ setUncle(next);
+ return true;
+ }
+
+ /**
+ * Returns the minimum number of characters that could possibly
+ * constitute a match of this regular expression.
+ */
+ public int getMinimumLength() {
+ return minimumLength;
+ }
+
+ /**
+ * Returns an array of all matches found in the input.
+ *
+ * If the regular expression allows the empty string to match, it will
+ * substitute matches at all positions except the end of the input.
+ *
+ * @param input The input text.
+ * @return a non-null (but possibly zero-length) array of matches
+ */
+ public REMatch[] getAllMatches(Object input) {
+ return getAllMatches(input,0,0);
+ }
+
+ /**
+ * Returns an array of all matches found in the input,
+ * beginning at the specified index position.
+ *
+ * If the regular expression allows the empty string to match, it will
+ * substitute matches at all positions except the end of the input.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @return a non-null (but possibly zero-length) array of matches
+ */
+ public REMatch[] getAllMatches(Object input, int index) {
+ return getAllMatches(input,index,0);
+ }
+
+ /**
+ * Returns an array of all matches found in the input string,
+ * beginning at the specified index position and using the specified
+ * execution flags.
+ *
+ * If the regular expression allows the empty string to match, it will
+ * substitute matches at all positions except the end of the input.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @return a non-null (but possibly zero-length) array of matches
+ */
+ public REMatch[] getAllMatches(Object input, int index, int eflags) {
+ return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
+ }
+
+ // this has been changed since 1.03 to be non-overlapping matches
+ private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
+ Vector all = new Vector();
+ REMatch m = null;
+ while ((m = getMatchImpl(input,index,eflags,null)) != null) {
+ all.addElement(m);
+ index = m.getEndIndex();
+ if (m.end[0] == 0) { // handle pathological case of zero-length match
+ index++;
+ input.move(1);
+ } else {
+ input.move(m.end[0]);
+ }
+ if (!input.isValid()) break;
+ }
+ REMatch[] mset = new REMatch[all.size()];
+ all.copyInto(mset);
+ return mset;
+ }
+
+ /* Implements abstract method REToken.match() */
+ boolean match(CharIndexed input, REMatch mymatch) {
+ if (firstToken == null) return next(input, mymatch);
+
+ // Note the start of this subexpression
+ mymatch.start[subIndex] = mymatch.index;
+
+ return firstToken.match(input, mymatch);
+ }
+
+ /**
+ * Returns the first match found in the input. If no match is found,
+ * null is returned.
+ *
+ * @param input The input text.
+ * @return An REMatch instance referencing the match, or null if none.
+ */
+ public REMatch getMatch(Object input) {
+ return getMatch(input,0,0);
+ }
+
+ /**
+ * Returns the first match found in the input, beginning
+ * the search at the specified index. If no match is found,
+ * returns null.
+ *
+ * @param input The input text.
+ * @param index The offset within the text to begin looking for a match.
+ * @return An REMatch instance referencing the match, or null if none.
+ */
+ public REMatch getMatch(Object input, int index) {
+ return getMatch(input,index,0);
+ }
+
+ /**
+ * Returns the first match found in the input, beginning
+ * the search at the specified index, and using the specified
+ * execution flags. If no match is found, returns null.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @return An REMatch instance referencing the match, or null if none.
+ */
+ public REMatch getMatch(Object input, int index, int eflags) {
+ return getMatch(input,index,eflags,null);
+ }
+
+ /**
+ * Returns the first match found in the input, beginning the search
+ * at the specified index, and using the specified execution flags.
+ * If no match is found, returns null. If a StringBuffer is
+ * provided and is non-null, the contents of the input text from the
+ * index to the beginning of the match (or to the end of the input,
+ * if there is no match) are appended to the StringBuffer.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @param buffer The StringBuffer to save pre-match text in.
+ * @return An REMatch instance referencing the match, or null if none. */
+ public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
+ return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
+ }
+
+ REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
+ // Create a new REMatch to hold results
+ REMatch mymatch = new REMatch(numSubs, anchor, eflags);
+ do {
+ // Optimization: check if anchor + minimumLength > length
+ if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
+ if (match(input, mymatch)) {
+ // Find longest match of them all to observe leftmost longest
+ REMatch longest = mymatch;
+ while ((mymatch = mymatch.next) != null) {
+ if (mymatch.index > longest.index) {
+ longest = mymatch;
+ }
+ }
+
+ longest.end[0] = longest.index;
+ longest.finish(input);
+ return longest;
+ }
+ }
+ mymatch.clear(++anchor);
+ // Append character to buffer if needed
+ if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
+ buffer.append(input.charAt(0));
+ }
+ } while (input.move(1));
+
+ // Special handling at end of input for e.g. "$"
+ if (minimumLength == 0) {
+ if (match(input, mymatch)) {
+ mymatch.finish(input);
+ return mymatch;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Returns an REMatchEnumeration that can be used to iterate over the
+ * matches found in the input text.
+ *
+ * @param input The input text.
+ * @return A non-null REMatchEnumeration instance.
+ */
+ public REMatchEnumeration getMatchEnumeration(Object input) {
+ return getMatchEnumeration(input,0,0);
+ }
+
+
+ /**
+ * Returns an REMatchEnumeration that can be used to iterate over the
+ * matches found in the input text.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @return A non-null REMatchEnumeration instance, with its input cursor
+ * set to the index position specified.
+ */
+ public REMatchEnumeration getMatchEnumeration(Object input, int index) {
+ return getMatchEnumeration(input,index,0);
+ }
+
+ /**
+ * Returns an REMatchEnumeration that can be used to iterate over the
+ * matches found in the input text.
+ *
+ * @param input The input text.
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @return A non-null REMatchEnumeration instance, with its input cursor
+ * set to the index position specified.
+ */
+ public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
+ return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
+ }
+
+
+ /**
+ * Substitutes the replacement text for the first match found in the input.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @return A String interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substitute(Object input,String replace) {
+ return substitute(input,replace,0,0);
+ }
+
+ /**
+ * Substitutes the replacement text for the first match found in the input
+ * beginning at the specified index position. Specifying an index
+ * effectively causes the regular expression engine to throw away the
+ * specified number of characters.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @param index The offset index at which the search should be begin.
+ * @return A String containing the substring of the input, starting
+ * at the index position, and interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substitute(Object input,String replace,int index) {
+ return substitute(input,replace,index,0);
+ }
+
+ /**
+ * Substitutes the replacement text for the first match found in the input
+ * string, beginning at the specified index position and using the
+ * specified execution flags.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @return A String containing the substring of the input, starting
+ * at the index position, and interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substitute(Object input,String replace,int index,int eflags) {
+ return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
+ }
+
+ private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
+ StringBuffer buffer = new StringBuffer();
+ REMatch m = getMatchImpl(input,index,eflags,buffer);
+ if (m==null) return buffer.toString();
+ buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
+ replace : m.substituteInto(replace) );
+ if (input.move(m.end[0])) {
+ do {
+ buffer.append(input.charAt(0));
+ } while (input.move(1));
+ }
+ return buffer.toString();
+ }
+
+ /**
+ * Substitutes the replacement text for each non-overlapping match found
+ * in the input text.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @return A String interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substituteAll(Object input,String replace) {
+ return substituteAll(input,replace,0,0);
+ }
+
+ /**
+ * Substitutes the replacement text for each non-overlapping match found
+ * in the input text, starting at the specified index.
+ *
+ * If the regular expression allows the empty string to match, it will
+ * substitute matches at all positions except the end of the input.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @param index The offset index at which the search should be begin.
+ * @return A String containing the substring of the input, starting
+ * at the index position, and interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substituteAll(Object input,String replace,int index) {
+ return substituteAll(input,replace,index,0);
+ }
+
+ /**
+ * Substitutes the replacement text for each non-overlapping match found
+ * in the input text, starting at the specified index and using the
+ * specified execution flags.
+ *
+ * @param input The input text.
+ * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
+ * @param index The offset index at which the search should be begin.
+ * @param eflags The logical OR of any execution flags above.
+ * @return A String containing the substring of the input, starting
+ * at the index position, and interpolating the substituted text.
+ * @see REMatch#substituteInto
+ */
+ public String substituteAll(Object input,String replace,int index,int eflags) {
+ return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
+ }
+
+ private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
+ StringBuffer buffer = new StringBuffer();
+ REMatch m;
+ while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
+ buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
+ replace : m.substituteInto(replace) );
+ index = m.getEndIndex();
+ if (m.end[0] == 0) {
+ char ch = input.charAt(0);
+ if (ch != CharIndexed.OUT_OF_BOUNDS)
+ buffer.append(ch);
+ input.move(1);
+ } else {
+ input.move(m.end[0]);
+ }
+
+ if (!input.isValid()) break;
+ }
+ return buffer.toString();
+ }
+
+ /* Helper function for constructor */
+ private void addToken(REToken next) {
+ if (next == null) return;
+ minimumLength += next.getMinimumLength();
+ if (firstToken == null) {
+ lastToken = firstToken = next;
+ } else {
+ // if chain returns false, it "rejected" the token due to
+ // an optimization, and next was combined with lastToken
+ if (lastToken.chain(next)) {
+ lastToken = next;
+ }
+ }
+ }
+
+ private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
+ if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
+ return new RETokenRepeated(current.subIndex,current,min,max);
+ }
+
+ private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
+ // Precondition: pattern[index-1] == ':'
+ // we will return pos of closing ']'.
+ int i;
+ for (i=index; i<(pattern.length-1); i++) {
+ if ((pattern[i] == ':') && (pattern[i+1] == ']'))
+ return i+2;
+ buf.append(pattern[i]);
+ }
+ return index; // didn't match up
+ }
+
+ private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
+ // Precondition: input[index-1] == '{', minMax != null
+
+ boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
+ int startIndex = index;
+ if (index == input.length) {
+ if (mustMatch)
+ throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
+ else
+ return startIndex;
+ }
+
+ int min,max=0;
+ CharUnit unit = new CharUnit();
+ StringBuffer buf = new StringBuffer();
+
+ // Read string of digits
+ do {
+ index = getCharUnit(input,index,unit);
+ if (Character.isDigit(unit.ch))
+ buf.append(unit.ch);
+ } while ((index != input.length) && Character.isDigit(unit.ch));
+
+ // Check for {} tomfoolery
+ if (buf.length() == 0) {
+ if (mustMatch)
+ throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
+ else
+ return startIndex;
+ }
+
+ min = Integer.parseInt(buf.toString());
+
+ if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
+ max = min;
+ else if (index == input.length)
+ if (mustMatch)
+ throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
+ else
+ return startIndex;
+ else if ((unit.ch == ',') && !unit.bk) {
+ buf = new StringBuffer();
+ // Read string of digits
+ while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
+ buf.append(unit.ch);
+
+ if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
+ if (mustMatch)
+ throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
+ else
+ return startIndex;
+
+ // This is the case of {x,}
+ if (buf.length() == 0) max = Integer.MAX_VALUE;
+ else max = Integer.parseInt(buf.toString());
+ } else
+ if (mustMatch)
+ throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
+ else
+ return startIndex;
+
+ // We know min and max now, and they are valid.
+
+ minMax.first = min;
+ minMax.second = max;
+
+ // return the index following the '}'
+ return index;
+ }
+
+ /**
+ * Return a human readable form of the compiled regular expression,
+ * useful for debugging.
+ */
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ dump(sb);
+ return sb.toString();
+ }
+
+ void dump(StringBuffer os) {
+ os.append('(');
+ if (subIndex == 0)
+ os.append("?:");
+ if (firstToken != null)
+ firstToken.dumpAll(os);
+ os.append(')');
+ }
+
+ // Cast input appropriately or throw exception
+ private static CharIndexed makeCharIndexed(Object input, int index) {
+ // We could let a String fall through to final input, but since
+ // it's the most likely input type, we check it first.
+ if (input instanceof String)
+ return new CharIndexedString((String) input,index);
+ else if (input instanceof char[])
+ return new CharIndexedCharArray((char[]) input,index);
+ else if (input instanceof StringBuffer)
+ return new CharIndexedStringBuffer((StringBuffer) input,index);
+ else if (input instanceof InputStream)
+ return new CharIndexedInputStream((InputStream) input,index);
+ else if (input instanceof CharIndexed)
+ return (CharIndexed) input; // do we lose index info?
+ else
+ return new CharIndexedString(input.toString(), index);
+ }
+}
diff --git a/gnu/regexp/REException.java b/gnu/regexp/REException.java
new file mode 100644
index 000000000..a10d2fc71
--- /dev/null
+++ b/gnu/regexp/REException.java
@@ -0,0 +1,182 @@
+/* gnu/regexp/REException.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+import java.text.MessageFormat;
+
+/**
+ * This is the regular expression exception class. An exception of this type
+ * defines the three attributes:
+ * <OL>
+ * <LI> A descriptive message of the error.
+ * <LI> An integral type code equivalent to one of the statically
+ * defined symbols listed below.
+ * <LI> The approximate position in the input string where the error
+ * occurred.
+ * </OL>
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ */
+
+public class REException extends Exception {
+ private int type;
+ private int pos;
+
+ // Error conditions from GNU regcomp(3) manual
+
+ /**
+ * Error flag.
+ * Invalid use of repetition operators such as using
+ * `*' as the first character.
+ */
+ public static final int REG_BADRPT = 1;
+
+ /**
+ * Error flag.
+ * Invalid use of back reference operator.
+ */
+ public static final int REG_BADBR = 2;
+
+ /**
+ * Error flag.
+ * Un-matched brace interval operators.
+ */
+ public static final int REG_EBRACE = 3;
+
+ /**
+ * Error flag.
+ * Un-matched bracket list operators.
+ */
+ public static final int REG_EBRACK = 4;
+
+ /**
+ * Error flag.
+ * Invalid use of the range operator, eg. the ending
+ * point of the range occurs prior to the starting
+ * point.
+ */
+ public static final int REG_ERANGE = 5;
+
+ /**
+ * Error flag.
+ * Unknown character class name. <B>Not implemented</B>.
+ */
+ public static final int REG_ECTYPE = 6;
+
+ /**
+ * Error flag.
+ * Un-matched parenthesis group operators.
+ */
+ public static final int REG_EPAREN = 7;
+
+ /**
+ * Error flag.
+ * Invalid back reference to a subexpression.
+ */
+ public static final int REG_ESUBREG = 8;
+
+ /**
+ * Error flag.
+ * Non specific error. <B>Not implemented</B>.
+ */
+ public static final int REG_EEND = 9;
+
+ /**
+ * Error flag.
+ * Invalid escape sequence. <B>Not implemented</B>.
+ */
+ public static final int REG_ESCAPE = 10;
+
+ /**
+ * Error flag.
+ * Invalid use of pattern operators such as group or list.
+ */
+ public static final int REG_BADPAT = 11;
+
+ /**
+ * Error flag.
+ * Compiled regular expression requires a pattern
+ * buffer larger than 64Kb. <B>Not implemented</B>.
+ */
+ public static final int REG_ESIZE = 12;
+
+ /**
+ * Error flag.
+ * The regex routines ran out of memory. <B>Not implemented</B>.
+ */
+ public static final int REG_ESPACE = 13;
+
+ REException(String msg, int type, int position) {
+ super(msg);
+ this.type = type;
+ this.pos = position;
+ }
+
+ /**
+ * Returns the type of the exception, one of the constants listed above.
+ */
+
+ public int getType() {
+ return type;
+ }
+
+ /**
+ * Returns the position, relative to the string or character array being
+ * compiled, where the error occurred. This position is generally the point
+ * where the error was detected, not necessarily the starting index of
+ * a bad subexpression.
+ */
+ public int getPosition() {
+ return pos;
+ }
+
+ /**
+ * Reports the descriptive message associated with this exception
+ * as well as its index position in the string or character array
+ * being compiled.
+ */
+ public String getMessage() {
+ Object[] args = {new Integer(pos)};
+ StringBuffer sb = new StringBuffer();
+ String prefix = RE.getLocalizedMessage("error.prefix");
+ sb.append(MessageFormat.format(prefix, args));
+ sb.append('\n');
+ sb.append(super.getMessage());
+ return sb.toString();
+ }
+}
diff --git a/gnu/regexp/REFilterInputStream.java b/gnu/regexp/REFilterInputStream.java
new file mode 100644
index 000000000..f56a9a2a9
--- /dev/null
+++ b/gnu/regexp/REFilterInputStream.java
@@ -0,0 +1,140 @@
+/* gnu/regexp/REFilterInputStream.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+import java.io.FilterInputStream;
+import java.io.InputStream;
+
+/**
+ * Replaces instances of a given RE found within an InputStream
+ * with replacement text. The replacements are interpolated into the
+ * stream when a match is found.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ * @deprecated This class cannot properly handle all character
+ * encodings. For proper handling, use the REFilterReader
+ * class instead.
+ */
+
+public class REFilterInputStream extends FilterInputStream {
+
+ private RE expr;
+ private String replace;
+ private String buffer;
+ private int bufpos;
+ private int offset;
+ private CharIndexedInputStream stream;
+
+ /**
+ * Creates an REFilterInputStream. When reading from this stream,
+ * occurrences of patterns matching the supplied regular expression
+ * will be replaced with the supplied replacement text (the
+ * metacharacters $0 through $9 may be used to refer to the full
+ * match or subexpression matches).
+ *
+ * @param stream The InputStream to be filtered.
+ * @param expr The regular expression to search for.
+ * @param replace The text pattern to replace matches with.
+ */
+ public REFilterInputStream(InputStream stream, RE expr, String replace) {
+ super(stream);
+ this.stream = new CharIndexedInputStream(stream,0);
+ this.expr = expr;
+ this.replace = replace;
+ }
+
+ /**
+ * Reads the next byte from the stream per the general contract of
+ * InputStream.read(). Returns -1 on error or end of stream.
+ */
+ public int read() {
+ // If we have buffered replace data, use it.
+ if ((buffer != null) && (bufpos < buffer.length())) {
+ return (int) buffer.charAt(bufpos++);
+ }
+
+ // check if input is at a valid position
+ if (!stream.isValid()) return -1;
+
+ REMatch mymatch = new REMatch(expr.getNumSubs(),offset,0);
+ if (expr.match(stream, mymatch)) {
+ mymatch.end[0] = mymatch.index;
+ mymatch.finish(stream);
+ stream.move(mymatch.toString().length());
+ offset += mymatch.toString().length();
+ buffer = mymatch.substituteInto(replace);
+ bufpos = 1;
+
+ // This is prone to infinite loops if replace string turns out empty.
+ if (buffer.length() > 0) {
+ return buffer.charAt(0);
+ }
+ }
+ char ch = stream.charAt(0);
+ if (ch == CharIndexed.OUT_OF_BOUNDS) return -1;
+ stream.move(1);
+ offset++;
+ return ch;
+ }
+
+ /**
+ * Returns false. REFilterInputStream does not support mark() and
+ * reset() methods.
+ */
+ public boolean markSupported() {
+ return false;
+ }
+
+ /** Reads from the stream into the provided array. */
+ public int read(byte[] b, int off, int len) {
+ int i;
+ int ok = 0;
+ while (len-- > 0) {
+ i = read();
+ if (i == -1) return (ok == 0) ? -1 : ok;
+ b[off++] = (byte) i;
+ ok++;
+ }
+ return ok;
+ }
+
+ /** Reads from the stream into the provided array. */
+ public int read(byte[] b) {
+ return read(b,0,b.length);
+ }
+}
diff --git a/gnu/regexp/REMatch.java b/gnu/regexp/REMatch.java
new file mode 100644
index 000000000..ac6c80e91
--- /dev/null
+++ b/gnu/regexp/REMatch.java
@@ -0,0 +1,263 @@
+/* gnu/regexp/REMatch.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+import java.io.Serializable;
+
+/**
+ * An instance of this class represents a match
+ * completed by a gnu.regexp matching function. It can be used
+ * to obtain relevant information about the location of a match
+ * or submatch.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ */
+public final class REMatch implements Serializable, Cloneable {
+ private String matchedText;
+
+ // These variables are package scope for fast access within the engine
+ int eflags; // execution flags this match was made using
+
+ // Offset in source text where match was tried. This is zero-based;
+ // the actual position in the source text is given by (offset + anchor).
+ int offset;
+
+ // Anchor position refers to the index into the source input
+ // at which the matching operation began.
+ // This is also useful for the ANCHORINDEX option.
+ int anchor;
+
+ // Package scope; used by RE.
+ int index; // used while matching to mark current match position in input
+ int[] start; // start positions (relative to offset) for each (sub)exp.
+ int[] end; // end positions for the same
+ REMatch next; // other possibility (to avoid having to use arrays)
+
+ public Object clone() {
+ try {
+ REMatch copy = (REMatch) super.clone();
+ copy.next = null;
+
+ copy.start = (int[]) start.clone();
+ copy.end = (int[]) end.clone();
+
+ return copy;
+ } catch (CloneNotSupportedException e) {
+ throw new Error(); // doesn't happen
+ }
+ }
+
+ void assignFrom(REMatch other) {
+ start = other.start;
+ end = other.end;
+ index = other.index;
+ // need to deep clone?
+ next = other.next;
+ }
+
+ REMatch(int subs, int anchor, int eflags) {
+ start = new int[subs+1];
+ end = new int[subs+1];
+ this.anchor = anchor;
+ this.eflags = eflags;
+ clear(anchor);
+ }
+
+ void finish(CharIndexed text) {
+ start[0] = 0;
+ StringBuffer sb = new StringBuffer();
+ int i;
+ for (i = 0; i < end[0]; i++)
+ sb.append(text.charAt(i));
+ matchedText = sb.toString();
+ for (i = 0; i < start.length; i++) {
+ // If any subexpressions didn't terminate, they don't count
+ // TODO check if this code ever gets hit
+ if ((start[i] == -1) ^ (end[i] == -1)) {
+ start[i] = -1;
+ end[i] = -1;
+ }
+ }
+ next = null; // cut off alternates
+ }
+
+ /** Clears the current match and moves the offset to the new index. */
+ void clear(int index) {
+ offset = index;
+ this.index = 0;
+ for (int i = 0; i < start.length; i++) {
+ start[i] = end[i] = -1;
+ }
+ next = null; // cut off alternates
+ }
+
+ /**
+ * Returns the string matching the pattern. This makes it convenient
+ * to write code like the following:
+ * <P>
+ * <code>
+ * REMatch myMatch = myExpression.getMatch(myString);<br>
+ * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
+ * </code>
+ */
+ public String toString() {
+ return matchedText;
+ }
+
+ /**
+ * Returns the index within the input text where the match in its entirety
+ * began.
+ */
+ public int getStartIndex() {
+ return offset + start[0];
+ }
+
+ /**
+ * Returns the index within the input string where the match in
+ * its entirety ends. The return value is the next position after
+ * the end of the string; therefore, a match created by the
+ * following call:
+ *
+ * <P>
+ * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
+ * <P>
+ * can be viewed (given that myMatch is not null) by creating
+ * <P>
+ * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
+ * myMatch.getEndIndex());</code>
+ * <P>
+ * But you can save yourself that work, since the <code>toString()</code>
+ * method (above) does exactly that for you.
+ */
+ public int getEndIndex() {
+ return offset + end[0];
+ }
+
+ /**
+ * Returns the string matching the given subexpression. The subexpressions
+ * are indexed starting with one, not zero. That is, the subexpression
+ * identified by the first set of parentheses in a regular expression
+ * could be retrieved from an REMatch by calling match.toString(1).
+ *
+ * @param sub Index of the subexpression.
+ */
+ public String toString(int sub) {
+ if ((sub >= start.length) || (start[sub] == -1)) return "";
+ return (matchedText.substring(start[sub],end[sub]));
+ }
+
+ /**
+ * Returns the index within the input string used to generate this match
+ * where subexpression number <i>sub</i> begins, or <code>-1</code> if
+ * the subexpression does not exist. The initial position is zero.
+ *
+ * @param sub Subexpression index
+ * @deprecated Use getStartIndex(int) instead.
+ */
+ public int getSubStartIndex(int sub) {
+ if (sub >= start.length) return -1;
+ int x = start[sub];
+ return (x == -1) ? x : offset + x;
+ }
+
+ /**
+ * Returns the index within the input string used to generate this match
+ * where subexpression number <i>sub</i> begins, or <code>-1</code> if
+ * the subexpression does not exist. The initial position is zero.
+ *
+ * @param sub Subexpression index
+ * @since gnu.regexp 1.1.0
+ */
+ public int getStartIndex(int sub) {
+ if (sub >= start.length) return -1;
+ int x = start[sub];
+ return (x == -1) ? x : offset + x;
+ }
+
+ /**
+ * Returns the index within the input string used to generate this match
+ * where subexpression number <i>sub</i> ends, or <code>-1</code> if
+ * the subexpression does not exist. The initial position is zero.
+ *
+ * @param sub Subexpression index
+ * @deprecated Use getEndIndex(int) instead
+ */
+ public int getSubEndIndex(int sub) {
+ if (sub >= start.length) return -1;
+ int x = end[sub];
+ return (x == -1) ? x : offset + x;
+ }
+
+ /**
+ * Returns the index within the input string used to generate this match
+ * where subexpression number <i>sub</i> ends, or <code>-1</code> if
+ * the subexpression does not exist. The initial position is zero.
+ *
+ * @param sub Subexpression index
+ */
+ public int getEndIndex(int sub) {
+ if (sub >= start.length) return -1;
+ int x = end[sub];
+ return (x == -1) ? x : offset + x;
+ }
+
+ /**
+ * Substitute the results of this match to create a new string.
+ * This is patterned after PERL, so the tokens to watch out for are
+ * <code>$0</code> through <code>$9</code>. <code>$0</code> matches
+ * the full substring matched; <code>$<i>n</i></code> matches
+ * subexpression number <i>n</i>.
+ *
+ * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
+ */
+ public String substituteInto(String input) {
+ // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
+ StringBuffer output = new StringBuffer();
+ int pos;
+ for (pos = 0; pos < input.length()-1; pos++) {
+ if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) {
+ int val = Character.digit(input.charAt(++pos),10);
+ if (val < start.length) {
+ output.append(toString(val));
+ }
+ } else output.append(input.charAt(pos));
+ }
+ if (pos < input.length()) output.append(input.charAt(pos));
+ return output.toString();
+ }
+}
diff --git a/gnu/regexp/REMatchEnumeration.java b/gnu/regexp/REMatchEnumeration.java
new file mode 100644
index 000000000..c8e208a94
--- /dev/null
+++ b/gnu/regexp/REMatchEnumeration.java
@@ -0,0 +1,135 @@
+/* gnu/regexp/REMatchEnumeration.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+import java.util.Enumeration;
+import java.util.NoSuchElementException;
+
+/**
+ * An REMatchEnumeration enumerates regular expression matches over a
+ * given input text. You obtain a reference to an enumeration using
+ * the <code>getMatchEnumeration()</code> methods on an instance of
+ * RE.
+ *
+ * <P>
+ *
+ * REMatchEnumeration does lazy computation; that is, it will not
+ * search for a match until it needs to. If you'd rather just get all
+ * the matches at once in a big array, use the
+ * <code>getAllMatches()</code> methods on RE. However, using an
+ * enumeration can help speed performance when the entire text does
+ * not need to be searched immediately.
+ *
+ * <P>
+ *
+ * The enumerated type is especially useful when searching on a Reader
+ * or InputStream, because the InputStream read position cannot be
+ * guaranteed after calling <code>getMatch()</code> (see the
+ * description of that method for an explanation of why). Enumeration
+ * also saves a lot of overhead required when calling
+ * <code>getMatch()</code> multiple times.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ */
+public class REMatchEnumeration implements Enumeration, Serializable {
+ private static final int YES = 1;
+ private static final int MAYBE = 0;
+ private static final int NO = -1;
+
+ private int more;
+ private REMatch match;
+ private RE expr;
+ private CharIndexed input;
+ private int eflags;
+ private int index;
+
+ // Package scope constructor is used by RE.getMatchEnumeration()
+ REMatchEnumeration(RE expr, CharIndexed input, int index, int eflags) {
+ more = MAYBE;
+ this.expr = expr;
+ this.input = input;
+ this.index = index;
+ this.eflags = eflags;
+ }
+
+ /** Returns true if there are more matches in the input text. */
+ public boolean hasMoreElements() {
+ return hasMoreMatches(null);
+ }
+
+ /** Returns true if there are more matches in the input text. */
+ public boolean hasMoreMatches() {
+ return hasMoreMatches(null);
+ }
+
+ /** Returns true if there are more matches in the input text.
+ * Saves the text leading up to the match (or to the end of the input)
+ * in the specified buffer.
+ */
+ public boolean hasMoreMatches(StringBuffer buffer) {
+ if (more == MAYBE) {
+ match = expr.getMatchImpl(input,index,eflags,buffer);
+ if (match != null) {
+ input.move((match.end[0] > 0) ? match.end[0] : 1);
+
+ index = (match.end[0] > 0) ? match.end[0] + match.offset : index + 1;
+ more = YES;
+ } else more = NO;
+ }
+ return (more == YES);
+ }
+
+ /** Returns the next match in the input text. */
+ public Object nextElement() throws NoSuchElementException {
+ return nextMatch();
+ }
+
+ /**
+ * Returns the next match in the input text. This method is provided
+ * for convenience to avoid having to explicitly cast the return value
+ * to class REMatch.
+ */
+ public REMatch nextMatch() throws NoSuchElementException {
+ if (hasMoreElements()) {
+ more = (input.isValid()) ? MAYBE : NO;
+ return match;
+ }
+ throw new NoSuchElementException();
+ }
+}
+
diff --git a/gnu/regexp/RESyntax.java b/gnu/regexp/RESyntax.java
new file mode 100644
index 000000000..649bd0df5
--- /dev/null
+++ b/gnu/regexp/RESyntax.java
@@ -0,0 +1,521 @@
+/* gnu/regexp/RESyntax.java
+ Copyright (C) 1998-2002, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+import java.io.Serializable;
+import java.util.BitSet;
+
+/**
+ * An RESyntax specifies the way a regular expression will be compiled.
+ * This class provides a number of predefined useful constants for
+ * emulating popular regular expression syntaxes. Additionally the
+ * user may construct his or her own syntax, using any combination of the
+ * syntax bit constants. The syntax is an optional argument to any of the
+ * matching methods on class RE.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ */
+
+public final class RESyntax implements Serializable {
+ static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");
+
+ private static final String SYNTAX_IS_FINAL = RE.getLocalizedMessage("syntax.final");
+
+ private BitSet bits;
+
+ // true for the constant defined syntaxes
+ private boolean isFinal = false;
+
+ private String lineSeparator = DEFAULT_LINE_SEPARATOR;
+
+ // Values for constants are bit indexes
+
+ /**
+ * Syntax bit. Backslash is an escape character in lists.
+ */
+ public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
+
+ /**
+ * Syntax bit. Use \? instead of ? and \+ instead of +.
+ */
+ public static final int RE_BK_PLUS_QM = 1;
+
+ /**
+ * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
+ */
+ public static final int RE_CHAR_CLASSES = 2;
+
+ /**
+ * Syntax bit. ^ and $ are special everywhere.
+ * <B>Not implemented.</B>
+ */
+ public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
+
+ /**
+ * Syntax bit. Repetition operators are only special in valid positions.
+ * <B>Not implemented.</B>
+ */
+ public static final int RE_CONTEXT_INDEP_OPS = 4;
+
+ /**
+ * Syntax bit. Repetition and alternation operators are invalid
+ * at start and end of pattern and other places.
+ * <B>Not implemented</B>.
+ */
+ public static final int RE_CONTEXT_INVALID_OPS = 5;
+
+ /**
+ * Syntax bit. Match-any-character operator (.) matches a newline.
+ */
+ public static final int RE_DOT_NEWLINE = 6;
+
+ /**
+ * Syntax bit. Match-any-character operator (.) does not match a null.
+ */
+ public static final int RE_DOT_NOT_NULL = 7;
+
+ /**
+ * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
+ */
+ public static final int RE_INTERVALS = 8;
+
+ /**
+ * Syntax bit. No alternation (|), match one-or-more (+), or
+ * match zero-or-one (?) operators.
+ */
+ public static final int RE_LIMITED_OPS = 9;
+
+ /**
+ * Syntax bit. Newline is an alternation operator.
+ */
+ public static final int RE_NEWLINE_ALT = 10; // impl.
+
+ /**
+ * Syntax bit. Intervals use { } instead of \{ \}
+ */
+ public static final int RE_NO_BK_BRACES = 11;
+
+ /**
+ * Syntax bit. Grouping uses ( ) instead of \( \).
+ */
+ public static final int RE_NO_BK_PARENS = 12;
+
+ /**
+ * Syntax bit. Backreferences not allowed.
+ */
+ public static final int RE_NO_BK_REFS = 13;
+
+ /**
+ * Syntax bit. Alternation uses | instead of \|
+ */
+ public static final int RE_NO_BK_VBAR = 14;
+
+ /**
+ * Syntax bit. <B>Not implemented</B>.
+ */
+ public static final int RE_NO_EMPTY_RANGES = 15;
+
+ /**
+ * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
+ * on RE_NO_BK_PARENS) will throw an exception when compiling.
+ */
+ public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
+
+ /**
+ * Syntax bit. <B>Not implemented.</B>
+ */
+ public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
+
+ /**
+ * Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?).
+ */
+ public static final int RE_STINGY_OPS = 18;
+
+ /**
+ * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
+ */
+ public static final int RE_CHAR_CLASS_ESCAPES = 19;
+
+ /**
+ * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
+ */
+ public static final int RE_PURE_GROUPING = 20;
+
+ /**
+ * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
+ * to the text following the current position without consuming that text.
+ */
+ public static final int RE_LOOKAHEAD = 21;
+
+ /**
+ * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
+ */
+ public static final int RE_STRING_ANCHORS = 22;
+
+ /**
+ * Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
+ */
+ public static final int RE_COMMENTS = 23;
+
+ /**
+ * Syntax bit. Allow character class escapes within lists, as in Perl5.
+ */
+ public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
+
+ private static final int BIT_TOTAL = 25;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the awk utility.
+ */
+ public static final RESyntax RE_SYNTAX_AWK;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the ed utility.
+ */
+ public static final RESyntax RE_SYNTAX_ED;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the egrep utility.
+ */
+ public static final RESyntax RE_SYNTAX_EGREP;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the GNU Emacs editor.
+ */
+ public static final RESyntax RE_SYNTAX_EMACS;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the grep utility.
+ */
+ public static final RESyntax RE_SYNTAX_GREP;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the POSIX awk specification.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_AWK;
+
+ /**
+ * Predefined syntax.
+ * Emulates POSIX basic regular expression support.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_BASIC;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the POSIX egrep specification.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_EGREP;
+
+ /**
+ * Predefined syntax.
+ * Emulates POSIX extended regular expression support.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
+
+ /**
+ * Predefined syntax.
+ * Emulates POSIX basic minimal regular expressions.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
+
+ /**
+ * Predefined syntax.
+ * Emulates POSIX extended minimal regular expressions.
+ */
+ public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in the sed utility.
+ */
+ public static final RESyntax RE_SYNTAX_SED;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in Larry Wall's perl, version 4,
+ */
+ public static final RESyntax RE_SYNTAX_PERL4;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in Larry Wall's perl, version 4,
+ * using single line mode (/s modifier).
+ */
+ public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in Larry Wall's perl, version 5.
+ */
+ public static final RESyntax RE_SYNTAX_PERL5;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in Larry Wall's perl, version 5,
+ * using single line mode (/s modifier).
+ */
+ public static final RESyntax RE_SYNTAX_PERL5_S;
+
+ /**
+ * Predefined syntax.
+ * Emulates regular expression support in Java 1.4's java.util.regex
+ * package.
+ */
+ public static final RESyntax RE_SYNTAX_JAVA_1_4;
+
+ static {
+ // Define syntaxes
+
+ RE_SYNTAX_EMACS = new RESyntax().makeFinal();
+
+ RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax()
+ .set(RE_CHAR_CLASSES)
+ .set(RE_DOT_NEWLINE)
+ .set(RE_DOT_NOT_NULL)
+ .set(RE_INTERVALS)
+ .set(RE_NO_EMPTY_RANGES)
+ .makeFinal();
+
+ RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
+ .set(RE_BK_PLUS_QM)
+ .makeFinal();
+
+ RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
+ .set(RE_CONTEXT_INDEP_ANCHORS)
+ .set(RE_CONTEXT_INDEP_OPS)
+ .set(RE_NO_BK_BRACES)
+ .set(RE_NO_BK_PARENS)
+ .set(RE_NO_BK_VBAR)
+ .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
+ .makeFinal();
+
+ RE_SYNTAX_AWK = new RESyntax()
+ .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
+ .set(RE_DOT_NOT_NULL)
+ .set(RE_NO_BK_PARENS)
+ .set(RE_NO_BK_REFS)
+ .set(RE_NO_BK_VBAR)
+ .set(RE_NO_EMPTY_RANGES)
+ .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
+ .makeFinal();
+
+ RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED)
+ .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
+ .makeFinal();
+
+ RE_SYNTAX_GREP = new RESyntax()
+ .set(RE_BK_PLUS_QM)
+ .set(RE_CHAR_CLASSES)
+ .set(RE_HAT_LISTS_NOT_NEWLINE)
+ .set(RE_INTERVALS)
+ .set(RE_NEWLINE_ALT)
+ .makeFinal();
+
+ RE_SYNTAX_EGREP = new RESyntax()
+ .set(RE_CHAR_CLASSES)
+ .set(RE_CONTEXT_INDEP_ANCHORS)
+ .set(RE_CONTEXT_INDEP_OPS)
+ .set(RE_HAT_LISTS_NOT_NEWLINE)
+ .set(RE_NEWLINE_ALT)
+ .set(RE_NO_BK_PARENS)
+ .set(RE_NO_BK_VBAR)
+ .makeFinal();
+
+ RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP)
+ .set(RE_INTERVALS)
+ .set(RE_NO_BK_BRACES)
+ .makeFinal();
+
+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+
+ RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
+ .makeFinal();
+
+ RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
+ .makeFinal();
+
+ RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
+ .set(RE_LIMITED_OPS)
+ .makeFinal();
+
+ /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+
+ RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
+ .set(RE_CONTEXT_INDEP_ANCHORS)
+ .set(RE_CONTEXT_INVALID_OPS)
+ .set(RE_NO_BK_BRACES)
+ .set(RE_NO_BK_PARENS)
+ .set(RE_NO_BK_REFS)
+ .set(RE_NO_BK_VBAR)
+ .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
+ .makeFinal();
+
+ /* There is no official Perl spec, but here's a "best guess" */
+
+ RE_SYNTAX_PERL4 = new RESyntax()
+ .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
+ .set(RE_CONTEXT_INDEP_ANCHORS)
+ .set(RE_CONTEXT_INDEP_OPS) // except for '{', apparently
+ .set(RE_INTERVALS)
+ .set(RE_NO_BK_BRACES)
+ .set(RE_NO_BK_PARENS)
+ .set(RE_NO_BK_VBAR)
+ .set(RE_NO_EMPTY_RANGES)
+ .set(RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
+ .makeFinal();
+
+ RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4)
+ .set(RE_DOT_NEWLINE)
+ .makeFinal();
+
+ RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4)
+ .set(RE_PURE_GROUPING) // (?:)
+ .set(RE_STINGY_OPS) // *?,??,+?,{}?
+ .set(RE_LOOKAHEAD) // (?=)(?!)
+ .set(RE_STRING_ANCHORS) // \A,\Z
+ .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
+ .set(RE_COMMENTS) // (?#)
+ .makeFinal();
+
+ RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
+ .set(RE_DOT_NEWLINE)
+ .makeFinal();
+
+ RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
+ // XXX
+ .makeFinal();
+ }
+
+ /**
+ * Construct a new syntax object with all bits turned off.
+ * This is equivalent to RE_SYNTAX_EMACS.
+ */
+ public RESyntax() {
+ bits = new BitSet(BIT_TOTAL);
+ }
+
+ /**
+ * Called internally when constructing predefined syntaxes
+ * so their interpretation cannot vary. Conceivably useful
+ * for your syntaxes as well. Causes IllegalAccessError to
+ * be thrown if any attempt to modify the syntax is made.
+ *
+ * @return this object for convenient chaining
+ */
+ public RESyntax makeFinal() {
+ isFinal = true;
+ return this;
+ }
+
+ /**
+ * Construct a new syntax object with all bits set the same
+ * as the other syntax.
+ */
+ public RESyntax(RESyntax other) {
+ bits = (BitSet) other.bits.clone();
+ }
+
+ /**
+ * Check if a given bit is set in this syntax.
+ */
+ public boolean get(int index) {
+ return bits.get(index);
+ }
+
+ /**
+ * Set a given bit in this syntax.
+ *
+ * @param index the constant (RESyntax.RE_xxx) bit to set.
+ * @return a reference to this object for easy chaining.
+ */
+ public RESyntax set(int index) {
+ if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
+ bits.set(index);
+ return this;
+ }
+
+ /**
+ * Clear a given bit in this syntax.
+ *
+ * @param index the constant (RESyntax.RE_xxx) bit to clear.
+ * @return a reference to this object for easy chaining.
+ */
+ public RESyntax clear(int index) {
+ if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
+ bits.clear(index);
+ return this;
+ }
+
+ /**
+ * Changes the line separator string for regular expressions
+ * created using this RESyntax. The default separator is the
+ * value returned by the system property "line.separator", which
+ * should be correct when reading platform-specific files from a
+ * filesystem. However, many programs may collect input from
+ * sources where the line separator is differently specified (for
+ * example, in the applet environment, the text box widget
+ * interprets line breaks as single-character newlines,
+ * regardless of the host platform.
+ *
+ * Note that setting the line separator to a character or
+ * characters that have specific meaning within the current syntax
+ * can cause unexpected chronosynclastic infundibula.
+ *
+ * @return this object for convenient chaining
+ */
+ public RESyntax setLineSeparator(String aSeparator) {
+ if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
+ lineSeparator = aSeparator;
+ return this;
+ }
+
+ /**
+ * Returns the currently active line separator string. The default
+ * is the platform-dependent system property "line.separator".
+ */
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+}
diff --git a/gnu/regexp/REToken.java b/gnu/regexp/REToken.java
new file mode 100644
index 000000000..aa576a5ad
--- /dev/null
+++ b/gnu/regexp/REToken.java
@@ -0,0 +1,86 @@
+/* gnu/regexp/REToken.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+abstract class REToken implements Serializable {
+
+ protected REToken next = null;
+ protected REToken uncle = null;
+ protected int subIndex;
+
+ protected REToken(int subIndex) {
+ this.subIndex = subIndex;
+ }
+
+ int getMinimumLength() {
+ return 0;
+ }
+
+ void setUncle(REToken anUncle) {
+ uncle = anUncle;
+ }
+
+ /** Returns true if the match succeeded, false if it failed. */
+ abstract boolean match(CharIndexed input, REMatch mymatch);
+
+ /** Returns true if the rest of the tokens match, false if they fail. */
+ protected boolean next(CharIndexed input, REMatch mymatch) {
+ if (next == null) {
+ if (uncle == null) {
+ return true;
+ } else {
+ return uncle.match(input, mymatch);
+ }
+ } else {
+ return next.match(input, mymatch);
+ }
+ }
+
+ boolean chain(REToken token) {
+ next = token;
+ return true; // Token was accepted
+ }
+
+ abstract void dump(StringBuffer os);
+
+ void dumpAll(StringBuffer os) {
+ dump(os);
+ if (next != null) next.dumpAll(os);
+ }
+}
diff --git a/gnu/regexp/RETokenAny.java b/gnu/regexp/RETokenAny.java
new file mode 100644
index 000000000..42fdd9e28
--- /dev/null
+++ b/gnu/regexp/RETokenAny.java
@@ -0,0 +1,73 @@
+/* gnu/regexp/RETokenAny.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenAny extends REToken {
+ /** True if '.' can match a newline (RE_DOT_NEWLINE) */
+ private boolean newline;
+
+ /** True if '.' can't match a null (RE_DOT_NOT_NULL) */
+ private boolean matchNull;
+
+ RETokenAny(int subIndex, boolean newline, boolean matchNull) {
+ super(subIndex);
+ this.newline = newline;
+ this.matchNull = matchNull;
+ }
+
+ int getMinimumLength() {
+ return 1;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ if ((ch == CharIndexed.OUT_OF_BOUNDS)
+ || (!newline && (ch == '\n'))
+ || (matchNull && (ch == 0))) {
+ return false;
+ }
+ ++mymatch.index;
+ return next(input, mymatch);
+ }
+
+ void dump(StringBuffer os) {
+ os.append('.');
+ }
+}
+
diff --git a/gnu/regexp/RETokenBackRef.java b/gnu/regexp/RETokenBackRef.java
new file mode 100644
index 000000000..a811e16a7
--- /dev/null
+++ b/gnu/regexp/RETokenBackRef.java
@@ -0,0 +1,72 @@
+/* gnu/regexp/RETokenBackRef.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenBackRef extends REToken {
+ private int num;
+ private boolean insens;
+
+ RETokenBackRef(int subIndex, int num, boolean insens) {
+ super(subIndex);
+ this.num = num;
+ this.insens = insens;
+ }
+
+ // should implement getMinimumLength() -- any ideas?
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ int b,e;
+ b = mymatch.start[num];
+ e = mymatch.end[num];
+ if ((b==-1)||(e==-1)) return false; // this shouldn't happen, but...
+ for (int i=b; i<e; i++) {
+ if (input.charAt(mymatch.index+i-b) != input.charAt(i)) {
+ return false;
+ }
+ }
+ mymatch.index += e-b;
+ return next(input, mymatch);
+ }
+
+ void dump(StringBuffer os) {
+ os.append('\\').append(num);
+ }
+}
+
+
diff --git a/gnu/regexp/RETokenChar.java b/gnu/regexp/RETokenChar.java
new file mode 100644
index 000000000..17712e347
--- /dev/null
+++ b/gnu/regexp/RETokenChar.java
@@ -0,0 +1,91 @@
+/* gnu/regexp/RETokenChar.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenChar extends REToken {
+ private char[] ch;
+ private boolean insens;
+
+ RETokenChar(int subIndex, char c, boolean ins) {
+ super(subIndex);
+ ch = new char [1];
+ ch[0] = (insens = ins) ? Character.toLowerCase(c) : c;
+ }
+
+ int getMinimumLength() {
+ return ch.length;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ int z = ch.length;
+ char c;
+ for (int i=0; i<z; i++) {
+ c = input.charAt(mymatch.index+i);
+ if (( (insens) ? Character.toLowerCase(c) : c ) != ch[i]) {
+ return false;
+ }
+ }
+ mymatch.index += z;
+
+ return next(input, mymatch);
+ }
+
+ // Overrides REToken.chain() to optimize for strings
+ boolean chain(REToken next) {
+ if (next instanceof RETokenChar) {
+ RETokenChar cnext = (RETokenChar) next;
+ // assume for now that next can only be one character
+ int newsize = ch.length + cnext.ch.length;
+
+ char[] chTemp = new char [newsize];
+
+ System.arraycopy(ch,0,chTemp,0,ch.length);
+ System.arraycopy(cnext.ch,0,chTemp,ch.length,cnext.ch.length);
+
+ ch = chTemp;
+ return false;
+ } else return super.chain(next);
+ }
+
+ void dump(StringBuffer os) {
+ os.append(ch);
+ }
+}
+
+
diff --git a/gnu/regexp/RETokenEnd.java b/gnu/regexp/RETokenEnd.java
new file mode 100644
index 000000000..08e57084d
--- /dev/null
+++ b/gnu/regexp/RETokenEnd.java
@@ -0,0 +1,75 @@
+/* gnu/regexp/RETokenEnd.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenEnd extends REToken {
+ /**
+ * Indicates whether this token should match on a line break.
+ */
+ private String newline;
+
+ RETokenEnd(int subIndex,String newline) {
+ super(subIndex);
+ this.newline = newline;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ if (ch == CharIndexed.OUT_OF_BOUNDS)
+ return ((mymatch.eflags & RE.REG_NOTEOL)>0) ?
+ false : next(input, mymatch);
+ if (newline != null) {
+ char z;
+ int i = 0; // position in newline
+ do {
+ z = newline.charAt(i);
+ if (ch != z) return false;
+ ++i;
+ ch = input.charAt(mymatch.index + i);
+ } while (i < newline.length());
+
+ return next(input, mymatch);
+ }
+ return false;
+ }
+
+ void dump(StringBuffer os) {
+ os.append('$');
+ }
+}
diff --git a/gnu/regexp/RETokenEndSub.java b/gnu/regexp/RETokenEndSub.java
new file mode 100644
index 000000000..913d3f85c
--- /dev/null
+++ b/gnu/regexp/RETokenEndSub.java
@@ -0,0 +1,53 @@
+/* gnu/regexp/RETokenEndSub.java
+ Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+final class RETokenEndSub extends REToken {
+ RETokenEndSub(int subIndex) {
+ super(subIndex);
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ mymatch.end[subIndex] = mymatch.index;
+ return next(input, mymatch);
+ }
+
+ void dump(StringBuffer os) {
+ // handled by RE
+ }
+}
diff --git a/gnu/regexp/RETokenOneOf.java b/gnu/regexp/RETokenOneOf.java
new file mode 100644
index 000000000..7752b2577
--- /dev/null
+++ b/gnu/regexp/RETokenOneOf.java
@@ -0,0 +1,130 @@
+/* gnu/regexp/RETokenOneOf.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.util.Vector;
+
+final class RETokenOneOf extends REToken {
+ private Vector options;
+ private boolean negative;
+
+ // This constructor is used for convenience when we know the set beforehand,
+ // e.g. \d --> new RETokenOneOf("0123456789",false, ..)
+ // \D --> new RETokenOneOf("0123456789",true, ..)
+
+ RETokenOneOf(int subIndex, String optionsStr, boolean negative, boolean insens) {
+ super(subIndex);
+ options = new Vector();
+ this.negative = negative;
+ for (int i = 0; i < optionsStr.length(); i++)
+ options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens));
+ }
+
+ RETokenOneOf(int subIndex, Vector options, boolean negative) {
+ super(subIndex);
+ this.options = options;
+ this.negative = negative;
+ }
+
+ int getMinimumLength() {
+ int min = Integer.MAX_VALUE;
+ int x;
+ for (int i=0; i < options.size(); i++) {
+ if ((x = ((REToken) options.elementAt(i)).getMinimumLength()) < min)
+ min = x;
+ }
+ return min;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ if (negative && (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS))
+ return false;
+
+ REMatch newMatch = null;
+ REMatch last = null;
+ REToken tk;
+ boolean isMatch;
+ for (int i=0; i < options.size(); i++) {
+ tk = (REToken) options.elementAt(i);
+ REMatch tryMatch = (REMatch) mymatch.clone();
+ if (tk.match(input, tryMatch)) { // match was successful
+ if (negative) return false;
+
+ if (next(input, tryMatch)) {
+ // Add tryMatch to list of possibilities.
+ if (last == null) {
+ newMatch = tryMatch;
+ last = tryMatch;
+ } else {
+ last.next = tryMatch;
+ last = tryMatch;
+ }
+ } // next succeeds
+ } // is a match
+ } // try next option
+
+ if (newMatch != null) {
+ if (negative) {
+ return false;
+ } else {
+ // set contents of mymatch equal to newMatch
+
+ // try each one that matched
+ mymatch.assignFrom(newMatch);
+ return true;
+ }
+ } else {
+ if (negative) {
+ ++mymatch.index;
+ return next(input, mymatch);
+ } else {
+ return false;
+ }
+ }
+
+ // index+1 works for [^abc] lists, not for generic lookahead (--> index)
+ }
+
+ void dump(StringBuffer os) {
+ os.append(negative ? "[^" : "(?:");
+ for (int i = 0; i < options.size(); i++) {
+ if (!negative && (i > 0)) os.append('|');
+ ((REToken) options.elementAt(i)).dumpAll(os);
+ }
+ os.append(negative ? ']' : ')');
+ }
+}
diff --git a/gnu/regexp/RETokenPOSIX.java b/gnu/regexp/RETokenPOSIX.java
new file mode 100644
index 000000000..00fcf301a
--- /dev/null
+++ b/gnu/regexp/RETokenPOSIX.java
@@ -0,0 +1,144 @@
+/* gnu/regexp/RETokenPOSIX.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenPOSIX extends REToken {
+ int type;
+ boolean insens;
+ boolean negated;
+
+ static final int ALNUM = 0;
+ static final int ALPHA = 1;
+ static final int BLANK = 2;
+ static final int CNTRL = 3;
+ static final int DIGIT = 4;
+ static final int GRAPH = 5;
+ static final int LOWER = 6;
+ static final int PRINT = 7;
+ static final int PUNCT = 8;
+ static final int SPACE = 9;
+ static final int UPPER = 10;
+ static final int XDIGIT = 11;
+
+ // Array indices correspond to constants defined above.
+ static final String[] s_nameTable = {
+ "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower",
+ "print", "punct", "space", "upper", "xdigit"
+ };
+
+ // The RE constructor uses this to look up the constant for a string
+ static int intValue(String key) {
+ for (int i = 0; i < s_nameTable.length; i++) {
+ if (s_nameTable[i].equals(key)) return i;
+ }
+ return -1;
+ }
+
+ RETokenPOSIX(int subIndex, int type, boolean insens, boolean negated) {
+ super(subIndex);
+ this.type = type;
+ this.insens = insens;
+ this.negated = negated;
+ }
+
+ int getMinimumLength() {
+ return 1;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ if (ch == CharIndexed.OUT_OF_BOUNDS)
+ return false;
+
+ boolean retval = false;
+ switch (type) {
+ case ALNUM:
+ // Note that there is some debate over whether '_' should be included
+ retval = Character.isLetterOrDigit(ch) || (ch == '_');
+ break;
+ case ALPHA:
+ retval = Character.isLetter(ch);
+ break;
+ case BLANK:
+ retval = ((ch == ' ') || (ch == '\t'));
+ break;
+ case CNTRL:
+ retval = Character.isISOControl(ch);
+ break;
+ case DIGIT:
+ retval = Character.isDigit(ch);
+ break;
+ case GRAPH:
+ retval = (!(Character.isWhitespace(ch) || Character.isISOControl(ch)));
+ break;
+ case LOWER:
+ retval = ((insens && Character.isLetter(ch)) || Character.isLowerCase(ch));
+ break;
+ case PRINT:
+ retval = (!(Character.isWhitespace(ch) || Character.isISOControl(ch)))
+ || (ch == ' ');
+ break;
+ case PUNCT:
+ // This feels sloppy, especially for non-U.S. locales.
+ retval = ("`~!@#$%^&*()-_=+[]{}\\|;:'\"/?,.<>".indexOf(ch)!=-1);
+ break;
+ case SPACE:
+ retval = Character.isWhitespace(ch);
+ break;
+ case UPPER:
+ retval = ((insens && Character.isLetter(ch)) || Character.isUpperCase(ch));
+ break;
+ case XDIGIT:
+ retval = (Character.isDigit(ch) || ("abcdefABCDEF".indexOf(ch)!=-1));
+ break;
+ }
+
+ if (negated) retval = !retval;
+ if (retval) {
+ ++mymatch.index;
+ return next(input, mymatch);
+ }
+ else return false;
+ }
+
+ void dump(StringBuffer os) {
+ if (negated) os.append('^');
+ os.append("[:" + s_nameTable[type] + ":]");
+ }
+}
diff --git a/gnu/regexp/RETokenRange.java b/gnu/regexp/RETokenRange.java
new file mode 100644
index 000000000..9ce3be926
--- /dev/null
+++ b/gnu/regexp/RETokenRange.java
@@ -0,0 +1,69 @@
+/* gnu/regexp/RETokenRange.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+final class RETokenRange extends REToken {
+ private char lo, hi;
+ private boolean insens;
+
+ RETokenRange(int subIndex, char lo, char hi, boolean ins) {
+ super(subIndex);
+ this.lo = (insens = ins) ? Character.toLowerCase(lo) : lo;
+ this.hi = ins ? Character.toLowerCase(hi) : hi;
+ }
+
+ int getMinimumLength() {
+ return 1;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ char c = input.charAt(mymatch.index);
+ if (c == CharIndexed.OUT_OF_BOUNDS) return false;
+ if (insens) c = Character.toLowerCase(c);
+ if ((c >= lo) && (c <= hi)) {
+ ++mymatch.index;
+ return next(input, mymatch);
+ }
+ return false;
+ }
+
+ void dump(StringBuffer os) {
+ os.append(lo).append('-').append(hi);
+ }
+}
+
diff --git a/gnu/regexp/RETokenRepeated.java b/gnu/regexp/RETokenRepeated.java
new file mode 100644
index 000000000..8c7892712
--- /dev/null
+++ b/gnu/regexp/RETokenRepeated.java
@@ -0,0 +1,227 @@
+/* gnu/regexp/RETokenRepeated.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+import java.util.Vector;
+
+final class RETokenRepeated extends REToken {
+ private REToken token;
+ private int min,max;
+ private boolean stingy;
+
+ RETokenRepeated(int subIndex, REToken token, int min, int max) {
+ super(subIndex);
+ this.token = token;
+ this.min = min;
+ this.max = max;
+ }
+
+ /** Sets the minimal matching mode to true. */
+ void makeStingy() {
+ stingy = true;
+ }
+
+ /** Queries if this token has minimal matching enabled. */
+ boolean isStingy() {
+ return stingy;
+ }
+
+ /**
+ * The minimum length of a repeated token is the minimum length
+ * of the token multiplied by the minimum number of times it must
+ * match.
+ */
+ int getMinimumLength() {
+ return (min * token.getMinimumLength());
+ }
+
+ // We do need to save every possible point, but the number of clone()
+ // invocations here is really a killer for performance on non-stingy
+ // repeat operators. I'm open to suggestions...
+
+ // Hypothetical question: can you have a RE that matches 1 times,
+ // 3 times, 5 times, but not 2 times or 4 times? Does having
+ // the subexpression back-reference operator allow that?
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ // number of times we've matched so far
+ int numRepeats = 0;
+
+ // Possible positions for the next repeat to match at
+ REMatch newMatch = mymatch;
+ REMatch last = null;
+ REMatch current;
+
+ // Add the '0-repeats' index
+ // positions.elementAt(z) == position [] in input after <<z>> matches
+ Vector positions = new Vector();
+ positions.addElement(newMatch);
+
+ // Declare variables used in loop
+ REMatch doables;
+ REMatch doablesLast;
+ REMatch recurrent;
+
+ do {
+ // Check for stingy match for each possibility.
+ if (stingy && (numRepeats >= min)) {
+ REMatch result = matchRest(input, newMatch);
+ if (result != null) {
+ mymatch.assignFrom(result);
+ return true;
+ }
+ }
+
+ doables = null;
+ doablesLast = null;
+
+ // try next repeat at all possible positions
+ for (current = newMatch; current != null; current = current.next) {
+ recurrent = (REMatch) current.clone();
+ if (token.match(input, recurrent)) {
+ // add all items in current to doables array
+ if (doables == null) {
+ doables = recurrent;
+ doablesLast = recurrent;
+ } else {
+ // Order these from longest to shortest
+ // Start by assuming longest (more repeats)
+ doablesLast.next = recurrent;
+ }
+ // Find new doablesLast
+ while (doablesLast.next != null) {
+ doablesLast = doablesLast.next;
+ }
+ }
+ }
+ // if none of the possibilities worked out, break out of do/while
+ if (doables == null) break;
+
+ // reassign where the next repeat can match
+ newMatch = doables;
+
+ // increment how many repeats we've successfully found
+ ++numRepeats;
+
+ positions.addElement(newMatch);
+ } while (numRepeats < max);
+
+ // If there aren't enough repeats, then fail
+ if (numRepeats < min) return false;
+
+ // We're greedy, but ease off until a true match is found
+ int posIndex = positions.size();
+
+ // At this point we've either got too many or just the right amount.
+ // See if this numRepeats works with the rest of the regexp.
+ REMatch allResults = null;
+ REMatch allResultsLast = null;
+
+ REMatch results = null;
+ while (--posIndex >= min) {
+ newMatch = (REMatch) positions.elementAt(posIndex);
+ results = matchRest(input, newMatch);
+ if (results != null) {
+ if (allResults == null) {
+ allResults = results;
+ allResultsLast = results;
+ } else {
+ // Order these from longest to shortest
+ // Start by assuming longest (more repeats)
+ allResultsLast.next = results;
+ }
+ // Find new doablesLast
+ while (allResultsLast.next != null) {
+ allResultsLast = allResultsLast.next;
+ }
+ }
+ // else did not match rest of the tokens, try again on smaller sample
+ }
+ if (allResults != null) {
+ mymatch.assignFrom(allResults); // does this get all?
+ return true;
+ }
+ // If we fall out, no matches.
+ return false;
+ }
+
+ private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
+ REMatch current, single;
+ REMatch doneIndex = null;
+ REMatch doneIndexLast = null;
+ // Test all possible matches for this number of repeats
+ for (current = newMatch; current != null; current = current.next) {
+ // clone() separates a single match from the chain
+ single = (REMatch) current.clone();
+ if (next(input, single)) {
+ // chain results to doneIndex
+ if (doneIndex == null) {
+ doneIndex = single;
+ doneIndexLast = single;
+ } else {
+ doneIndexLast.next = single;
+ }
+ // Find new doneIndexLast
+ while (doneIndexLast.next != null) {
+ doneIndexLast = doneIndexLast.next;
+ }
+ }
+ }
+ return doneIndex;
+ }
+
+ void dump(StringBuffer os) {
+ os.append("(?:");
+ token.dumpAll(os);
+ os.append(')');
+ if ((max == Integer.MAX_VALUE) && (min <= 1))
+ os.append( (min == 0) ? '*' : '+' );
+ else if ((min == 0) && (max == 1))
+ os.append('?');
+ else {
+ os.append('{').append(min);
+ if (max > min) {
+ os.append(',');
+ if (max != Integer.MAX_VALUE) os.append(max);
+ }
+ os.append('}');
+ }
+ if (stingy) os.append('?');
+ }
+}
diff --git a/gnu/regexp/RETokenStart.java b/gnu/regexp/RETokenStart.java
new file mode 100644
index 000000000..8adb8c89c
--- /dev/null
+++ b/gnu/regexp/RETokenStart.java
@@ -0,0 +1,87 @@
+/* gnu/regexp/RETokenStart.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+class RETokenStart extends REToken {
+ private String newline; // matches after a newline
+
+ RETokenStart(int subIndex, String newline) {
+ super(subIndex);
+ this.newline = newline;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ // charAt(index-n) may be unknown on a Reader/InputStream. FIXME
+ // Match after a newline if in multiline mode
+
+ if (newline != null) {
+ int len = newline.length();
+ if (mymatch.offset >= len) {
+ boolean found = true;
+ char z;
+ int i = 0; // position in REToken.newline
+ char ch = input.charAt(mymatch.index - len);
+ do {
+ z = newline.charAt(i);
+ if (ch != z) {
+ found = false;
+ break;
+ }
+ ++i;
+ ch = input.charAt(mymatch.index - len + i);
+ } while (i < len);
+
+ if (found) return next(input, mymatch);
+ }
+ }
+
+ // Don't match at all if REG_NOTBOL is set.
+ if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return false;
+
+ if ((mymatch.eflags & RE.REG_ANCHORINDEX) > 0)
+ return (mymatch.anchor == mymatch.offset) ?
+ next(input, mymatch) : false;
+ else
+ return ((mymatch.index == 0) && (mymatch.offset == 0)) ?
+ next(input, mymatch) : false;
+ }
+
+ void dump(StringBuffer os) {
+ os.append('^');
+ }
+}
diff --git a/gnu/regexp/RETokenWordBoundary.java b/gnu/regexp/RETokenWordBoundary.java
new file mode 100644
index 000000000..38baaec13
--- /dev/null
+++ b/gnu/regexp/RETokenWordBoundary.java
@@ -0,0 +1,104 @@
+/* gnu/regexp/RETokenWordBoundary.java
+ Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+/**
+ * Represents a combination lookahead/lookbehind for POSIX [:alnum:].
+ */
+final class RETokenWordBoundary extends REToken {
+ private boolean negated;
+ private int where;
+ static final int BEGIN = 1;
+ static final int END = 2;
+
+ RETokenWordBoundary(int subIndex, int where, boolean negated) {
+ super(subIndex);
+ this.where = where;
+ this.negated = negated;
+ }
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ // Word boundary means input[index-1] was a word character
+ // and input[index] is not, or input[index] is a word character
+ // and input[index-1] was not
+ // In the string "one two three", these positions match:
+ // |o|n|e| |t|w|o| |t|h|r|e|e|
+ // ^ ^ ^ ^ ^ ^
+ boolean after = false; // is current character a letter or digit?
+ boolean before = false; // is previous character a letter or digit?
+ char ch;
+
+ // TODO: Also check REG_ANCHORINDEX vs. anchor
+ if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX)
+ || (mymatch.offset + mymatch.index > mymatch.anchor)) {
+ if ((ch = input.charAt(mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) {
+ before = Character.isLetterOrDigit(ch) || (ch == '_');
+ }
+ }
+
+ if ((ch = input.charAt(mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) {
+ after = Character.isLetterOrDigit(ch) || (ch == '_');
+ }
+
+ // if (before) and (!after), we're at end (\>)
+ // if (after) and (!before), we're at beginning (\<)
+ boolean doNext = false;
+
+ if ((where & BEGIN) == BEGIN) {
+ doNext = after && !before;
+ }
+ if ((where & END) == END) {
+ doNext ^= before && !after;
+ }
+
+ if (negated) doNext = !doNext;
+
+ return (doNext ? next(input, mymatch) : false);
+ }
+
+ void dump(StringBuffer os) {
+ if (where == (BEGIN | END)) {
+ os.append( negated ? "\\B" : "\\b" );
+ } else if (where == BEGIN) {
+ os.append("\\<");
+ } else {
+ os.append("\\>");
+ }
+ }
+}
diff --git a/gnu/regexp/UncheckedRE.java b/gnu/regexp/UncheckedRE.java
new file mode 100644
index 000000000..660466eab
--- /dev/null
+++ b/gnu/regexp/UncheckedRE.java
@@ -0,0 +1,109 @@
+/* gnu/regexp/UncheckedRE.java
+ Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+/**
+ * UncheckedRE is a subclass of RE that allows programmers an easier means
+ * of programmatically precompiling regular expressions. It is constructed
+ * and used in exactly the same manner as an instance of the RE class; the
+ * only difference is that its constructors do not throw REException.
+ * Instead, if a syntax error is encountered during construction, a
+ * RuntimeException will be thrown.
+ * <P>
+ * Note that this makes UncheckedRE dangerous if constructed with
+ * dynamic data. Do not use UncheckedRE unless you are completely sure
+ * that all input being passed to it contains valid, well-formed
+ * regular expressions for the syntax specified.
+ *
+ * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
+ * @see gnu.regexp.RE
+ * @since gnu.regexp 1.1.4
+ */
+
+public final class UncheckedRE extends RE {
+ /**
+ * Constructs a regular expression pattern buffer without any compilation
+ * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @exception RuntimeException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public UncheckedRE(Object pattern) {
+ this(pattern,0,RESyntax.RE_SYNTAX_PERL5);
+ }
+
+ /**
+ * Constructs a regular expression pattern buffer using the specified
+ * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer, or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @param cflags The logical OR of any combination of the compilation flags in the RE class.
+ * @exception RuntimeException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public UncheckedRE(Object pattern, int cflags) {
+ this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5);
+ }
+
+ /**
+ * Constructs a regular expression pattern buffer using the specified
+ * compilation flags and regular expression syntax.
+ *
+ * @param pattern A regular expression pattern, in the form of a String,
+ * StringBuffer, or char[]. Other input types will be converted to
+ * strings using the toString() method.
+ * @param cflags The logical OR of any combination of the compilation flags in the RE class.
+ * @param syntax The type of regular expression syntax to use.
+ * @exception RuntimeException The input pattern could not be parsed.
+ * @exception NullPointerException The pattern was null.
+ */
+ public UncheckedRE(Object pattern, int cflags, RESyntax syntax) {
+ try {
+ initialize(pattern,cflags,syntax,0,0);
+ } catch (REException e) {
+ throw new RuntimeException(e.getMessage());
+ }
+ }
+}
+
+
diff --git a/java/util/regex/Matcher.java b/java/util/regex/Matcher.java
index ef65557b6..28835d294 100644
--- a/java/util/regex/Matcher.java
+++ b/java/util/regex/Matcher.java
@@ -1,5 +1,5 @@
-/* Matcher.java --
- Copyright (C) 2002 Free Software Foundation, Inc.
+/* Matcher.java -- Instance of a regular expression applied to a char sequence.
+ Copyright (C) 2002, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -38,13 +38,27 @@ exception statement from your version. */
package java.util.regex;
+import gnu.regexp.RE;
+import gnu.regexp.REMatch;
+
/**
- * @author Michael Koch
+ * Instance of a regular expression applied to a char sequence.
+ *
* @since 1.4
*/
public class Matcher
{
private Pattern pattern;
+ private CharSequence input;
+ private int position;
+ private int appendPosition;
+ private REMatch match;
+
+ Matcher(Pattern pattern, CharSequence input)
+ {
+ this.pattern = pattern;
+ this.input = input;
+ }
/**
* @param sb The target string buffer
@@ -58,7 +72,12 @@ public class Matcher
public Matcher appendReplacement (StringBuffer sb, String replacement)
throws IllegalStateException
{
- throw new Error("Not implemented");
+ assertMatchOp();
+ sb.append(input.subSequence(appendPosition,
+ match.getStartIndex()).toString());
+ sb.append(match.substituteInto(replacement));
+ appendPosition = match.getEndIndex();
+ return this;
}
/**
@@ -66,7 +85,8 @@ public class Matcher
*/
public StringBuffer appendTail (StringBuffer sb)
{
- throw new Error("Not implemented");
+ sb.append(input.subSequence(appendPosition, input.length()).toString());
+ return sb;
}
/**
@@ -76,7 +96,8 @@ public class Matcher
public int end ()
throws IllegalStateException
{
- throw new Error ("Not implemented");
+ assertMatchOp();
+ return match.getEndIndex();
}
/**
@@ -90,14 +111,36 @@ public class Matcher
public int end (int group)
throws IllegalStateException
{
- throw new Error ("Not implemented");
+ assertMatchOp();
+ return match.getEndIndex(group);
}
public boolean find ()
{
- throw new Error ("Not implemented");
- }
-
+ boolean first = (match == null);
+ match = pattern.getRE().getMatch(input, position);
+ if (match != null)
+ {
+ int endIndex = match.getEndIndex();
+ // Are we stuck at the same position?
+ if (!first && endIndex == position)
+ {
+ match = null;
+ // Not at the end of the input yet?
+ if (position < input.length() - 1)
+ {
+ position++;
+ return find(position);
+ }
+ else
+ return false;
+ }
+ position = endIndex;
+ return true;
+ }
+ return false;
+ }
+
/**
* @param start The index to start the new pattern matching
*
@@ -106,7 +149,13 @@ public class Matcher
*/
public boolean find (int start)
{
- throw new Error ("Not implemented");
+ match = pattern.getRE().getMatch(input, start);
+ if (match != null)
+ {
+ position = match.getEndIndex();
+ return true;
+ }
+ return false;
}
/**
@@ -115,7 +164,8 @@ public class Matcher
*/
public String group ()
{
- throw new Error ("Not implemented");
+ assertMatchOp();
+ return match.toString();
}
/**
@@ -129,7 +179,8 @@ public class Matcher
public String group (int group)
throws IllegalStateException
{
- throw new Error ("Not implemented");
+ assertMatchOp();
+ return match.toString(group);
}
/**
@@ -137,7 +188,9 @@ public class Matcher
*/
public String replaceFirst (String replacement)
{
- throw new Error ("Not implemented");
+ reset();
+ // Semantics might not quite match
+ return pattern.getRE().substitute(input, replacement, position);
}
/**
@@ -145,17 +198,25 @@ public class Matcher
*/
public String replaceAll (String replacement)
{
- throw new Error ("Not implemented");
+ reset();
+ return pattern.getRE().substituteAll(input, replacement, position);
}
public int groupCount ()
{
- throw new Error("Not implemented");
+ return pattern.getRE().getNumSubs();
}
public boolean lookingAt ()
{
- throw new Error("Not implemented");
+ match = pattern.getRE().getMatch(input, 0);
+ if (match != null)
+ {
+ if (match.getStartIndex() == 0)
+ return true;
+ match = null;
+ }
+ return false;
}
/**
@@ -170,7 +231,7 @@ public class Matcher
*/
public boolean matches ()
{
- throw new Error("Not implemented");
+ return find(0);
}
/**
@@ -183,7 +244,9 @@ public class Matcher
public Matcher reset ()
{
- throw new Error ("Not implemented");
+ position = 0;
+ match = null;
+ return this;
}
/**
@@ -191,7 +254,8 @@ public class Matcher
*/
public Matcher reset (CharSequence input)
{
- throw new Error ("Not implemented");
+ this.input = input;
+ return reset();
}
/**
@@ -203,7 +267,8 @@ public class Matcher
public int start ()
throws IllegalStateException
{
- throw new Error("Not implemented");
+ assertMatchOp();
+ return match.getStartIndex();
}
/**
@@ -217,6 +282,12 @@ public class Matcher
public int start (int group)
throws IllegalStateException
{
- throw new Error("Not implemented");
+ assertMatchOp();
+ return match.getStartIndex(group);
+ }
+
+ private void assertMatchOp()
+ {
+ if (match == null) throw new IllegalStateException();
}
}
diff --git a/java/util/regex/Pattern.java b/java/util/regex/Pattern.java
index 7d99b92bb..d30096049 100644
--- a/java/util/regex/Pattern.java
+++ b/java/util/regex/Pattern.java
@@ -1,5 +1,5 @@
-/* Pattern.java --
- Copyright (C) 2002 Free Software Foundation, Inc.
+/* Pattern.java -- Compiled regular expression ready to be applied.
+ Copyright (C) 2002, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -35,13 +35,19 @@ this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
-// Stub class until java.util.regex is implemented.
package java.util.regex;
+import gnu.regexp.RE;
+import gnu.regexp.RESyntax;
+import gnu.regexp.REException;
+
import java.io.Serializable;
+import java.util.ArrayList;
+
/**
- * @author Michael Koch
+ * Compiled regular expression ready to be applied.
+ *
* @since 1.4
*/
public class Pattern implements Serializable
@@ -56,8 +62,10 @@ public class Pattern implements Serializable
public static final int UNICODE_CASE = 64;
public static final int UNIX_LINES = 1;
- private String regex;
- private int flags;
+ private final String regex;
+ private final int flags;
+
+ private final RE re;
private Pattern (String regex)
throws PatternSyntaxException
@@ -71,9 +79,48 @@ public class Pattern implements Serializable
this.regex = regex;
this.flags = flags;
- throw new Error ("Not implemented");
+ int gnuFlags = 0;
+ if ((flags & CASE_INSENSITIVE) != 0)
+ gnuFlags |= RE.REG_ICASE;
+ if ((flags & MULTILINE) != 0)
+ gnuFlags |= RE.REG_MULTILINE;
+ if ((flags & DOTALL) != 0)
+ gnuFlags |= RE.REG_DOT_NEWLINE;
+ // not yet supported:
+ // if ((flags & UNICODE_CASE) != 0) gnuFlags =
+ // if ((flags & CANON_EQ) != 0) gnuFlags =
+
+ // Eventually there will be such a thing as JDK 1_4 syntax
+ RESyntax syntax = RESyntax.RE_SYNTAX_PERL5;
+ if ((flags & UNIX_LINES) != 0)
+ {
+ // Use a syntax set with \n for linefeeds?
+ syntax = new RESyntax(syntax);
+ syntax.setLineSeparator("\n");
+ }
+
+ if ((flags & COMMENTS) != 0)
+ {
+ // Use a syntax with support for comments?
+ }
+
+ try
+ {
+ this.re = new RE(regex, gnuFlags, syntax);
+ }
+ catch (REException e)
+ {
+ throw new PatternSyntaxException(e.getMessage(),
+ regex, e.getPosition());
+ }
}
+ // package private accessor method
+ RE getRE()
+ {
+ return re;
+ }
+
/**
* @param regex The regular expression
*
@@ -82,7 +129,7 @@ public class Pattern implements Serializable
public static Pattern compile (String regex)
throws PatternSyntaxException
{
- throw new Error ("Not implemented");
+ return compile(regex, 0);
}
/**
@@ -116,7 +163,7 @@ public class Pattern implements Serializable
*/
public static boolean matches (String regex, CharSequence input)
{
- throw new Error ("Not implemented");
+ return compile(regex).matcher(input).matches();
}
/**
@@ -124,7 +171,7 @@ public class Pattern implements Serializable
*/
public Matcher matcher (CharSequence input)
{
- throw new Error ("Not implemented");
+ return new Matcher(this, input);
}
/**
@@ -132,7 +179,7 @@ public class Pattern implements Serializable
*/
public String[] split (CharSequence input)
{
- throw new Error ("Not implemented");
+ return split(input, 0);
}
/**
@@ -141,11 +188,67 @@ public class Pattern implements Serializable
*/
public String[] split (CharSequence input, int limit)
{
- throw new Error ("Not implemented");
+ Matcher matcher = new Matcher(this, input);
+ ArrayList list = new ArrayList();
+ int empties = 0;
+ int count = 0;
+ int start = 0;
+ int end;
+ boolean matched;
+
+ while (matched = matcher.find() && (limit <= 0 || count < limit - 1))
+ {
+ ++count;
+ end = matcher.start();
+ if (start == end)
+ empties++;
+ else
+ {
+ while (empties-- > 0)
+ list.add("");
+
+ String text = input.subSequence(start, end).toString();
+ list.add(text);
+ }
+ start = matcher.end();
+ }
+
+ // We matched nothing.
+ if (!matched && count == 0)
+ return new String[] { input.toString() };
+
+ // Is the last token empty?
+ boolean emptyLast = (start == input.length());
+
+ // Can/Must we add empties or an extra last token at the end?
+ if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
+ {
+ if (limit > list.size())
+ {
+ int max = limit - list.size();
+ empties = (empties > max) ? max : empties;
+ }
+ while (empties-- > 0)
+ list.add("");
+ }
+
+ // last token at end
+ if (limit != 0 || (limit == 0 && !emptyLast))
+ {
+ String t = input.subSequence(start, input.length()).toString();
+ if ("".equals(t) && limit == 0)
+ ; // Don't add.
+ else
+ list.add(t);
+ }
+
+ String[] output = new String [list.size()];
+ list.toArray(output);
+ return output;
}
public String pattern ()
{
- throw new Error ("Not implemented");
+ return regex;
}
}
diff --git a/resource/gnu/Makefile.am b/resource/gnu/Makefile.am
index 8b1b73918..3353b2e09 100644
--- a/resource/gnu/Makefile.am
+++ b/resource/gnu/Makefile.am
@@ -1,4 +1,4 @@
## used by automake to generate Makefile.in
-SUBDIRS = java
+SUBDIRS = java regexp
diff --git a/resource/gnu/regexp/Makefile.am b/resource/gnu/regexp/Makefile.am
new file mode 100644
index 000000000..eac02040f
--- /dev/null
+++ b/resource/gnu/regexp/Makefile.am
@@ -0,0 +1,3 @@
+# automake uses this file to generate Makefile.in
+
+EXTRA_DIST = MessagesBundle.properties MessagesBundle_fr.properties
diff --git a/resource/gnu/regexp/MessagesBundle.properties b/resource/gnu/regexp/MessagesBundle.properties
new file mode 100644
index 000000000..1e077a403
--- /dev/null
+++ b/resource/gnu/regexp/MessagesBundle.properties
@@ -0,0 +1,22 @@
+# Localized error messages for gnu.regexp
+
+# Prefix for REException messages
+error.prefix=At position {0} in regular expression pattern:
+
+# REException (parse error) messages
+repeat.assertion=repeated token is zero-width assertion
+repeat.chained=attempted to repeat a token that is already repeated
+repeat.no.token=quantifier (?*+{}) without preceding token
+repeat.empty.token=repeated token may be empty
+unmatched.brace=unmatched brace
+unmatched.bracket=unmatched bracket
+unmatched.paren=unmatched parenthesis
+interval.no.end=expected end of interval
+class.no.end=expected end of character class
+subexpr.no.end=expected end of subexpression
+interval.order=interval minimum is greater than maximum
+interval.error=interval is empty or contains illegal characters
+ends.with.backslash=backslash at end of pattern
+
+# RESyntax message
+syntax.final=Syntax has been declared final and cannot be modified
diff --git a/resource/gnu/regexp/MessagesBundle_fr.properties b/resource/gnu/regexp/MessagesBundle_fr.properties
new file mode 100644
index 000000000..8ab8356c1
--- /dev/null
+++ b/resource/gnu/regexp/MessagesBundle_fr.properties
@@ -0,0 +1,22 @@
+# Localized error messages for gnu.regexp
+
+# Prefix for REException messages
+error.prefix=A l''index {0} dans le modèle d''expression régulière:
+
+# REException (parse error) messages
+repeat.assertion=l'élément répété est de largeur zéro
+repeat.chained=tentative de répétition d'un élément déjà répété
+repeat.no.token=quantifieur (?*+{}) sans élément précédent
+repeat.empty.token=l'élément répété peut être vide
+unmatched.brace=accolade inégalée
+unmatched.bracket=crochet inégalé
+unmatched.paren=parenthèse inégalée
+interval.no.end=fin d'interval attendue
+class.no.end=fin de classe de caractères attendue
+subexpr.no.end=fin de sous-expression attendue
+interval.order=l'interval minimum est supérieur à l'interval maximum
+interval.error=l'interval est vide ou contient des caractères illégaux
+ends.with.backslash=antislash à la fin du modèle
+
+# RESyntax message
+syntax.final=La syntaxe a été déclarée finale et ne peut pas être modifiée