summary | refs | log | tree | commit | diff
path: root/gnu/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/regexp')
-rw-r--r-- gnu/regexp/RE.java 54
1 files changed, 33 insertions, 21 deletions
diff --git a/gnu/regexp/RE.java b/gnu/regexp/RE.java
index a01de58f1..c8c8a3eb9 100644
--- a/gnu/regexp/RE.java
+++ b/gnu/regexp/RE.java
@@ -331,10 +331,22 @@ public class RE extends REToken {
// Buffer a token so we can create a TokenRepeated, etc.
REToken currentToken = null;
char ch;
+ boolean quot = false;
while (index < pLength) {
// read the next character unit (including backslash escapes)
- index = getCharUnit(pattern,index,unit);
+ index = getCharUnit(pattern,index,unit,quot);
+
+ if (unit.bk)
+ if (unit.ch == 'Q') {
+ quot = true;
+ continue;
+ } else if (unit.ch == 'E') {
+ quot = false;
+ continue;
+ }
+ if (quot)
+ unit.bk = false;
// ALTERNATION OPERATOR
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
@@ -342,8 +354,8 @@ public class RE extends REToken {
// TODO: the '\n' literal here should be a test against REToken.newline,
// which unfortunately may be more than a single character.
- if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
- || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
+ if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
+ || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
&& !syntax.get(RESyntax.RE_LIMITED_OPS)) {
// make everything up to here be a branch. create vector if nec.
addToken(currentToken);
@@ -363,7 +375,7 @@ public class RE extends REToken {
// OPEN QUESTION:
// what is proper interpretation of '{' at start of string?
- else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
+ else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
int newIndex = getMinMax(pattern,index,minMax,syntax);
if (newIndex > index) {
if (minMax.first > minMax.second)
@@ -388,7 +400,7 @@ public class RE extends REToken {
// LIST OPERATOR:
// [...] | [^...]
- else if ((unit.ch == '[') && !unit.bk) {
+ else if ((unit.ch == '[') && !(unit.bk || quot)) {
Vector options = new Vector();
boolean negative = false;
char lastChar = 0;
@@ -490,7 +502,7 @@ public class RE extends REToken {
// SUBEXPRESSIONS
// (...) | \(...\) depending on RE_NO_BK_PARENS
- else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
+ else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
boolean pure = false;
boolean comment = false;
boolean lookAhead = false;
@@ -537,13 +549,13 @@ public class RE extends REToken {
int nextIndex = index;
int nested = 0;
- while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
- && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
+ while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
+ && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
if ((endIndex = nextIndex) >= pLength)
throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
- else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested++;
- else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
+ else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested--;
// endIndex is now position at a ')','\)'
@@ -572,14 +584,14 @@ public class RE extends REToken {
// UNMATCHED RIGHT PAREN
// ) or \) throw exception if
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
- else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
+ else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
}
// START OF LINE OPERATOR
// ^
- else if ((unit.ch == '^') && !unit.bk) {
+ else if ((unit.ch == '^') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
@@ -588,7 +600,7 @@ public class RE extends REToken {
// END OF LINE OPERATOR
// $
- else if ((unit.ch == '$') && !unit.bk) {
+ else if ((unit.ch == '$') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
@@ -597,7 +609,7 @@ public class RE extends REToken {
// MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
// .
- else if ((unit.ch == '.') && !unit.bk) {
+ else if ((unit.ch == '.') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
}
@@ -605,7 +617,7 @@ public class RE extends REToken {
// ZERO-OR-MORE REPEAT OPERATOR
// *
- else if ((unit.ch == '*') && !unit.bk) {
+ else if ((unit.ch == '*') && !(unit.bk || quot)) {
if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@@ -621,7 +633,7 @@ public class RE extends REToken {
// + | \+ depending on RE_BK_PLUS_QM
// not available if RE_LIMITED_OPS is set
- else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@@ -638,7 +650,7 @@ public class RE extends REToken {
// not available if RE_LIMITED_OPS is set
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
- else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
+ else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
// Check for stingy matching on RETokenRepeated
@@ -805,9 +817,9 @@ public class RE extends REToken {
}
- private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
+ private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
unit.ch = input[index++];
- if (unit.bk = (unit.ch == '\\'))
+ if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E')))
if (index < input.length)
unit.ch = input[index++];
else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
@@ -1281,7 +1293,7 @@ public class RE extends REToken {
// Read string of digits
do {
- index = getCharUnit(input,index,unit);
+ index = getCharUnit(input,index,unit,false);
if (Character.isDigit(unit.ch))
buf.append(unit.ch);
} while ((index != input.length) && Character.isDigit(unit.ch));
@@ -1306,7 +1318,7 @@ public class RE extends REToken {
else if ((unit.ch == ',') && !unit.bk) {
buf = new StringBuffer();
// Read string of digits
- while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
+ while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
buf.append(unit.ch);
if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))