summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave Love <fx@gnu.org> 1999-12-15 15:15:29 +0000
committer Dave Love <fx@gnu.org> 1999-12-15 15:15:29 +0000
commit d6156acf588e9e8cd572f8723f6ad34f830a68a0 (patch)
tree 73938ff21eea8a9eca35ff8998b417166f197268
parent 9d5dde0586a8f360fcfbc348a343515e49816e48 (diff)
download gnulib-d6156acf588e9e8cd572f8723f6ad34f830a68a0.tar.gz
1999-12-15 Kenichi Handa <handa@etl.go.jp>
* regex.c (regex_compile): Adjusted for the change of CHAR_STRING.

1999-12-04 Stefan Monnier <monnier@cs.yale.edu>

* regex.c (regex_compile): Recognize *?, +? and ?? as non-greedy operators and handle them properly.
* regex.h (RE_ALL_GREEDY): New option.
(RE_UNMATCHED_RIGHT_PAREN_ORD): Moved to the end where alphabetic sorting would put it.
(RE_SYNTAX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP)
(_RE_SYNTAX_POSIX_COMMON): Use the new option to keep old behavior.
-rw-r--r--regex.c51
1 files changed, 46 insertions, 5 deletions
diff --git a/regex.c b/regex.c
index 3cf8a139a4..6660f07cac 100644
--- a/regex.c
+++ b/regex.c
@@ -2168,6 +2168,7 @@ regex_compile (pattern, size, syntax, bufp)
/* 1 means zero (many) matches is allowed. */
char zero_times_ok = 0, many_times_ok = 0;
+ char greedy = 1;
/* If there is a sequence of repetition chars, collapse it
down to just one (the right one). We can't combine
@@ -2176,8 +2177,14 @@ regex_compile (pattern, size, syntax, bufp)
for (;;)
{
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
+ if (!(syntax & RE_ALL_GREEDY)
+ && c == '?' && (zero_times_ok || many_times_ok))
+ greedy = 0;
+ else
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+ }
if (p == pend)
break;
@@ -2218,6 +2225,8 @@ regex_compile (pattern, size, syntax, bufp)
/* Now we know whether or not zero matches is allowed
and also whether or not two or more matches is allowed. */
+ if (greedy)
+ {
if (many_times_ok)
{ /* More than one repetition is allowed, so put in at the
end a backward relative jump from `b' to before the next
@@ -2276,7 +2285,39 @@ regex_compile (pattern, size, syntax, bufp)
INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
b += 3;
}
- }
+
+ }
+ else /* not greedy */
+ { /* I wish the greedy and non-greedy cases could be merged. */
+
+ if (many_times_ok)
+ {
+ /* The non-greedy multiple match looks like a repeat..until:
+ we only need a conditional jump at the end of the loop.  */
+ GET_BUFFER_SPACE (3);
+ STORE_JUMP (on_failure_jump, b, laststart);
+ b += 3;
+ if (zero_times_ok)
+ {
+ /* The repeat...until naturally matches one or more.
+ To also match zero times, we need to first jump to
+ the end of the loop (its conditional jump). */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b);
+ b += 3;
+ }
+ }
+ else
+ {
+ /* non-greedy a?? */
+ GET_BUFFER_SPACE (6);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ }
break;
@@ -3110,8 +3151,8 @@ regex_compile (pattern, size, syntax, bufp)
#ifdef emacs
if (! SINGLE_BYTE_CHAR_P (c))
{
- unsigned char work[4], *str;
- int i = CHAR_STRING (c, work, str);
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int i = CHAR_STRING (c, str);
int j;
for (j = 0; j < i; j++)
{