summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-15 18:24:05 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-15 18:24:05 +0000
commit 546f82145e04a8f8f35c4a2a4e31787f53016fe4 (patch)
tree e38d00088116d3ec26fc5c9549bc0f9591e1d1b6
parent b61d2d38ff08b09864d4ec957b65239e8b56b720 (diff)
download pcre-546f82145e04a8f8f35c4a2a4e31787f53016fe4.tar.gz
Add PCRE_NO_START_OPTIMIZE
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@389 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 2
-rw-r--r-- doc/pcre_dfa_exec.3 1
-rw-r--r-- doc/pcre_exec.3 1
-rw-r--r-- doc/pcreapi.3 15
-rw-r--r-- doc/pcrecallout.3 12
-rw-r--r-- pcre.h.in 7
-rw-r--r-- pcre_compile.c 2
-rw-r--r-- pcre_dfa_exec.c 130
-rw-r--r-- pcre_exec.c 156
-rw-r--r-- pcre_fullinfo.c 4
-rw-r--r-- pcre_info.c 4
-rw-r--r-- pcre_internal.h 7
-rw-r--r-- pcretest.c 4
-rw-r--r-- testdata/testinput2 10
-rw-r--r-- testdata/testinput7 10
-rw-r--r-- testdata/testoutput2 53
-rw-r--r-- testdata/testoutput7 53
17 files changed, 324 insertions, 147 deletions
diff --git a/ChangeLog b/ChangeLog
index 3c9c302..9832dd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -54,6 +54,8 @@ Version 7.9 xx-xxx-09
12. Added the -M command line option to pcretest.
14. Added the non-standard REG_NOTEMPTY option to the POSIX interface.
+
+15. Added the PCRE_NO_START_OPTIMIZE match-time option.
Version 7.8 05-Sep-08
diff --git a/doc/pcre_dfa_exec.3 b/doc/pcre_dfa_exec.3
index 274b97c..0975f4d 100644
--- a/doc/pcre_dfa_exec.3
+++ b/doc/pcre_dfa_exec.3
@@ -49,6 +49,7 @@ The options are:
PCRE_NOTBOL Subject is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match
+ PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
diff --git a/doc/pcre_exec.3 b/doc/pcre_exec.3
index 834a1f2..292fe57 100644
--- a/doc/pcre_exec.3
+++ b/doc/pcre_exec.3
@@ -44,6 +44,7 @@ The options are:
PCRE_NOTBOL Subject is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match
+ PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 0f37679..5e71486 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1237,7 +1237,8 @@ documentation for a discussion of saving compiled patterns for later use.
.sp
The unused bits of the \fIoptions\fP argument for \fBpcre_exec()\fP must be
zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
-PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NO_UTF8_CHECK and PCRE_PARTIAL.
+PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NO_START_OPTIMIZE,
+PCRE_NO_UTF8_CHECK and PCRE_PARTIAL.
.sp
PCRE_ANCHORED
.sp
@@ -1324,6 +1325,16 @@ PCRE_NOTEMPTY and PCRE_ANCHORED, and then if that fails by advancing the
starting offset (see below) and trying an ordinary match again. There is some
code that demonstrates how to do this in the \fIpcredemo.c\fP sample program.
.sp
+ PCRE_NO_START_OPTIMIZE
+.sp
+There are a number of optimizations that \fBpcre_exec()\fP uses at the start of
+a match, in order to speed up the process. For example, if it is known that a
+match must start with a specific character, it searches the subject for that
+character, and fails immediately if it cannot find it, without actually running
+the main matching function. When callouts are in use, these optimizations can
+cause them to be skipped. This option disables the "start-up" optimizations,
+causing performance to suffer, but ensuring that the callouts do occur.
+.sp
PCRE_NO_UTF8_CHECK
.sp
When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
@@ -1982,6 +1993,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 08 March 2009
+Last updated: 15 March 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
diff --git a/doc/pcrecallout.3 b/doc/pcrecallout.3
index 1258c4e..8923689 100644
--- a/doc/pcrecallout.3
+++ b/doc/pcrecallout.3
@@ -44,7 +44,8 @@ trying to optimize the performance of a particular pattern.
.rs
.sp
You should be aware that, because of optimizations in the way PCRE matches
-patterns, callouts sometimes do not happen. For example, if the pattern is
+patterns by default, callouts sometimes do not happen. For example, if the
+pattern is
.sp
ab(?C4)cd
.sp
@@ -52,6 +53,11 @@ PCRE knows that any matching string must contain the letter "d". If the subject
string is "abyz", the lack of "d" means that matching doesn't ever start, and
the callout is never reached. However, with "abyd", though the result is still
no match, the callout is obeyed.
+.P
+You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
+option to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. This slows down the
+matching process, but does ensure that callouts such as the example above are
+obeyed.
.
.
.SH "THE CALLOUT INTERFACE"
@@ -172,6 +178,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 29 May 2007
-Copyright (c) 1997-2007 University of Cambridge.
+Last updated: 15 March 2009
+Copyright (c) 1997-2009 University of Cambridge.
.fi
diff --git a/pcre.h.in b/pcre.h.in
index bb82c82..b4608e8 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,8 @@ it is needed here for malloc. */
extern "C" {
#endif
-/* Options */
+/* Options. Some are compile-time only, some are run-time only, and some are
+both, so we keep them all distinct. */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
@@ -125,6 +126,8 @@ extern "C" {
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
+#define PCRE_NO_START_OPTIMIZE 0x04000000
+#define PCRE_NO_START_OPTIMISE 0x04000000
/* Exec-time and get/set-time error codes */
diff --git a/pcre_compile.c b/pcre_compile.c
index 5f6d89f..25f7dd5 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6055,7 +6055,7 @@ if ((options & PCRE_UTF8) != 0)
}
#endif
-if ((options & ~PUBLIC_OPTIONS) != 0)
+if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
{
errorcode = ERR17;
goto PCRE_EARLY_ERROR_RETURN;
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index da0af62..248a6b1 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -2714,9 +2714,8 @@ if ((re->flags & PCRE_REQCHSET) != 0)
}
/* Call the main matching function, looping for a non-anchored regex after a
-failed match. Unless restarting, optimize by moving to the first match
-character if possible, when not anchored. Then unless wanting a partial match,
-check for a required later character. */
+failed match. If not restarting, perform certain optimizations at the start of
+a match. */
for (;;)
{
@@ -2726,11 +2725,10 @@ for (;;)
{
const uschar *save_end_subject = end_subject;
- /* Advance to a unique first char if possible. If firstline is TRUE, the
- start of the match is constrained to the first line of a multiline string.
- Implement this by temporarily adjusting end_subject so that we stop
- scanning at a newline. If the match fails at the newline, later code breaks
- this loop. */
+ /* If firstline is TRUE, the start of the match is constrained to the first
+ line of a multiline string. Implement this by temporarily adjusting
+ end_subject so that we stop scanning at a newline. If the match fails at
+ the newline, later code breaks this loop. */
if (firstline)
{
@@ -2749,63 +2747,76 @@ for (;;)
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
-
- if (first_byte >= 0)
- {
- if (first_byte_caseless)
- while (current_subject < end_subject &&
- lcc[*current_subject] != first_byte)
- current_subject++;
- else
- while (current_subject < end_subject && *current_subject != first_byte)
- current_subject++;
- }
-
- /* Or to just after a linebreak for a multiline match if possible */
-
- else if (startline)
- {
- if (current_subject > md->start_subject + start_offset)
+
+ /* There are some optimizations that avoid running the match if a known
+ starting point is not found, or if a known later character is not present.
+ However, there is an option that disables these, for testing and for
+ ensuring that all callouts do actually occur. */
+
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0)
+ {
+
+ /* Advance to a known first byte. */
+
+ if (first_byte >= 0)
{
-#ifdef SUPPORT_UTF8
- if (utf8)
+ if (first_byte_caseless)
+ while (current_subject < end_subject &&
+ lcc[*current_subject] != first_byte)
+ current_subject++;
+ else
+ while (current_subject < end_subject &&
+ *current_subject != first_byte)
+ current_subject++;
+ }
+
+ /* Or to just after a linebreak for a multiline match if possible */
+
+ else if (startline)
+ {
+ if (current_subject > md->start_subject + start_offset)
{
- while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+ if (utf8)
{
- current_subject++;
- while(current_subject < end_subject &&
- (*current_subject & 0xc0) == 0x80)
+ while (current_subject < end_subject &&
+ !WAS_NEWLINE(current_subject))
+ {
current_subject++;
+ while(current_subject < end_subject &&
+ (*current_subject & 0xc0) == 0x80)
+ current_subject++;
+ }
}
- }
- else
+ else
#endif
- while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
- current_subject++;
-
- /* If we have just passed a CR and the newline option is ANY or
- ANYCRLF, and we are now at a LF, advance the match position by one more
- character. */
-
- if (current_subject[-1] == '\r' &&
- (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
- current_subject < end_subject &&
- *current_subject == '\n')
- current_subject++;
+ while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+ current_subject++;
+
+ /* If we have just passed a CR and the newline option is ANY or
+ ANYCRLF, and we are now at a LF, advance the match position by one
+ more character. */
+
+ if (current_subject[-1] == '\r' &&
+ (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
+ current_subject < end_subject &&
+ *current_subject == '\n')
+ current_subject++;
+ }
}
- }
-
- /* Or to a non-unique first char after study */
-
- else if (start_bits != NULL)
- {
- while (current_subject < end_subject)
+
+ /* Or to a non-unique first char after study */
+
+ else if (start_bits != NULL)
{
- register unsigned int c = *current_subject;
- if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
- else break;
+ while (current_subject < end_subject)
+ {
+ register unsigned int c = *current_subject;
+ if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
+ else break;
+ }
}
- }
+ }
/* Restore fudged end_subject */
@@ -2825,10 +2836,11 @@ for (;;)
showed up when somebody was matching /^C/ on a 32-megabyte string... so we
don't do this when the string is sufficiently long.
- ALSO: this processing is disabled when partial matching is requested.
- */
+ ALSO: this processing is disabled when partial matching is requested, and can
+ also be explicitly deactivated. */
- if (req_byte >= 0 &&
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
+ req_byte >= 0 &&
end_subject - current_subject < REQ_BYTE_MAX &&
(options & PCRE_PARTIAL) == 0)
{
diff --git a/pcre_exec.c b/pcre_exec.c
index b2eeac9..28b9b93 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -4716,12 +4716,12 @@ for(;;)
register int *iend = iptr + resetcount;
while (iptr < iend) *iptr++ = -1;
}
-
- /* Advance to a unique first char if possible. If firstline is TRUE, the
- start of the match is constrained to the first line of a multiline string.
- That is, the match must be before or at the first newline. Implement this by
- temporarily adjusting end_subject so that we stop scanning at a newline. If
- the match fails at the newline, later code breaks this loop. */
+
+ /* If firstline is TRUE, the start of the match is constrained to the first
+ line of a multiline string. That is, the match must be before or at the first
+ newline. Implement this by temporarily adjusting end_subject so that we stop
+ scanning at a newline. If the match fails at the newline, later code breaks
+ this loop. */
if (firstline)
{
@@ -4740,66 +4740,74 @@ for(;;)
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
-
- /* Now advance to a unique first byte if there is one. */
-
- if (first_byte >= 0)
- {
- if (first_byte_caseless)
- while (start_match < end_subject && md->lcc[*start_match] != first_byte)
- start_match++;
- else
- while (start_match < end_subject && *start_match != first_byte)
- start_match++;
- }
-
- /* Or to just after a linebreak for a multiline match */
-
- else if (startline)
- {
- if (start_match > md->start_subject + start_offset)
+
+ /* There are some optimizations that avoid running the match if a known
+ starting point is not found, or if a known later character is not present.
+ However, there is an option that disables these, for testing and for ensuring
+ that all callouts do actually occur. */
+
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0)
+ {
+ /* Advance to a unique first byte if there is one. */
+
+ if (first_byte >= 0)
{
-#ifdef SUPPORT_UTF8
- if (utf8)
+ if (first_byte_caseless)
+ while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+ start_match++;
+ else
+ while (start_match < end_subject && *start_match != first_byte)
+ start_match++;
+ }
+
+ /* Or to just after a linebreak for a multiline match */
+
+ else if (startline)
+ {
+ if (start_match > md->start_subject + start_offset)
{
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
+#ifdef SUPPORT_UTF8
+ if (utf8)
{
- start_match++;
- while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ {
start_match++;
+ while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+ start_match++;
+ }
}
- }
- else
+ else
#endif
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
- start_match++;
-
- /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
- and we are now at a LF, advance the match position by one more character.
- */
-
- if (start_match[-1] == '\r' &&
- (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
- start_match < end_subject &&
- *start_match == '\n')
- start_match++;
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ start_match++;
+
+ /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
+ and we are now at a LF, advance the match position by one more character.
+ */
+
+ if (start_match[-1] == '\r' &&
+ (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
+ start_match < end_subject &&
+ *start_match == '\n')
+ start_match++;
+ }
}
- }
-
- /* Or to a non-unique first byte after study */
-
- else if (start_bits != NULL)
- {
- while (start_match < end_subject)
+
+ /* Or to a non-unique first byte after study */
+
+ else if (start_bits != NULL)
{
- register unsigned int c = *start_match;
- if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
- else break;
+ while (start_match < end_subject)
+ {
+ register unsigned int c = *start_match;
+ if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
+ else break;
+ }
}
- }
-
+ } /* Starting optimizations */
+
/* Restore fudged end_subject */
-
+
end_subject = save_end_subject;
#ifdef DEBUG /* Sigh. Some compilers never learn. */
@@ -4808,23 +4816,25 @@ for(;;)
printf("\n");
#endif
- /* If req_byte is set, we know that that character must appear in the subject
- for the match to succeed. If the first character is set, req_byte must be
- later in the subject; otherwise the test starts at the match point. This
- optimization can save a huge amount of backtracking in patterns with nested
- unlimited repeats that aren't going to match. Writing separate code for
- cased/caseless versions makes it go faster, as does using an autoincrement
- and backing off on a match.
-
- HOWEVER: when the subject string is very, very long, searching to its end can
- take a long time, and give bad performance on quite ordinary patterns. This
- showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
- string... so we don't do this when the string is sufficiently long.
-
- ALSO: this processing is disabled when partial matching is requested.
- */
-
- if (req_byte >= 0 &&
+ /* If req_byte is set, we know that that character must appear in the
+ subject for the match to succeed. If the first character is set, req_byte
+ must be later in the subject; otherwise the test starts at the match point.
+ This optimization can save a huge amount of backtracking in patterns with
+ nested unlimited repeats that aren't going to match. Writing separate code
+ for cased/caseless versions makes it go faster, as does using an
+ autoincrement and backing off on a match.
+
+ HOWEVER: when the subject string is very, very long, searching to its end
+ can take a long time, and give bad performance on quite ordinary patterns.
+ This showed up when somebody was matching something like /^\d+C/ on a
+ 32-megabyte string... so we don't do this when the string is sufficiently
+ long.
+
+ ALSO: this processing is disabled when partial matching is requested, or if
+ disabling is explicitly requested. */
+
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
+ req_byte >= 0 &&
end_subject - start_match < REQ_BYTE_MAX &&
!md->partial)
{
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 30566bb..3a343bd 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -89,7 +89,7 @@ if (re->magic_number != MAGIC_NUMBER)
switch (what)
{
case PCRE_INFO_OPTIONS:
- *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
+ *((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
break;
case PCRE_INFO_SIZE:
diff --git a/pcre_info.c b/pcre_info.c
index 02cf1c9..f35f398 100644
--- a/pcre_info.c
+++ b/pcre_info.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -83,7 +83,7 @@ if (re->magic_number != MAGIC_NUMBER)
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
}
-if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
+if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
if (first_byte != NULL)
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
diff --git a/pcre_internal.h b/pcre_internal.h
index c836a40..de46426 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -540,7 +540,7 @@ time, run time, or study time, respectively. */
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
PCRE_NEWLINE_ANYCRLF)
-#define PUBLIC_OPTIONS \
+#define PUBLIC_COMPILE_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
@@ -549,12 +549,13 @@ time, run time, or study time, respectively. */
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
- PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
+ PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
+ PCRE_NO_START_OPTIMIZE)
#define PUBLIC_DFA_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
- PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
+ PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
diff --git a/pcretest.c b/pcretest.c
index a597f7a..dd40e3f 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -2009,6 +2009,10 @@ while (!done)
case 'S':
show_malloc = 1;
continue;
+
+ case 'Y':
+ options |= PCRE_NO_START_OPTIMIZE;
+ continue;
case 'Z':
options |= PCRE_NOTEOL;
diff --git a/testdata/testinput2 b/testdata/testinput2
index cf3a38b..e84a49d 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2739,4 +2739,14 @@ a random value. /Ix
** Failers
ddd\N
+/xyz/C
+ xyz
+ abcxyz
+ abcxyz\Y
+ ** Failers
+ abc
+ abc\Y
+ abcxypqr
+ abcxypqr\Y
+
/ End of testinput2 /
diff --git a/testdata/testinput7 b/testdata/testinput7
index dbe9c1a..e9f18cd 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4401,4 +4401,14 @@
X
X\n
+/xyz/C
+ xyz
+ abcxyz
+ abcxyz\Y
+ ** Failers
+ abc
+ abc\Y
+ abcxypqr
+ abcxypqr\Y
+
/ End of testinput7 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index b38b65b..ddde568 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -9698,4 +9698,57 @@ No need char
ddd\N
No match: POSIX code 17: match failed
+/xyz/C
+ xyz
+--->xyz
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ abcxyz
+--->abcxyz
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ abcxyz\Y
+--->abcxyz
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ ** Failers
+No match
+ abc
+No match
+ abc\Y
+--->abc
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+No match
+ abcxypqr
+No match
+ abcxypqr\Y
+--->abcxypqr
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+No match
+
/ End of testinput2 /
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 2756a67..49479e4 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -7268,4 +7268,57 @@ No match
X\n
0: X
+/xyz/C
+ xyz
+--->xyz
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ abcxyz
+--->abcxyz
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ abcxyz\Y
+--->abcxyz
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +3 ^ ^
+ 0: xyz
+ ** Failers
+No match
+ abc
+No match
+ abc\Y
+--->abc
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+No match
+ abcxypqr
+No match
+ abcxypqr\Y
+--->abcxypqr
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +1 ^^ y
+ +2 ^ ^ z
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+ +0 ^ x
+No match
+
/ End of testinput7 /