summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/pcre2test.132
-rw-r--r--src/pcre2.h.in37
-rw-r--r--src/pcre2_error.c7
-rw-r--r--src/pcre2_substitute.c94
-rw-r--r--src/pcre2test.c247
-rw-r--r--testdata/grepoutput14
-rw-r--r--testdata/testinput261
-rw-r--r--testdata/testinput55
-rw-r--r--testdata/testoutput102
-rw-r--r--testdata/testoutput12-164
-rw-r--r--testdata/testoutput1424
-rw-r--r--testdata/testoutput1630
-rw-r--r--testdata/testoutput2102
-rw-r--r--testdata/testoutput56
-rw-r--r--testdata/testoutput624
-rw-r--r--testdata/testoutput72
16 files changed, 520 insertions, 171 deletions
diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index 8434bc4..2dbe5d3 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "02 November 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "09 November 2014" "PCRE 10.00"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -447,7 +447,6 @@ about the pattern:
posix use the POSIX API
stackguard=<number> test the stackguard feature
tables=[0|1|2] select internal tables
- use_length use the pattern's length
.sp
The effects of these modifiers are described in the following sections.
FIXME: Give more examples.
@@ -497,15 +496,10 @@ pairs. For example:
/ab 32 59/hex
.sp
This feature is provided as a way of creating patterns that contain binary zero
-characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
-.
-.
-.SS "Using the pattern's length"
-.rs
-.sp
-By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
-\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
-length of the pattern is passed. This is implied if \fBhex\fP is set.
+characters. By default, \fBpcre2test\fP passes patterns as zero-terminated
+strings to \fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED.
+However, for patterns specified in hexadecimal, the length of the pattern is
+passed.
.
.
.SS "JIT compilation"
@@ -726,6 +720,7 @@ pattern.
ovector=<n> set size of output vector
recursion_limit=<n> set a recursion limit
startchar show startchar when relevant
+ zero_terminate pass the subject as zero-terminated
.sp
The effects of these modifiers are described in the following sections.
FIXME: Give more examples.
@@ -931,6 +926,19 @@ create a match block with a zero-length ovector; there is always one pair of
offsets.)
.
.
+.SS "Passing the subject as zero-terminated"
+.rs
+.sp
+By default, the subject string is passed to a native API matching function with
+its correct length. In order to test the facility for passing a zero-terminated
+string, the \fBzero_terminate\fP modifier is provided. It causes the length to
+be passed as PCRE2_ZERO_TERMINATED. (When matching via the POSIX interface,
+this modifier has no effect, as there is no facility for passing a length.)
+.P
+When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
+passing the replacement string as zero-terminated.
+.
+.
.SH "THE ALTERNATIVE MATCHING FUNCTION"
.rs
.sp
@@ -1192,6 +1200,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 02 November 2014
+Last updated: 09 November 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 0ea211c..1dd5906 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -206,24 +206,25 @@ context functions. */
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
-#define PCRE2_ERROR_BADUTFOFFSET (-35)
-#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_DFA_BADRESTART (-37)
-#define PCRE2_ERROR_DFA_RECURSE (-38)
-#define PCRE2_ERROR_DFA_UCOND (-39)
-#define PCRE2_ERROR_DFA_UITEM (-40)
-#define PCRE2_ERROR_DFA_WSSIZE (-41)
-#define PCRE2_ERROR_INTERNAL (-42)
-#define PCRE2_ERROR_JIT_BADOPTION (-43)
-#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
-#define PCRE2_ERROR_MATCHLIMIT (-45)
-#define PCRE2_ERROR_NOMEMORY (-46)
-#define PCRE2_ERROR_NOSUBSTRING (-47)
-#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
-#define PCRE2_ERROR_NULL (-49)
-#define PCRE2_ERROR_RECURSELOOP (-50)
-#define PCRE2_ERROR_RECURSIONLIMIT (-51)
-#define PCRE2_ERROR_UNSET (-52)
+#define PCRE2_ERROR_BADREPLACEMENT (-35)
+#define PCRE2_ERROR_BADUTFOFFSET (-36)
+#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART (-38)
+#define PCRE2_ERROR_DFA_RECURSE (-39)
+#define PCRE2_ERROR_DFA_UCOND (-40)
+#define PCRE2_ERROR_DFA_UITEM (-41)
+#define PCRE2_ERROR_DFA_WSSIZE (-42)
+#define PCRE2_ERROR_INTERNAL (-43)
+#define PCRE2_ERROR_JIT_BADOPTION (-44)
+#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
+#define PCRE2_ERROR_MATCHLIMIT (-46)
+#define PCRE2_ERROR_NOMEMORY (-47)
+#define PCRE2_ERROR_NOSUBSTRING (-48)
+#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
+#define PCRE2_ERROR_NULL (-50)
+#define PCRE2_ERROR_RECURSELOOP (-51)
+#define PCRE2_ERROR_RECURSIONLIMIT (-52)
+#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index 45b25cd..bd38714 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -206,24 +206,25 @@ static const char match_error_texts[] =
"bad offset value\0"
"bad option value\0"
/* 35 */
+ "invalid replacement string\0"
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
- "backreference condition or recursion test not supported for DFA matching\0"
/* 40 */
+ "backreference condition or recursion test not supported for DFA matching\0"
"item unsupported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
"bad JIT option\0"
- "JIT stack limit reached\0"
/* 45 */
+ "JIT stack limit reached\0"
"match limit exceeded\0"
"no more memory\0"
"unknown or unset substring\0"
"non-unique substring name\0"
- "NULL argument passed\0"
/* 50 */
+ "NULL argument passed\0"
"nested recursion at the same subject position\0"
"recursion limit exceeded\0"
"requested value is not set\0"
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 7bc6da9..1100c93 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -51,7 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
*************************************************/
/* This function applies a compiled re to a subject string and creates a new
-string with substitutione. The first 7 arguments are the same as for
+string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
Arguments:
@@ -69,6 +69,7 @@ Arguments:
Returns: > 0 number of substitutions made
< 0 an error code, including PCRE2_ERROR_NOMATCH if no match
+ PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -86,6 +87,11 @@ BOOL global = FALSE;
PCRE2_SIZE buff_offset, lengthleft, endlength;
PCRE2_SIZE *ovector;
+/* Partial matching is not valid. */
+
+if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
/* If no match data block is provided, create one. */
if (match_data == NULL)
@@ -129,11 +135,16 @@ do
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
+
+ /* Any error other than no match returns the error code. No match when not
+ doing the special after-empty-match global rematch, or when at the end of the
+ subject, breaks the global loop. Otherwise, advance the starting point and
+ try again. */
if (rc < 0)
{
- if (goptions == 0 || rc != PCRE2_ERROR_NOMATCH || start_offset >= length)
- break;
+ if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
+ if (goptions == 0 || start_offset >= length) break;
start_offset++;
if ((code->overall_options & PCRE2_UTF) != 0)
{
@@ -149,6 +160,8 @@ do
goptions = 0;
continue;
}
+
+ /* Handle a successful match. */
subs++;
if (rc == 0) rc = ovector_count;
@@ -161,29 +174,34 @@ do
for (i = 0; i < rlength; i++)
{
- if (replacement[i] == CHAR_DOLLAR_SIGN && i != rlength - 1)
+ if (replacement[i] == CHAR_DOLLAR_SIGN)
{
- int group = -1;
- int n = 0;
- BOOL inparens = FALSE;
- PCRE2_SIZE j = i + 1;
- PCRE2_SIZE sublength;
- PCRE2_UCHAR next = replacement[j];
- PCRE2_UCHAR name[33];
+ int group, n;
+ BOOL inparens;
+ PCRE2_SIZE sublength;
+ PCRE2_UCHAR next;
+ PCRE2_UCHAR name[33];
+
+ if (++i == rlength) goto BAD;
+ if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
+
+ group = -1;
+ n = 0;
+ inparens = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
- if (j == rlength - 1) goto LITERAL;
+ if (++i == rlength) goto BAD;
+ next = replacement[i];
inparens = TRUE;
- next = replacement[++j];
}
if (next >= CHAR_0 && next <= CHAR_9)
{
group = next - CHAR_0;
- while (j < rlength - 1)
+ while (i < rlength - 1)
{
- next = replacement[++j];
+ next = replacement[++i];
if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0;
}
@@ -194,31 +212,31 @@ do
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
{
name[n++] = next;
- if (n > 32) goto LITERAL;
- if (j == rlength - 1) break;
- next = replacement[++j];
+ if (n > 32) goto BAD;
+ if (i == rlength) break;
+ next = replacement[++i];
}
+ if (n == 0) goto BAD;
name[n] = 0;
}
if (inparens)
{
- if (j == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto LITERAL;
+ if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
}
- else j--; /* Last code unit of name/number */
-
+ else i--; /* Last code unit of name/number */
+
/* Have found a syntactically correct group number or name. */
- i = j; /* Where to continue from */
-
+ sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else
rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength);
-
- if (rc < 0) goto EXIT;
+
+ if (rc < 0) goto EXIT;
buff_offset += sublength;
lengthleft -= sublength;
}
@@ -242,20 +260,16 @@ do
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
} while (global); /* Repeat "do" loop */
-/* No match is a "normal" end; copy the rest of the subject and return the
-number of substitutions. */
+/* Copy the rest of the subject and return the number of substitutions. */
-if (rc == PCRE2_ERROR_NOMATCH)
- {
- rc = subs;
- endlength = length - start_offset;
- if (endlength + 1 >= lengthleft) goto NOROOM;
- memcpy(buffer + buff_offset, subject + start_offset,
- endlength*(PCRE2_CODE_UNIT_WIDTH/8));
- buff_offset += endlength;
- buffer[buff_offset] = 0;
- *blength = buff_offset;
- }
+rc = subs;
+endlength = length - start_offset;
+if (endlength + 1 > lengthleft) goto NOROOM;
+memcpy(buffer + buff_offset, subject + start_offset,
+ endlength*(PCRE2_CODE_UNIT_WIDTH/8));
+buff_offset += endlength;
+buffer[buff_offset] = 0;
+*blength = buff_offset;
EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
@@ -264,6 +278,10 @@ return rc;
NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;
+
+BAD:
+rc = PCRE2_ERROR_BADREPLACEMENT;
+goto EXIT;
}
/* End of pcre2_substitute.c */
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 5b5c937..b652fa2 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -165,6 +165,7 @@ void vms_setsymbol( char *, char *, int );
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */
+#define REPLACE_BUFFSIZE 400 /* For replacement strings */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
/* Execution modes */
@@ -345,9 +346,9 @@ either on a pattern or a data line, so they must all be distinct. */
#define CTL_JITVERIFY 0x00010000u
#define CTL_MARK 0x00020000u
#define CTL_MEMORY 0x00040000u
-#define CTL_PATLEN 0x00080000u
-#define CTL_POSIX 0x00100000u
-#define CTL_STARTCHAR 0x00200000u
+#define CTL_POSIX 0x00080000u
+#define CTL_STARTCHAR 0x00100000u
+#define CTL_ZERO_TERMINATE 0x00200000u
#define CTL_BSR_SET 0x80000000u /* This is informational */
#define CTL_NL_SET 0x40000000u /* This is informational */
@@ -376,6 +377,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t stackguard_test;
uint32_t tables_id;
uint8_t locale[32];
+ uint8_t replacement[REPLACE_BUFFSIZE];
} patctl;
#define MAXCPYGET 10
@@ -485,13 +487,14 @@ static modstruct modlist[] = {
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
+ { "replace", MOD_PAT, MOD_STR, 0, PO(replacement) },
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
- { "use_length", MOD_PAT, MOD_CTL, CTL_PATLEN, PO(control) },
- { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }
+ { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
+ { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};
#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
@@ -945,6 +948,17 @@ are supported. */
else \
pcre2_set_recursion_limit_32(G(a,32),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ if (test_mode == PCRE8_MODE) \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
+ else if (test_mode == PCRE16_MODE) \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
+ else \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
+
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == PCRE8_MODE) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
@@ -1298,6 +1312,16 @@ the three different cases. */
else \
G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
+ G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
+ (G(PCRE2_UCHAR,BITONE) *)k,l); \
+ else \
+ a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
+ G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
+ (G(PCRE2_UCHAR,BITTWO) *)k,l)
+
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
@@ -1466,6 +1490,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1544,6 +1571,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1622,6 +1652,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -3199,9 +3232,9 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "",
- ((controls & CTL_PATLEN) != 0)? " use_length" : "",
((controls & CTL_POSIX) != 0)? " posix" : "",
- ((controls & CTL_STARTCHAR) != 0)? " startchar" : "");
+ ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
+ ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
}
@@ -3672,6 +3705,7 @@ patlen = p - buffer - 2;
/* Look for modifiers and options after the final delimiter. */
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
+utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */
@@ -3679,7 +3713,6 @@ specified. */
if (pat_patctl.jit == 0 &&
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
pat_patctl.jit = 7;
-utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
in callouts. Convert to binary if required. */
@@ -3786,6 +3819,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
/* Check for features that the POSIX interface does not support. */
if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
+ if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
if (timeit > 0) prmsg(&msg, "timing");
@@ -3863,11 +3897,11 @@ switch(errorcode)
break;
}
-/* The pattern in now in pbuffer[8|16|32], with the length in patlen. By
+/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
default, however, we pass a zero-terminated pattern. The length is passed only
-if we had a hex pattern or if use_length was set. */
+if we had a hex pattern. */
-if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1;
+if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
/* Compile many times when timing. */
@@ -4491,22 +4525,6 @@ SET(*q, 0);
len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
ulen = len/code_unit_size; /* Length in code units */
-/* If we have explicit valgrind support, mark the data from after its end to
-the end of the buffer as unaddressable, so that a read over the end of the
-buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
-building with valgrind support, at least move the data to the end of the buffer
-so that it might at least cause a crash. If we are using the POSIX interface,
-we must include the terminating zero. */
-
-pp = dbuffer;
-c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0;
-
-#ifdef SUPPORT_VALGRIND
- VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
-#else
- pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
-#endif
-
/* If the string was terminated by \= we must now interpret modifiers. */
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
@@ -4522,10 +4540,27 @@ if (c - (c & -c) != 0)
return PR_OK;
}
-/* Now run the pattern match: len contains the byte length, ulen contains the
-code unit length, and pp points to the subject string. POSIX matching is only
-possible in 8-bit mode, and it does not support timing or other fancy features.
-Some were checked at compile time, but we need to check the match-time settings
+/* If we have explicit valgrind support, mark the data from after its end to
+the end of the buffer as unaddressable, so that a read over the end of the
+buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
+building with valgrind support, at least move the data to the end of the buffer
+so that it might at least cause a crash. If we are using the POSIX interface,
+or testing zero-termination, we must include the terminating zero. */
+
+pp = dbuffer;
+c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
+ (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
+
+#ifdef SUPPORT_VALGRIND
+ VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
+#else
+ pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
+#endif
+
+/* We now have len containing the byte length, ulen containing the code unit
+length, and pp pointing to the subject string. POSIX matching is only possible
+in 8-bit mode, and it does not support timing or other fancy features. Some
+were checked at compile time, but we need to check the match-time settings
here. */
#ifdef SUPPORT_PCRE2_8
@@ -4621,6 +4656,11 @@ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
dat_datctl.control &= ~CTL_ALLUSEDTEXT;
}
+/* Handle passing the subject as zero-terminated. */
+
+if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+ ulen = PCRE2_ZERO_TERMINATED;
+
/* Enable display of malloc/free if wanted. */
show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
@@ -4676,9 +4716,134 @@ else
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
}
-/* Loop for global matching */
+/* If a replacement string is provided, call pcre2_substitute() instead of one
+of the matching functions. First we have to convert the replacement string to
+the appropriate width. */
-for (gmatched = 0;; gmatched++)
+if (pat_patctl.replacement[0] != 0)
+ {
+ int rc;
+ uint8_t *pr;
+ uint8_t rbuffer[REPLACE_BUFFSIZE];
+ uint8_t nbuffer[REPLACE_BUFFSIZE];
+ uint32_t goption;
+ PCRE2_SIZE rlen;
+ PCRE2_SIZE nsize;
+
+#ifdef SUPPORT_PCRE2_8
+ uint8_t *r8 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_16
+ uint16_t *r16 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_32
+ uint32_t *r32 = NULL;
+#endif
+
+ goption = ((pat_patctl.control & CTL_GLOBAL) == 0)? 0 :
+ PCRE2_SUBSTITUTE_GLOBAL;
+ SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
+ pr = pat_patctl.replacement;
+
+ /* If the replacement starts with '[<number>]' we interpret <number> as the
+ size, in code units, of the output buffer passed to pcre2_substitute(). */
+
+ nsize = REPLACE_BUFFSIZE/code_unit_size;
+ if (*pr == '[')
+ {
+ PCRE2_SIZE n = 0;
+ while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
+ if (*pr++ != ']')
+ {
+ fprintf(outfile, "Bad buffer size in replacement string\n");
+ return PR_OK;
+ }
+ if (n > nsize)
+ {
+ fprintf(outfile, "Replacement buffer setting (%ld) is too large "
+ "(max %ld)\n", n, nsize);
+ return PR_OK;
+ }
+ nsize = n;
+ }
+
+ /* Now copy the replacement string to a buffer of the appropriate width. */
+
+ while ((c = *pr++) != 0)
+ {
+ if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
+
+ /* At present no escape processing is provided for replacements. */
+
+#ifdef SUPPORT_PCRE2_8
+ if (test_mode == PCRE8_MODE)
+ {
+ if (utf)
+ {
+ r8 += ord2utf8(c, r8);
+ }
+ else
+ {
+ *r8++ = c;
+ }
+ }
+#endif
+#ifdef SUPPORT_PCRE2_16
+ if (test_mode == PCRE16_MODE)
+ {
+ if (utf)
+ {
+ if (c >= 0x10000u)
+ {
+ c-= 0x10000u;
+ *r16++ = 0xD800 | (c >> 10);
+ *r16++ = 0xDC00 | (c & 0x3ff);
+ }
+ else
+ *r16++ = c;
+ }
+ else
+ {
+ *r16++ = c;
+ }
+ }
+#endif
+#ifdef SUPPORT_PCRE2_32
+ if (test_mode == PCRE32_MODE)
+ {
+ *r32++ = c;
+ }
+#endif
+ }
+
+ SET(*r, 0);
+ if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+ rlen = PCRE2_ZERO_TERMINATED;
+ else
+ rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
+ PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
+ dat_datctl.options|goption, match_data, dat_context,
+ rbuffer, rlen, nbuffer, &nsize);
+
+ if (rc < 0)
+ {
+ fprintf(outfile, "Failed: error %d: ", rc);
+ PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
+ PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
+ }
+ else
+ {
+ fprintf(outfile, "%2d: ", rc);
+ PCHARSV(nbuffer, 0, nsize, utf, outfile);
+ }
+
+ fprintf(outfile, "\n");
+ } /* End of substitution handling */
+
+/* When a replacement string is not provided, run a loop for global matching
+with one of the basic matching functions. */
+
+else for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
@@ -4689,7 +4854,7 @@ for (gmatched = 0;; gmatched++)
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
-
+
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the
@@ -4787,7 +4952,7 @@ for (gmatched = 0;; gmatched++)
{
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
}
-
+
/* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0)
@@ -4888,7 +5053,7 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
-
+
fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */
@@ -4900,15 +5065,15 @@ for (gmatched = 0;; gmatched++)
}
/* Check for silly offsets, in particular, values that have not been
- set when they should have been. */
-
+ set when they should have been. */
+
if (start > ulen || end > ulen)
{
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
start, end);
- continue;
- }
-
+ continue;
+ }
+
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 97af187..6f84141 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -384,15 +384,15 @@ aaaaa2
010203040506
RC=0
======== STDERR ========
-pcre2grep: pcre2_match() gave error -45 while matching this text:
+pcre2grep: pcre2_match() gave error -46 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-pcre2grep: pcre2_match() gave error -45 while matching this text:
+pcre2grep: pcre2_match() gave error -46 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded.
+pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >
@@ -510,23 +510,23 @@ In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 62 -----------------------------
-pcre2grep: pcre2_match() gave error -45 while matching text that starts:
+pcre2grep: pcre2_match() gave error -46 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded.
+pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 63 -----------------------------
-pcre2grep: pcre2_match() gave error -51 while matching text that starts:
+pcre2grep: pcre2_match() gave error -52 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded.
+pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 64 ------------------------------
diff --git a/testdata/testinput2 b/testdata/testinput2
index c48999f..f42a2aa 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4008,4 +4008,65 @@ a random value. /Ix
/(((((a)))))/parens_nest_limit=2
+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+ 123123
+ 123abc123
+ 123abc123abc123
+ 123123\=zero_terminate
+ 123abc123\=zero_terminate
+ 123abc123abc123\=zero_terminate
+
+/abc/g,replace=XYZ
+ 123abc123
+ 123abc123abc123
+
+/abc/replace=X$$Z
+ 123abc123
+
+/abc/g,replace=X$$Z
+ 123abc123abc123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+ "abcde"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+ "abcde-abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+ "abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+ "abcde-abcde-"
+
+/abc/replace=a$++
+ 123abc
+
+/abc/replace=a$bad
+ 123abc
+
+/abc/replace=a${A234567890123456789_123456789012}z
+ 123abc
+
+/abc/replace=a${A23456789012345678901234567890123}z
+ 123abc
+
+/abc/replace=a${bcd
+ 123abc
+
+/abc/replace=a${b+d}z
+ 123abc
+
+/abc/replace=[10]XYZ
+ 123abc123
+
+/abc/replace=[9]XYZ
+ 123abc123
+
+/abc/replace=xyz
+ 1abc2\=partial_hard
+
+# End of substitute tests
+
# End of testinput2
diff --git a/testdata/testinput5 b/testdata/testinput5
index 51767a8..1957b48 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -1627,5 +1627,10 @@
/\x{100}\x{200}\K\x{300}/utf,startchar
\x{100}\x{200}\x{300}
+
+# Test UTF characters in a substitution
+
+/ábc/utf,replace=XሴZ
+ 123ábc123
# End of testinput5
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index cc757d8..bc010c9 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -888,7 +888,7 @@ Subject length lower bound = 3
a\x{123}aa\=offset=1
0: aa
a\x{123}aa\=offset=2
-Error -35 (bad UTF-8 offset)
+Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=3
0: aa
a\x{123}aa\=offset=4
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index e18fa83..4713c4f 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -851,9 +851,9 @@ Subject length lower bound = 1
/a/utf
\x{10000}\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=2
0: a
\x{10000}ab\=offset=3
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index d248879..b57b24b 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -114,11 +114,11 @@ Subject length lower bound = 3
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(a+)*zz/
aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -127,9 +127,9 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -138,7 +138,7 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1
@@ -149,7 +149,7 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_RECURSION=10)(a+)*zz/I
Capturing subpattern count = 1
@@ -158,9 +158,9 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
aaaaaaaaaaaaaz\=recursion_limit=1000
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
Capturing subpattern count = 1
@@ -180,21 +180,21 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
# These three have infinitely nested recursions.
/((?2))((?1))/
abc
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/
aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
# The allusedtext modifier does not work with JIT, which does not maintain
# the leftchar/rightchar data.
diff --git a/testdata/testoutput16 b/testdata/testoutput16
index ec9ba59..2456815 100644
--- a/testdata/testoutput16
+++ b/testdata/testoutput16
@@ -15,7 +15,7 @@ JIT compilation was not successful
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/abcd/I
Capturing subpattern count = 0
@@ -64,13 +64,13 @@ No match
abcd
0: abcd (JIT)
ab\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
xyz
No match (JIT)
xyz\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
/abcd/jit=2
abcd
@@ -84,13 +84,13 @@ No match
/abcd/jit=2,jitfast
abcd
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
ab\=ps
Partial match: ab (JIT)
ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
xyz
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
/abcd/jit=3
abcd
@@ -256,7 +256,7 @@ Minimum match limit = 6
aaaaaaaaaaaaaz
No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -266,9 +266,9 @@ Last code unit = 'z'
Subject length lower bound = 2
JIT compilation was successful
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -278,7 +278,7 @@ Last code unit = 'z'
Subject length lower bound = 2
JIT compilation was successful
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1
@@ -290,21 +290,21 @@ JIT compilation was successful
aaaaaaaaaaaaaz
No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
# These three have infinitely nested recursions.
/((?2))((?1))/
abc
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/((?(R2)a+|(?1)b))/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
# Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 1ed51f4..f999a1f 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -993,7 +993,7 @@ Subject length lower bound = 4
0: abcd
1: a
2: d
-Copy substring 5 failed (-47): unknown or unset substring
+Copy substring 5 failed (-48): unknown or unset substring
/(.{20})/I
Capturing subpattern count = 1
@@ -1047,9 +1047,9 @@ Subject length lower bound = 4
2: <unset>
3: f
1G a (1)
-Get substring 2 failed (-47): unknown or unset substring
+Get substring 2 failed (-48): unknown or unset substring
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L adef
1L a
2L
@@ -1062,7 +1062,7 @@ Get substring 4 failed (-47): unknown or unset substring
1G bc (2)
2G bc (2)
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L bcdef
1L bc
2L bc
@@ -4363,7 +4363,7 @@ Subject length lower bound = 8
1: cd
2: gh
Number not found for group 'three'
-Copy substring 'three' failed (-47): unknown or unset substring
+Copy substring 'three' failed (-48): unknown or unset substring
/(?P<Tes>)(?P<Test>)/IB
------------------------------------------------------------------
@@ -5731,7 +5731,7 @@ No match
1: a1
2: a1
Number not found for group 'Z'
-Copy substring 'Z' failed (-47): unknown or unset substring
+Copy substring 'Z' failed (-48): unknown or unset substring
C a1 (2) A (non-unique)
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@@ -5772,7 +5772,7 @@ Subject length lower bound = 2
C a (1) A (non-unique)
cd\=copy=A
0: cd
-Copy substring 'A' failed (-47): unknown or unset substring
+Copy substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -5817,7 +5817,7 @@ No match
1: a1
2: a1
Number not found for group 'Z'
-Get substring 'Z' failed (-47): unknown or unset substring
+Get substring 'Z' failed (-48): unknown or unset substring
G a1 (2) A (non-unique)
/^(?P<A>a)(?P<A>b)/I,dupnames
@@ -5848,7 +5848,7 @@ Subject length lower bound = 2
G a (1) A (non-unique)
cd\=get=A
0: cd
-Get substring 'A' failed (-47): unknown or unset substring
+Get substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -13607,4 +13607,88 @@ Subject length lower bound = 0
/(((((a)))))/parens_nest_limit=2
Failed: error 119 at offset 3: parentheses are too deeply nested
+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+ 123123
+ 0: 123123
+ 123abc123
+ 1: 123XYZ123
+ 123abc123abc123
+ 1: 123XYZ123abc123
+ 123123\=zero_terminate
+ 0: 123123
+ 123abc123\=zero_terminate
+ 1: 123XYZ123
+ 123abc123abc123\=zero_terminate
+ 1: 123XYZ123abc123
+
+/abc/g,replace=XYZ
+ 123abc123
+ 1: 123XYZ123
+ 123abc123abc123
+ 2: 123XYZ123XYZ123
+
+/abc/replace=X$$Z
+ 123abc123
+ 1: 123X$Z123
+
+/abc/g,replace=X$$Z
+ 123abc123abc123
+ 2: 123X$Z123X$Z123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+ "abcde"
+ 1: "XbYdZ"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+ "abcde-abcde"
+ 2: "XbYdZ-XbYdZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+ "abcde"
+ 1: "Xb+dZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+ "abcde-abcde-"
+ 2: "Xb+dZ-Xb+dZ-"
+
+/abc/replace=a$++
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a$bad
+ 123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A234567890123456789_123456789012}z
+ 123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A23456789012345678901234567890123}z
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${bcd
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${b+d}z
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=[10]XYZ
+ 123abc123
+ 1: 123XYZ123
+
+/abc/replace=[9]XYZ
+ 123abc123
+Failed: error -47: no more memory
+
+/abc/replace=xyz
+ 1abc2\=partial_hard
+Failed: error -34: bad option value
+
+# End of substitute tests
+
# End of testinput2
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 70d34f4..421222f 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -3995,5 +3995,11 @@ Subject length lower bound = 1
\x{100}\x{200}\x{300}
0: \x{100}\x{200}\x{300}
^^^^^^^^^^^^^^
+
+# Test UTF characters in a substitution
+
+/ábc/utf,replace=XሴZ
+ 123ábc123
+ 1: 123X\x{1234}Z123
# End of testinput5
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 52db4f4..4603ccc 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -6133,7 +6133,7 @@ No match
/^(?(2)a|(1)(2))+$/
123a
-Failed: error -39: backreference condition or recursion test not supported for DFA matching
+Failed: error -40: backreference condition or recursion test not supported for DFA matching
/(?<=a|bbbb)c/
ac
@@ -7087,7 +7087,7 @@ Partial match: dogs
/abc\K123/
xyzabc123pqr
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/(?<=abc)123/
xyzabc123pqr
@@ -7205,29 +7205,29 @@ No match
/^(?!a(*SKIP)b)/
ac
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/^(?=a(*SKIP)b|ac)/
** Failers
No match
ac
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/^(?=a(*THEN)b|ac)/
ac
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/^(?=a(*PRUNE)b)/
ab
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
** Failers
No match
ac
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/^(?(?!a(*SKIP)b))/
ac
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/(?<=abc)def/
abc\=ph
@@ -7424,7 +7424,7 @@ No match
/((?2))((?1))/
abc
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/(?(R)a+|(?R)b)/
aaaabcde
@@ -7440,11 +7440,11 @@ Failed: error -50: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/
aaaabcde
-Failed: error -39: backreference condition or recursion test not supported for DFA matching
+Failed: error -40: backreference condition or recursion test not supported for DFA matching
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/(a+)/no_auto_possess
aaaa\=ovector=3
@@ -7593,7 +7593,7 @@ Partial match: \x0d\x0d\x0d
/abcdef/
abc\=dfa_restart
-Failed: error -37: invalid data in workspace for DFA restart
+Failed: error -38: invalid data in workspace for DFA restart
/<H((?(?!<H|F>)(.)|(?R))++)*F>/
text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 34ead75..7760c93 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -1218,7 +1218,7 @@ Partial match: the cat
/ab\Cde/utf
abXde
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/(?<=ab\Cde)X/utf
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion