summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-06-03 16:42:58 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-06-03 16:42:58 +0000
commit: 4ed24ba49fc4a584c58509177e5a3ad6d1a000e4 (patch)
tree: 748e07e2456656493ddcb098b8b852d32b081d6b
parent: 778799e8109592a370cc114c89b0f86c43af11f2 (diff)
download: pcre2-4ed24ba49fc4a584c58509177e5a3ad6d1a000e4.tar.gz
Fix matching offsets from regexec() in the POSIX wrapper when called with
REG_STARTEND and a starting offset greater than zero.

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@818 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--  ChangeLog               3
-rw-r--r--  doc/pcre2posix.3        30
-rw-r--r--  doc/pcre2test.1         18
-rw-r--r--  src/pcre2posix.c        4
-rw-r--r--  src/pcre2test.c         16
-rw-r--r--  testdata/testinput18    10
-rw-r--r--  testdata/testoutput18   17
7 files changed, 81 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index 6e9a2c3..25ced58 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -179,6 +179,9 @@ deeply. (Compare item 10.23/36.) This should fix oss-fuzz #1761.
37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
+38. Fix returned offsets from regexec() when REG_STARTEND is used with a
+starting offset greater than zero.
+
Version 10.23 14-February-2017
------------------------------
diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3
index 70a86d8..b37046b 100644
--- a/doc/pcre2posix.3
+++ b/doc/pcre2posix.3
@@ -1,4 +1,4 @@
-.TH PCRE2POSIX 3 "31 January 2016" "PCRE2 10.22"
+.TH PCRE2POSIX 3 "03 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SYNOPSIS"
@@ -204,15 +204,21 @@ function.
.sp
REG_STARTEND
.sp
-The string is considered to start at \fIstring\fP + \fIpmatch[0].rm_so\fP and
-to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP
-(there need not actually be a NUL at that location), regardless of the value of
-\fInmatch\fP. This is a BSD extension, compatible with but not specified by
-IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software
-intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does
-not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not
-how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are
-mutually exclusive; the error REG_INVARG is returned.
+When this option is set, the string is considered to start at \fIstring\fP +
+\fIpmatch[0].rm_so\fP and to have a terminating NUL located at \fIstring\fP +
+\fIpmatch[0].rm_eo\fP (there need not actually be a NUL at that location),
+regardless of the value of \fInmatch\fP. However, the offsets of the matched
+string and any captured substrings are still given relative to the start of
+\fIstring\fP. (Before PCRE2 release 10.30 these were given relative to
+\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other
+implementations.)
+.P
+This is a BSD extension, compatible with but not specified by IEEE Standard
+1003.2 (POSIX.2), and should be used with caution in software intended to be
+portable to other systems. Note that a non-zero \fIrm_so\fP does not imply
+REG_NOTBOL; REG_STARTEND affects only the location of the string, not how it is
+matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are mutually
+exclusive; the error REG_INVARG is returned.
.P
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
@@ -271,6 +277,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 31 January 2016
-Copyright (c) 1997-2016 University of Cambridge.
+Last updated: 03 June 2017
+Copyright (c) 1997-2017 University of Cambridge.
.fi
diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index 26d395f..abd42d0 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "01 June 2017" "PCRE 10.30"
+.TH PCRE2TEST 1 "03 June 2017" "PCRE 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -1046,6 +1046,20 @@ wrapper API to be used, the only option-setting modifiers that have any effect
are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL,
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP.
The other modifiers are ignored, with a warning message.
+.P
+There is one additional modifier that can be used with the POSIX wrapper. It is
+ignored (with a warning) if used for non-POSIX matching.
+.sp
+ posix_startend=<n>[:<m>]
+.sp
+This causes the subject string to be passed to \fBregexec()\fP using the
+REG_STARTEND option, which uses offsets to restrict which part of the string is
+searched. If only one number is given, the end offset is passed as the end of
+the subject string. For more detail of REG_STARTEND, see the
+.\" HREF
+\fBpcre2posix\fP
+.\"
+documentation.
.
.
.SS "Setting match controls"
@@ -1793,6 +1807,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 01 June 2017
+Last updated: 03 June 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi
diff --git a/src/pcre2posix.c b/src/pcre2posix.c
index 4ecc701..8be969a 100644
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@@ -338,8 +338,8 @@ if (rc >= 0)
if ((size_t)rc > nmatch) rc = (int)nmatch;
for (i = 0; i < (size_t)rc; i++)
{
- pmatch[i].rm_so = ovector[i*2];
- pmatch[i].rm_eo = ovector[i*2+1];
+ pmatch[i].rm_so = ovector[i*2] + so;
+ pmatch[i].rm_eo = ovector[i*2+1] + so;
}
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 8eafadf..5a2b86f 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -186,7 +186,7 @@ void vms_setsymbol( char *, char *, int );
#endif
#endif
-#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
+#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
@@ -538,6 +538,7 @@ typedef struct datctl { /* Structure for data line modifiers. */
uint32_t control; /* Must be in same position as patctl */
uint32_t control2; /* Must be in same position as patctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
+ uint32_t startend[2];
uint32_t cerror[2];
uint32_t cfail[2];
int32_t callout_data;
@@ -662,6 +663,7 @@ static modstruct modlist[] = {
{ "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
+ { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
@@ -6660,6 +6662,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
}
}
+ if (dat_datctl.startend[0] != CFORE_UNSET)
+ {
+ pmatch[0].rm_so = dat_datctl.startend[0];
+ pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
+ dat_datctl.startend[1] : len;
+ eflags |= REG_STARTEND;
+ }
+
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
@@ -6713,6 +6723,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
/* Handle matching via the native interface. Check for consistency of
modifiers. */
+if (dat_datctl.startend[0] != CFORE_UNSET)
+ fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
+
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
matching, even if the JIT compiler was used. */
@@ -7903,6 +7916,7 @@ memset(&def_datctl, 0, sizeof(datctl));
def_datctl.oveccount = DEFAULT_OVECCOUNT;
def_datctl.copy_numbers[0] = -1;
def_datctl.get_numbers[0] = -1;
+def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
diff --git a/testdata/testinput18 b/testdata/testinput18
index 7fc9b12..ececc06 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -113,4 +113,14 @@
/(?=(a\K))/
a
+/^d(e)$/posix
+ acdef\=posix_startend=2:4
+ acde\=posix_startend=2
+\= Expect no match
+ acdef
+ acdef\=posix_startend=2
+
+/^a\x{00}b$/posix
+ a\x{00}b\=posix_startend=0:3
+
# End of testdata/testinput18
diff --git a/testdata/testoutput18 b/testdata/testoutput18
index 6f68ca1..96386da 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@@ -174,4 +174,21 @@ Start of matched string is beyond its end - displaying from end to start.
0: a
1: a
+/^d(e)$/posix
+ acdef\=posix_startend=2:4
+ 0: de
+ 1: e
+ acde\=posix_startend=2
+ 0: de
+ 1: e
+\= Expect no match
+ acdef
+No match: POSIX code 17: match failed
+ acdef\=posix_startend=2
+No match: POSIX code 17: match failed
+
+/^a\x{00}b$/posix
+ a\x{00}b\=posix_startend=0:3
+ 0: a\x00b
+
# End of testdata/testinput18