summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-22 11:38:35 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-22 11:38:35 +0000
commit 0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab (patch)
tree 1444ae432fa2b0e59859ff37ab6ab066448c84ae
parent 4f487821d0df0abda5c5b0be1235a13bc028a983 (diff)
download pcre-0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab.tar.gz
Make \A record a lookbehind value of 1.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1253 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 7
-rw-r--r-- doc/pcreapi.3 17
-rw-r--r-- pcre_compile.c 24
-rw-r--r-- testdata/testoutput2 1
4 files changed, 31 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 14fa79e..d91ccfe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -63,6 +63,13 @@ Version 8.33 xx-xxxx-201x
16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the
offset of the starting point of the matching process, provided the offsets
vector is large enough.
+
+17. The \A escape now records a lookbehind value of 1, though its execution
+ does not actually inspect the previous character. This is to ensure that,
+ in partial multi-segment matching, at least one character from the old
+ segment is retained when a new segment is processed. Otherwise, if there
+ are no lookbehinds in the pattern, \A might match incorrectly at the start
+ of a new segment.
Version 8.32 30-November-2012
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 0eebf94..3edc0e8 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "08 November 2012" "PCRE 8.32"
+.TH PCREAPI 3 "22 February 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -1297,9 +1297,14 @@ be used.
PCRE_INFO_MAXLOOKBEHIND
.sp
Return the number of characters (NB not bytes) in the longest lookbehind
-assertion in the pattern. Note that the simple assertions \eb and \eB require a
-one-character lookbehind. This information is useful when doing multi-segment
-matching using the partial matching facilities.
+assertion in the pattern. This information is useful when doing multi-segment
+matching using the partial matching facilities. Note that the simple assertions
+\eb and \eB require a one-character lookbehind. \eA also registers a
+one-character lookbehind, though it does not actually inspect the previous
+character. This is to ensure that at least one character from the old segment
+is retained when a new segment is processed. Otherwise, if there are no
+lookbehinds in the pattern, \eA might match incorrectly at the start of a new
+segment.
.sp
PCRE_INFO_MINLENGTH
.sp
@@ -2818,6 +2823,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 08 November 2012
-Copyright (c) 1997-2012 University of Cambridge.
+Last updated: 22 February 2013
+Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/pcre_compile.c b/pcre_compile.c
index f4ab3c8..4fd1678 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -797,7 +797,8 @@ Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
+else if ((i = escapes[c - CHAR_0]) != 0)
+ { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
#else /* EBCDIC coding */
/* Not alphanumeric */
@@ -3094,7 +3095,8 @@ value is a character, a negative value is an escape value. */
if (*ptr == CHAR_BACKSLASH)
{
int temperrorcode = 0;
- escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
+ escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
+ FALSE);
if (temperrorcode != 0) return FALSE;
ptr++; /* Point after the escape sequence */
}
@@ -4277,14 +4279,12 @@ for (;; ptr++)
if (c == CHAR_BACKSLASH)
{
- escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
-
+ escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+ TRUE);
if (*errorcodeptr != 0) goto FAILED;
-
- if (escape == 0)
- c = ec;
+ if (escape == 0) c = ec;
else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
- else if (escape == ESC_N) /* \N is not supported in a class */
+ else if (escape == ESC_N) /* \N is not supported in a class */
{
*errorcodeptr = ERR71;
goto FAILED;
@@ -6718,10 +6718,9 @@ for (;; ptr++)
case CHAR_BACKSLASH:
tempptr = ptr;
escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
-
if (*errorcodeptr != 0) goto FAILED;
- if (escape == 0)
+ if (escape == 0) /* The escape coded a single character */
c = ec;
else
{
@@ -6887,11 +6886,12 @@ for (;; ptr++)
can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. Note that \b and \B do a one-character
- lookbehind. */
+ lookbehind, and \A also behaves as if it does. */
else
{
- if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
+ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+ cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
if (escape >= ESC_DU && escape <= ESC_wu)
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 9fc539a..e194de8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -634,6 +634,7 @@ Capturing subpattern count = 0
Options: anchored multiline
No first char
No need char
+Max lookbehind = 1
/^abc/Im
Capturing subpattern count = 0