diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-02-22 11:38:35 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-02-22 11:38:35 +0000 |
commit | 0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab (patch) | |
tree | 1444ae432fa2b0e59859ff37ab6ab066448c84ae | |
parent | 4f487821d0df0abda5c5b0be1235a13bc028a983 (diff) | |
download | pcre-0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab.tar.gz |
Make \A record a lookbehind value of 1.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1253 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 7 |
-rw-r--r-- | doc/pcreapi.3 | 17 |
-rw-r--r-- | pcre_compile.c | 24 |
-rw-r--r-- | testdata/testoutput2 | 1 |
4 files changed, 31 insertions, 18 deletions
@@ -63,6 +63,13 @@ Version 8.33 xx-xxxx-201x 16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the offset of the starting point of the matching process, provided the offsets vector is large enough. + +17. The \A escape now records a lookbehind value of 1, though its execution + does not actually inspect the previous character. This is to ensure that, + in partial multi-segment matching, at least one character from the old + segment is retained when a new segment is processed. Otherwise, if there + are no lookbehinds in the pattern, \A might match incorrectly at the start + of a new segment. Version 8.32 30-November-2012 diff --git a/doc/pcreapi.3 b/doc/pcreapi.3 index 0eebf94..3edc0e8 100644 --- a/doc/pcreapi.3 +++ b/doc/pcreapi.3 @@ -1,4 +1,4 @@ -.TH PCREAPI 3 "08 November 2012" "PCRE 8.32" +.TH PCREAPI 3 "22 February 2013" "PCRE 8.33" .SH NAME PCRE - Perl-compatible regular expressions .sp @@ -1297,9 +1297,14 @@ be used. PCRE_INFO_MAXLOOKBEHIND .sp Return the number of characters (NB not bytes) in the longest lookbehind -assertion in the pattern. Note that the simple assertions \eb and \eB require a -one-character lookbehind. This information is useful when doing multi-segment -matching using the partial matching facilities. +assertion in the pattern. This information is useful when doing multi-segment +matching using the partial matching facilities. Note that the simple assertions +\eb and \eB require a one-character lookbehind. \eA also registers a +one-character lookbehind, though it does not actually inspect the previous +character. This is to ensure that at least one character from the old segment +is retained when a new segment is processed. Otherwise, if there are no +lookbehinds in the pattern, \eA might match incorrectly at the start of a new +segment. .sp PCRE_INFO_MINLENGTH .sp @@ -2818,6 +2823,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 08 November 2012 -Copyright (c) 1997-2012 University of Cambridge. 
+Last updated: 22 February 2013 +Copyright (c) 1997-2013 University of Cambridge. .fi diff --git a/pcre_compile.c b/pcre_compile.c index f4ab3c8..4fd1678 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -797,7 +797,8 @@ Otherwise further processing may be required. */ #ifndef EBCDIC /* ASCII/UTF-8 coding */ /* Not alphanumeric */ else if (c < CHAR_0 || c > CHAR_z) {} -else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } +else if ((i = escapes[c - CHAR_0]) != 0) + { if (i > 0) c = (pcre_uint32)i; else escape = -i; } #else /* EBCDIC coding */ /* Not alphanumeric */ @@ -3094,7 +3095,8 @@ value is a character, a negative value is an escape value. */ if (*ptr == CHAR_BACKSLASH) { int temperrorcode = 0; - escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE); + escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, + FALSE); if (temperrorcode != 0) return FALSE; ptr++; /* Point after the escape sequence */ } @@ -4277,14 +4279,12 @@ for (;; ptr++) if (c == CHAR_BACKSLASH) { - escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE); - + escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, + TRUE); if (*errorcodeptr != 0) goto FAILED; - - if (escape == 0) - c = ec; + if (escape == 0) c = ec; else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ - else if (escape == ESC_N) /* \N is not supported in a class */ + else if (escape == ESC_N) /* \N is not supported in a class */ { *errorcodeptr = ERR71; goto FAILED; @@ -6718,10 +6718,9 @@ for (;; ptr++) case CHAR_BACKSLASH: tempptr = ptr; escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE); - if (*errorcodeptr != 0) goto FAILED; - if (escape == 0) + if (escape == 0) /* The escape coded a single character */ c = ec; else { @@ -6887,11 +6886,12 @@ for (;; ptr++) can obtain the OP value by negating the escape value in the default situation when PCRE_UCP is not set. 
When it *is* set, we substitute Unicode property tests. Note that \b and \B do a one-character - lookbehind. */ + lookbehind, and \A also behaves as if it does. */ else { - if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0) + if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && + cd->max_lookbehind == 0) cd->max_lookbehind = 1; #ifdef SUPPORT_UCP if (escape >= ESC_DU && escape <= ESC_wu) diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 9fc539a..e194de8 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -634,6 +634,7 @@ Capturing subpattern count = 0 Options: anchored multiline No first char No need char +Max lookbehind = 1 /^abc/Im Capturing subpattern count = 0 |