Make \A record a lookbehind value of 1.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1253 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-22 11:38:35 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-22 11:38:35 +0000
commit: 0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab (patch)
tree: 1444ae432fa2b0e59859ff37ab6ab066448c84ae
parent: 4f487821d0df0abda5c5b0be1235a13bc028a983 (diff)
download: pcre-0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab.tar.gz
4 files changed, 31 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 14fa79e..d91ccfe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -63,6 +63,13 @@ Version 8.33 xx-xxxx-201x
 16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the
     offset of the starting point of the matching process, provided the offsets 
     vector is large enough.
+
+17. The \A escape now records a lookbehind value of 1, though its execution
+    does not actually inspect the previous character. This is to ensure that,
+    in partial multi-segment matching, at least one character from the old
+    segment is retained when a new segment is processed. Otherwise, if there
+    are no lookbehinds in the pattern, \A might match incorrectly at the start
+    of a new segment.
 
 
 Version 8.32 30-November-2012
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 0eebf94..3edc0e8 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "08 November 2012" "PCRE 8.32"
+.TH PCREAPI 3 "22 February 2013" "PCRE 8.33"
 .SH NAME
 PCRE - Perl-compatible regular expressions
 .sp
@@ -1297,9 +1297,14 @@ be used.
   PCRE_INFO_MAXLOOKBEHIND
 .sp
 Return the number of characters (NB not bytes) in the longest lookbehind
-assertion in the pattern. Note that the simple assertions \eb and \eB require a
-one-character lookbehind. This information is useful when doing multi-segment
-matching using the partial matching facilities.
+assertion in the pattern. This information is useful when doing multi-segment
+matching using the partial matching facilities. Note that the simple assertions
+\eb and \eB require a one-character lookbehind. \eA also registers a
+one-character lookbehind, though it does not actually inspect the previous
+character. This is to ensure that at least one character from the old segment
+is retained when a new segment is processed. Otherwise, if there are no
+lookbehinds in the pattern, \eA might match incorrectly at the start of a new
+segment.
 .sp
   PCRE_INFO_MINLENGTH
 .sp
@@ -2818,6 +2823,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 08 November 2012
-Copyright (c) 1997-2012 University of Cambridge.
+Last updated: 22 February 2013
+Copyright (c) 1997-2013 University of Cambridge.
 .fi
diff --git a/pcre_compile.c b/pcre_compile.c
index f4ab3c8..4fd1678 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -797,7 +797,8 @@ Otherwise further processing may be required. */
 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
 /* Not alphanumeric */
 else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
+else if ((i = escapes[c - CHAR_0]) != 0) 
+  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
 
 #else           /* EBCDIC coding */
 /* Not alphanumeric */
@@ -3094,7 +3095,8 @@ value is a character, a negative value is an escape value. */
 if (*ptr == CHAR_BACKSLASH)
   {
   int temperrorcode = 0;
-  escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
+  escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, 
+    FALSE);
   if (temperrorcode != 0) return FALSE;
   ptr++;    /* Point after the escape sequence */
   }
@@ -4277,14 +4279,12 @@ for (;; ptr++)
 
       if (c == CHAR_BACKSLASH)
         {
-        escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
-
+        escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, 
+          TRUE);
         if (*errorcodeptr != 0) goto FAILED;
-
-        if (escape == 0)
-          c = ec;
+        if (escape == 0) c = ec;
         else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
-        else if (escape == ESC_N)            /* \N is not supported in a class */
+        else if (escape == ESC_N)          /* \N is not supported in a class */
           {
           *errorcodeptr = ERR71;
           goto FAILED;
@@ -6718,10 +6718,9 @@ for (;; ptr++)
     case CHAR_BACKSLASH:
     tempptr = ptr;
     escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
-
     if (*errorcodeptr != 0) goto FAILED;
 
-    if (escape == 0)
+    if (escape == 0)                  /* The escape coded a single character */
       c = ec;
     else
       {
@@ -6887,11 +6886,12 @@ for (;; ptr++)
       can obtain the OP value by negating the escape value in the default
       situation when PCRE_UCP is not set. When it *is* set, we substitute
       Unicode property tests. Note that \b and \B do a one-character
-      lookbehind. */
+      lookbehind, and \A also behaves as if it does. */
 
       else
         {
-        if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
+        if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && 
+             cd->max_lookbehind == 0)
           cd->max_lookbehind = 1;
 #ifdef SUPPORT_UCP
         if (escape >= ESC_DU && escape <= ESC_wu)
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 9fc539a..e194de8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -634,6 +634,7 @@ Capturing subpattern count = 0
 Options: anchored multiline
 No first char
 No need char
+Max lookbehind = 1
 
 /^abc/Im
 Capturing subpattern count = 0
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2013-02-22 11:38:35 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2013-02-22 11:38:35 +0000
commit	0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab (patch)
tree	1444ae432fa2b0e59859ff37ab6ab066448c84ae
parent	4f487821d0df0abda5c5b0be1235a13bc028a983 (diff)
download	pcre-0bba7a1d42b9e5ec80f18fd676e7b4f8a8e419ab.tar.gz