summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-06-04 14:28:58 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-06-04 14:28:58 +0000
commit 2713a25161c167b2339b245e124932e6e3820ba7 (patch)
tree 516802e8275b1f50e925ab2bedb0575caafbb544
parent 977a33e0e05ab3378b480a7270a0d89c24644dec (diff)
download pcre-2713a25161c167b2339b245e124932e6e3820ba7.tar.gz
Support \k{name} and \g{name} a la Perl 5.10.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@171 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 7
-rw-r--r-- doc/pcrepattern.3 13
-rw-r--r-- pcre_compile.c 22
-rw-r--r-- testdata/testinput2 12
-rw-r--r-- testdata/testoutput2 31
5 files changed, 71 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index a0a8d9c..46689b0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -36,13 +36,18 @@ Version 7.2 01-May-07
(d) \K resets the start of the current match so that everything before
is not part of it.
+ (e) \k{name} is synonymous with \k<name> and \k'name' (.NET compatible).
+
+ (f) \g{name} is another synonym - part of Perl 5.10's unification of
+ reference syntax.
+
7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and
PCRE_INFO_JCHANGED.
8. A pattern such as (.*(.)?)* caused pcre_exec() to fail by either not
terminating or by crashing. Diagnosed by Viktor Griph; it was in the code
for detecting groups that can match an empty string.
-
+
Version 7.1 24-Apr-07
---------------------
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index c19e364..4d8b943 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -241,8 +241,8 @@ meanings
.rs
.sp
The sequence \eg followed by a positive or negative number, optionally enclosed
-in braces, is an absolute or relative back reference. Back references are
-discussed
+in braces, is an absolute or relative back reference. A named back reference
+can be coded as \eg{name}. Back references are discussed
.\" HTML <a href="#backreferences">
.\" </a>
later,
@@ -1325,12 +1325,17 @@ back reference, the case of letters is relevant. For example,
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
capturing subpattern is matched caselessly.
.P
-Back references to named subpatterns use the Perl syntax \ek<name> or \ek'name'
-or the Python syntax (?P=name). We could rewrite the above example in either of
+There are several different ways of writing back references to named
+subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or
+\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
+back reference syntax, in which \eg can be used for both numeric and named
+references, is also supported. We could rewrite the above example in any of
the following ways:
.sp
(?<p1>(?i)rah)\es+\ek<p1>
+ (?'p1'(?i)rah)\es+\ek{p1}
(?P<p1>(?i)rah)\es+(?P=p1)
+ (?<p1>(?i)rah)\es+\eg{p1}
.sp
A subpattern that is referenced by name may appear in the pattern before or
after the reference.
diff --git a/pcre_compile.c b/pcre_compile.c
index c930193..3fd5432 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -242,7 +242,7 @@ static const char *error_texts[] = {
/* 55 */
"repeating a DEFINE group is not allowed",
"inconsistent NEWLINE options",
- "\\g is not followed by an (optionally braced) non-zero number",
+ "\\g is not followed by a braced name or an optionally braced non-zero number",
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
};
@@ -453,11 +453,22 @@ else
/* \g must be followed by a number, either plain or braced. If positive, it
is an absolute backreference. If negative, it is a relative backreference.
- This is a Perl 5.10 feature. */
+ This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
+ reference to a named group. This is part of Perl's movement towards a
+ unified syntax for back references. As this is synonymous with \k{name}, we
+ fudge it up by pretending it really was \k. */
case 'g':
if (ptr[1] == '{')
{
+ const uschar *p;
+ for (p = ptr+2; *p != 0 && *p != '}'; p++)
+ if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
+ if (*p != 0 && *p != '}')
+ {
+ c = -ESC_k;
+ break;
+ }
braced = TRUE;
ptr++;
}
@@ -4470,12 +4481,13 @@ for (;; ptr++)
zerofirstbyte = firstbyte;
zeroreqbyte = reqbyte;
- /* \k<name> or \k'name' is a back reference by name (Perl syntax) */
+ /* \k<name> or \k'name' is a back reference by name (Perl syntax).
+ We also support \k{name} (.NET syntax) */
- if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))
+ if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
{
is_recurse = FALSE;
- terminator = (*(++ptr) == '<')? '>' : '\'';
+ terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
goto NAMED_REF_OR_RECURSE;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index 1faa2a9..8e152bc 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2203,4 +2203,16 @@ a random value. /Ix
/(foo\Kbar)baz/
foobarbaz
+/(?<A>tom|bon)-\k{A}/
+ tom-tom
+ bon-bon
+ ** Failers
+ tom-bon
+
+/(?<A>tom|bon)-\g{A}/
+ tom-tom
+ bon-bon
+
+/\g{A/
+
/ End of testinput2 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 8cda6a2..c6982e9 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -8067,16 +8067,16 @@ No match
Failed: reference to non-existent subpattern at offset 4
/^(a)\g/
-Failed: \g is not followed by an (optionally braced) non-zero number at offset 4
+Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4
/^(a)\g{0}/
-Failed: \g is not followed by an (optionally braced) non-zero number at offset 4
+Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4
/^(a)\g{3/
-Failed: \g is not followed by an (optionally braced) non-zero number at offset 4
+Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4
/^(a)\g{4a}/
-Failed: \g is not followed by an (optionally braced) non-zero number at offset 4
+Failed: reference to non-existent subpattern at offset 9
/^a.b/<lf>
a\rb
@@ -8334,4 +8334,27 @@ Failed: reference to non-existent subpattern at offset 7
0: barbaz
1: foobar
+/(?<A>tom|bon)-\k{A}/
+ tom-tom
+ 0: tom-tom
+ 1: tom
+ bon-bon
+ 0: bon-bon
+ 1: bon
+ ** Failers
+No match
+ tom-bon
+No match
+
+/(?<A>tom|bon)-\g{A}/
+ tom-tom
+ 0: tom-tom
+ 1: tom
+ bon-bon
+ 0: bon-bon
+ 1: bon
+
+/\g{A/
+Failed: syntax error in subpattern name (missing terminator) at offset 4
+
/ End of testinput2 /