diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-06-04 14:28:58 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-06-04 14:28:58 +0000 |
commit | 2713a25161c167b2339b245e124932e6e3820ba7 (patch) | |
tree | 516802e8275b1f50e925ab2bedb0575caafbb544 | |
parent | 977a33e0e05ab3378b480a7270a0d89c24644dec (diff) | |
download | pcre-2713a25161c167b2339b245e124932e6e3820ba7.tar.gz |
Support \k{name} and \g{name} a la Perl 5.10.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@171 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | doc/pcrepattern.3 | 13 | ||||
-rw-r--r-- | pcre_compile.c | 22 | ||||
-rw-r--r-- | testdata/testinput2 | 12 | ||||
-rw-r--r-- | testdata/testoutput2 | 31 |
5 files changed, 71 insertions, 14 deletions
@@ -36,13 +36,18 @@ Version 7.2 01-May-07 (d) \K resets the start of the current match so that everything before is not part of it. + (e) \k{name} is synonymous with \k<name> and \k'name' (.NET compatible). + + (f) \g{name} is another synonym - part of Perl 5.10's unification of + reference syntax. + 7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and PCRE_INFO_JCHANGED. 8. A pattern such as (.*(.)?)* caused pcre_exec() to fail by either not terminating or by crashing. Diagnosed by Viktor Griph; it was in the code for detecting groups that can match an empty string. - + Version 7.1 24-Apr-07 --------------------- diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index c19e364..4d8b943 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -241,8 +241,8 @@ meanings .rs .sp The sequence \eg followed by a positive or negative number, optionally enclosed -in braces, is an absolute or relative back reference. Back references are -discussed +in braces, is an absolute or relative back reference. A named back reference +can be coded as \eg{name}. Back references are discussed .\" HTML <a href="#backreferences"> .\" </a> later, @@ -1325,12 +1325,17 @@ back reference, the case of letters is relevant. For example, matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original capturing subpattern is matched caselessly. .P -Back references to named subpatterns use the Perl syntax \ek<name> or \ek'name' -or the Python syntax (?P=name). We could rewrite the above example in either of +There are several different ways of writing back references to named +subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or +\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified +back reference syntax, in which \eg can be used for both numeric and named +references, is also supported. We could rewrite the above example in any of the following ways: .sp (?<p1>(?i)rah)\es+\ek<p1> + (?'p1'(?i)rah)\es+\ek{p1} (?P<p1>(?i)rah)\es+(?P=p1) + (?<p1>(?i)rah)\es+\eg{p1} .sp A subpattern that is referenced by name may appear in the pattern before or after the reference. diff --git a/pcre_compile.c b/pcre_compile.c index c930193..3fd5432 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -242,7 +242,7 @@ static const char *error_texts[] = { /* 55 */ "repeating a DEFINE group is not allowed", "inconsistent NEWLINE options", - "\\g is not followed by an (optionally braced) non-zero number", + "\\g is not followed by a braced name or an optionally braced non-zero number", "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number" }; @@ -453,11 +453,22 @@ else /* \g must be followed by a number, either plain or braced. If positive, it is an absolute backreference. If negative, it is a relative backreference. - This is a Perl 5.10 feature. */ + This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a + reference to a named group. This is part of Perl's movement towards a + unified syntax for back references. As this is synonymous with \k{name}, we + fudge it up by pretending it really was \k. */ case 'g': if (ptr[1] == '{') { + const uschar *p; + for (p = ptr+2; *p != 0 && *p != '}'; p++) + if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break; + if (*p != 0 && *p != '}') + { + c = -ESC_k; + break; + } braced = TRUE; ptr++; } @@ -4470,12 +4481,13 @@ for (;; ptr++) zerofirstbyte = firstbyte; zeroreqbyte = reqbyte; - /* \k<name> or \k'name' is a back reference by name (Perl syntax) */ + /* \k<name> or \k'name' is a back reference by name (Perl syntax). + We also support \k{name} (.NET syntax) */ - if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'')) + if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{')) { is_recurse = FALSE; - terminator = (*(++ptr) == '<')? '>' : '\''; + terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}'; goto NAMED_REF_OR_RECURSE; } diff --git a/testdata/testinput2 b/testdata/testinput2 index 1faa2a9..8e152bc 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2203,4 +2203,16 @@ a random value. /Ix /(foo\Kbar)baz/ foobarbaz +/(?<A>tom|bon)-\k{A}/ + tom-tom + bon-bon + ** Failers + tom-bon + +/(?<A>tom|bon)-\g{A}/ + tom-tom + bon-bon + +/\g{A/ + / End of testinput2 / diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8cda6a2..c6982e9 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -8067,16 +8067,16 @@ No match Failed: reference to non-existent subpattern at offset 4 /^(a)\g/ -Failed: \g is not followed by an (optionally braced) non-zero number at offset 4 +Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4 /^(a)\g{0}/ -Failed: \g is not followed by an (optionally braced) non-zero number at offset 4 +Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4 /^(a)\g{3/ -Failed: \g is not followed by an (optionally braced) non-zero number at offset 4 +Failed: \g is not followed by a braced name or an optionally braced non-zero number at offset 4 /^(a)\g{4a}/ -Failed: \g is not followed by an (optionally braced) non-zero number at offset 4 +Failed: reference to non-existent subpattern at offset 9 /^a.b/<lf> a\rb @@ -8334,4 +8334,27 @@ Failed: reference to non-existent subpattern at offset 7 0: barbaz 1: foobar +/(?<A>tom|bon)-\k{A}/ + tom-tom + 0: tom-tom + 1: tom + bon-bon + 0: bon-bon + 1: bon + ** Failers +No match + tom-bon +No match + +/(?<A>tom|bon)-\g{A}/ + tom-tom + 0: tom-tom + 1: tom + bon-bon + 0: bon-bon + 1: bon + +/\g{A/ +Failed: syntax error in subpattern name (missing terminator) at offset 4 + / End of testinput2 / |