diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-12-18 22:41:35 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-12-31 08:27:22 -0700 |
commit | f2c2a6ab6210456432884985634012f6a0b6b00f (patch) | |
tree | 1aea9e958463f0cfbd77d41c07644371795727d4 | |
parent | 731cb813cb160baa0c7969c70e0864d4da074738 (diff) | |
download | perl-f2c2a6ab6210456432884985634012f6a0b6b00f.tar.gz |
Fatalized non-unicode warnings skip regex optimization
This makes sure that fatalized non-unicode warnings actually get output.
For example \p{Line_Break=CR} would normally get optimized into an EXACT
node. But if the user has made non-unicode warnings fatal indicating
they want to be sure not to try to even match such code points, the
optimization is skipped so that the checks are made.
Documentation for this change will be in a future commit.
-rw-r--r-- | regcomp.c | 12 | ||||
-rw-r--r-- | t/lib/warnings/utf8 | 8 |
2 files changed, 17 insertions, 3 deletions
diff --git a/regcomp.c b/regcomp.c
--- a/regcomp.c
+++ b/regcomp.c
@@ -430,6 +430,11 @@ static const scan_data_t zero_scan_data =
 
 #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
 
+/* For programs that want to be strictly Unicode compatible by dying if any
+ * attempt is made to match a non-Unicode code point against a Unicode
+ * property. */
+#define ALWAYS_WARN_SUPER ckDEAD(packWARN(WARN_NON_UNICODE))
+
 #define OOB_NAMEDCLASS -1
 
 /* There is no code point that is out-of-bounds, so this is problematic. But
@@ -14501,7 +14506,12 @@ parseit:
         && ! invert
         && ! depends_list
         && ! (ANYOF_FLAGS(ret) & ANYOF_POSIXL)
-        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
+
+        /* We don't optimize if we are supposed to make sure all non-Unicode
+         * code points raise a warning, as only ANYOF nodes have this check.
+         * */
+        && ! ((ANYOF_FLAGS(ret) & ANYOF_WARN_SUPER) && ALWAYS_WARN_SUPER))
     {
         UV start, end;
         U8 op = END; /* The optimzation node-type */
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8
index 7c387b540b..d6032d4905 100644
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -661,13 +661,17 @@ chr(0x110000) =~ /\p{Any}/;
 EXPECT
 Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
 ########
-# TODO optimized regnode should still give warnings
+# NAME optimizable regnode should still give non_unicode warnings when fatalized
 use warnings 'utf8';
+use warnings FATAL => 'non_unicode';
 chr(0x110000) =~ /\p{lb=cr}/;
+EXPECT
+Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 3.
+########
+# NAME optimizable regnode should not give non_unicode warnings when warnings are off
 no warnings 'non_unicode';
 chr(0x110000) =~ /\p{lb=cr}/;
 EXPECT
-Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
 ########
 require "../test.pl";
 use warnings 'utf8';