summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-12-18 22:41:35 -0700
committerKarl Williamson <public@khwilliamson.com>2013-12-31 08:27:22 -0700
commitf2c2a6ab6210456432884985634012f6a0b6b00f (patch)
tree1aea9e958463f0cfbd77d41c07644371795727d4
parent731cb813cb160baa0c7969c70e0864d4da074738 (diff)
downloadperl-f2c2a6ab6210456432884985634012f6a0b6b00f.tar.gz
Fatalized non-unicode warnings skip regex optimization
This makes sure that fatalized non-unicode warnings actually get output. For example, \p{Line_Break=CR} would normally get optimized into an EXACT node. But if the user has made non-unicode warnings fatal, indicating they want to be sure not even to try to match such code points, the optimization is skipped so that the checks are made. Documentation for this change will be in a future commit.
-rw-r--r--regcomp.c12
-rw-r--r--t/lib/warnings/utf88
2 files changed, 17 insertions, 3 deletions
diff --git a/regcomp.c b/regcomp.c
index 46da08bee5..5c3260a153 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -430,6 +430,11 @@ static const scan_data_t zero_scan_data =
#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
+/* For programs that want to be strictly Unicode compatible by dying if any
+ * attempt is made to match a non-Unicode code point against a Unicode
+ * property.  Expands to a ckDEAD() check of the WARN_NON_UNICODE warning
+ * category; presumably true when that category has been made fatal (e.g.
+ * "use warnings FATAL => 'non_unicode'") -- confirm against warnings.h. */
+#define ALWAYS_WARN_SUPER ckDEAD(packWARN(WARN_NON_UNICODE))
+
#define OOB_NAMEDCLASS -1
/* There is no code point that is out-of-bounds, so this is problematic. But
@@ -14501,7 +14506,12 @@ parseit:
&& ! invert
&& ! depends_list
&& ! (ANYOF_FLAGS(ret) & ANYOF_POSIXL)
- && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+ && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
+
+ /* We don't optimize if we are supposed to make sure all non-Unicode
+ * code points raise a warning, as only ANYOF nodes have this check.
+ * The flag is tested with '&' (as ANYOF_POSIXL is above): OR-ing in
+ * ANYOF_WARN_SUPER would make the test true for every node, whether
+ * or not this class asked for the warning.
+ * */
+ && ! ((ANYOF_FLAGS(ret) & ANYOF_WARN_SUPER) && ALWAYS_WARN_SUPER))
{
UV start, end;
U8 op = END; /* The optimization node-type */
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8
index 7c387b540b..d6032d4905 100644
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -661,13 +661,17 @@ chr(0x110000) =~ /\p{Any}/;
EXPECT
Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
########
-# TODO optimized regnode should still give warnings
+# NAME optimizable regnode should still give non_unicode warnings when fatalized
use warnings 'utf8';
+use warnings FATAL => 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
+EXPECT
+Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 3.
+########
+# NAME optimizable regnode should not give non_unicode warnings when warnings are off
no warnings 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
-Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
########
require "../test.pl";
use warnings 'utf8';