From d4e0b827d7de0f9f86fa8b9dde76e6d722b0cf2d Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Wed, 20 Oct 2010 11:11:13 -0600
Subject: regexec.c: utf8 doesn't match /i nonutf8 self

This is a continuation of [perl #78464]. It fixes it also for the /i
flag. After this, a character should match itself in the regrepeat
function, even if one is in utf8 and the other isn't, for both /i and
not.

The solution is to move the code for handling /i into the non-i
structure so that the decisions about utf8 are all in one place. When
the string is in utf8, it uses the utf8-fold function.

This has the added effect of fixing a few cases where a utf8 string did
not match a fold in a non-utf8 pattern. I haven't added tests for
these, as it only fixes a few cases where this is a problem, and I'm
working on a comprehensive solution to the problem, accompanied by
extensive tests.
---
 t/re/pat.t | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 't/re/pat.t')

diff --git a/t/re/pat.t b/t/re/pat.t
index 46681040e5..d4bbbb8f20 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -1078,10 +1078,14 @@ sub run_tests {
         my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
         utf8::upgrade($utf8_pattern);
         ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
+        ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
         ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not";
+        ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not";
         utf8::upgrade($c);
         ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not";
+        ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not";
         ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8";
+        ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
     }
 
     {
-- 
cgit v1.2.1