diff options
-rw-r--r-- | regcomp.c | 14 | ||||
-rw-r--r-- | regexec.c | 9 | ||||
-rwxr-xr-x | t/op/pat.t | 34 | ||||
-rw-r--r-- | utf8.h | 5 |
4 files changed, 58 insertions, 4 deletions
@@ -3981,9 +3981,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (prevvalue < value) Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)prevvalue, (UV)value); - else if (prevvalue == value) + else if (prevvalue == value) { Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value); + if (FOLD) { + if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) { + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_CAPITAL_LETTER_SIGMA); + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_SMALL_LETTER_SIGMA); + } + else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA) + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_SMALL_LETTER_SIGMA); + } + } } } @@ -975,7 +975,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta * Fortunately, not getting this right is allowed * for Unicode Regular Expression Support level 1, * only one-to-one matching is required. --jhi */ - if (c1 == c2) + if (c1 == c2) { while (s <= e) { if ( utf8_to_uvchr((U8*)s, &len) == c1 && (ln == len || @@ -985,9 +985,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta goto got_it; s += len; } - else + } + else { while (s <= e) { UV c = utf8_to_uvchr((U8*)s, &len); + if (c == UNICODE_GREEK_CAPITAL_LETTER_SIGMA || + c == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) + c = UNICODE_GREEK_SMALL_LETTER_SIGMA; if ( (c == c1 || c == c2) && (ln == len || ibcmp_utf8(s, do_utf8, strend - s, @@ -996,6 +1000,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta goto got_it; s += len; } + } } else { if (c1 == c2) diff --git a/t/op/pat.t b/t/op/pat.t index 30fc1a949c..a8742f8cbd 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..794\n"; +print "1..812\n"; BEGIN { chdir 't' if -d 't'; @@ -2418,3 +2418,35 @@ print "# some Unicode properties\n"; print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; } + +{ + print "# FINAL SIGMA\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; + print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; + print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; + + print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; + print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; + print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; + + print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; + print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; + print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; + + print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; + print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; + print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; + + print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; + print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; + print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; + + print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; + print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; + print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; +} @@ -188,3 +188,8 @@ END_EXTERN_C #endif #define UTF8_IS_ASCII(c) UTF8_IS_INVARIANT(c) + +#define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3 +#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 +#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 + |