summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regcomp.c14
-rw-r--r--regexec.c9
-rwxr-xr-xt/op/pat.t34
-rw-r--r--utf8.h5
4 files changed, 58 insertions, 4 deletions
diff --git a/regcomp.c b/regcomp.c
index 2e0943ea6e..ea98177a9f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3981,9 +3981,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (prevvalue < value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
(UV)prevvalue, (UV)value);
- else if (prevvalue == value)
+ else if (prevvalue == value) {
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)value);
+ if (FOLD) {
+ if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ UNICODE_GREEK_SMALL_LETTER_SIGMA);
+ }
+ else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ UNICODE_GREEK_SMALL_LETTER_SIGMA);
+ }
+ }
}
}
diff --git a/regexec.c b/regexec.c
index d239a70144..7a00dfdd46 100644
--- a/regexec.c
+++ b/regexec.c
@@ -975,7 +975,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
* Fortunately, not getting this right is allowed
* for Unicode Regular Expression Support level 1,
* only one-to-one matching is required. --jhi */
- if (c1 == c2)
+ if (c1 == c2) {
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
&& (ln == len ||
@@ -985,9 +985,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
goto got_it;
s += len;
}
- else
+ }
+ else {
while (s <= e) {
UV c = utf8_to_uvchr((U8*)s, &len);
+ if (c == UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
+ c == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
+ c = UNICODE_GREEK_SMALL_LETTER_SIGMA;
if ( (c == c1 || c == c2)
&& (ln == len ||
ibcmp_utf8(s, do_utf8, strend - s,
@@ -996,6 +1000,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
goto got_it;
s += len;
}
+ }
}
else {
if (c1 == c2)
diff --git a/t/op/pat.t b/t/op/pat.t
index 30fc1a949c..a8742f8cbd 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..794\n";
+print "1..812\n";
BEGIN {
chdir 't' if -d 't';
@@ -2418,3 +2418,35 @@ print "# some Unicode properties\n";
print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n";
print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n";
}
+
+{
+ print "# FINAL SIGMA\n";
+
+ my $SIGMA = "\x{03A3}"; # CAPITAL
+ my $Sigma = "\x{03C2}"; # SMALL FINAL
+ my $sigma = "\x{03C3}"; # SMALL
+
+ print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n";
+ print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n";
+ print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n";
+
+ print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n";
+ print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n";
+ print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n";
+
+ print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n";
+ print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n";
+ print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n";
+
+ print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n";
+ print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n";
+ print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n";
+
+ print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n";
+ print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n";
+ print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n";
+
+ print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n";
+ print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n";
+ print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n";
+}
diff --git a/utf8.h b/utf8.h
index b35cfebb5e..d907d26b20 100644
--- a/utf8.h
+++ b/utf8.h
@@ -188,3 +188,8 @@ END_EXTERN_C
#endif
#define UTF8_IS_ASCII(c) UTF8_IS_INVARIANT(c)
+
+#define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
+#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
+#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
+