The #14715 and #14716 were okay: they just revealed

a bug in the EXACTF matching. p4raw-id: //depot/perl@14724
author: Jarkko Hietaniemi <jhi@iki.fi> 2002-02-17 01:12:37 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2002-02-17 01:12:37 +0000
commit: 1feea2c750cf5f74093deff93d1536d7d44a8925 (patch)
tree: b2007ead2265e4aba5344f9c2d3d040dbf42f67d
parent: 640ed10d7c25ae7621893c11f70653a184961f71 (diff)
download: perl-1feea2c750cf5f74093deff93d1536d7d44a8925.tar.gz
2 files changed, 32 insertions, 16 deletions
diff --git a/regexec.c b/regexec.c
index 8bd2284f29..900b491502 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2380,8 +2380,8 @@ S_regmatch(pTHX_ regnode *prog)
 		char *l = locinput;
 		char *e = PL_regeol;
 
-		if (ibcmp_utf8(s, 0,  ln, do_utf8,
-			       l, &e, 0,  UTF)) {
+		if (ibcmp_utf8(s, 0,  ln, UTF,
+			       l, &e, 0,  do_utf8)) {
 		     /* One more case for the sharp s:
 		      * pack("U0U*", 0xDF) =~ /ss/i,
 		      * the 0xC3 0x9F are the UTF-8
diff --git a/utf8.c b/utf8.c
index 71aaf8aa6a..0db449c7b4 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1287,23 +1287,38 @@ to the hash is by Perl_to_utf8_case().
 UV
 Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *normal, char *special)
 {
-    UV uv;
+    UV uv0, uv1, uv2;
+    U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
+    STRLEN len;
 
     if (!*swashp)
         *swashp = swash_init("utf8", normal, &PL_sv_undef, 4, 0);
-    uv = swash_fetch(*swashp, p, TRUE);
-    if (!uv) {
+    uv0 = utf8_to_uvchr(p, 0);
+    /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings
+     * are necessary in EBCDIC, they are redundant no-ops
+     * in ASCII-ish platforms, and hopefully optimized away. */
+    uv1 = NATIVE_TO_UNI(uv0);
+    uvuni_to_utf8(tmpbuf, uv1);
+    uv2 = swash_fetch(*swashp, tmpbuf, TRUE);
+    if (uv2) {
+	 /* It was "normal" (single character mapping). */
+         UV uv3 = UNI_TO_NATIVE(uv2);
+
+         len = uvchr_to_utf8(ustrp, uv3) - ustrp;
+         if (lenp)
+              *lenp = len;
+
+         return uv3;
+    }
+    else {
 	 HV *hv;
 	 SV *keysv;
 	 HE *he;
 
-	 uv = utf8_to_uvchr(p, 0);
-
 	 if ((hv    = get_hv(special, FALSE)) &&
-	     (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv))) &&
+	     (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv1))) &&
 	     (he    = hv_fetch_ent(hv, keysv, FALSE, 0))) {
 	      SV *val = HeVAL(he);
-	      STRLEN len;
 	      char *s = SvPV(val, len);
 
 	      if (len > 1) {
@@ -1316,8 +1331,6 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma
 			 * mapping, since any characters in the low 256
 			 * are in Unicode code points, not EBCDIC.
 			 * --jhi */
-
-			U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
 			U8 *d = tmpbuf;
 			U8 *t, *tend;
 			
@@ -1351,14 +1364,17 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma
 	      }
 	      if (lenp)
 		   *lenp = len;
+
 	      return utf8_to_uvchr(ustrp, 0);
 	 }
-	 uv  = NATIVE_TO_UNI(uv);
+
+	 /* So it was not "special": just copy it. */
+	 len = uvchr_to_utf8(ustrp, uv0) - ustrp;
+	 if (lenp)
+	      *lenp = len;
+
+	 return uv0;
     }
-    if (lenp)
-       *lenp = UNISKIP(uv);
-    uvuni_to_utf8(ustrp, uv);
-    return uv;
 }
 
 /*
author	Jarkko Hietaniemi <jhi@iki.fi>	2002-02-17 01:12:37 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2002-02-17 01:12:37 +0000
commit	1feea2c750cf5f74093deff93d1536d7d44a8925 (patch)
tree	b2007ead2265e4aba5344f9c2d3d040dbf42f67d
parent	640ed10d7c25ae7621893c11f70653a184961f71 (diff)
download	perl-1feea2c750cf5f74093deff93d1536d7d44a8925.tar.gz