It could be possible for the case-insensitive

Unicode-aware string comparison to wander off to the la-la land. p4raw-id: //depot/perl@13669
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-13 04:38:19 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-13 04:38:19 +0000
commit: 332ddc25e33054764b72da8fd0b572ab1c6c6e65 (patch)
tree: 11df6fb10d4e8bca0069f7509632e09304816644 /utf8.c
parent: c4fbe2471f42249bd57e1c071c99349d2331aea5 (diff)
download: perl-332ddc25e33054764b72da8fd0b572ab1c6c6e65.tar.gz
1 files changed, 44 insertions, 43 deletions
diff --git a/utf8.c b/utf8.c
index 1a024b719c..747b78cfe6 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1579,8 +1579,7 @@ Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
 Return true if the strings s1 and s2 differ case-insensitively, false
 if not (if they are equal case-insensitively).  If u1 is true, the
 string s1 is assumed to be in UTF-8-encoded Unicode.  If u2 is true,
-the string s2 is assumed to be in UTF-8-encoded Unicode.  (If both u1
-and u2 are false, ibcmp() is called.)
+the string s2 is assumed to be in UTF-8-encoded Unicode.
 
 For case-insensitiveness, the "casefolding" of Unicode is used
 instead of upper/lowercasing both the characters, see
@@ -1588,50 +1587,52 @@ http://www.unicode.org/unicode/reports/tr21/ (Case Mappings).
 
 =cut */
 I32
-Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, const char *s2, bool u2, register I32 len)
-{
-     if (u1 || u2) {
-	  register U8 *a = (U8*)s1;
-	  register U8 *b = (U8*)s2;
-	  STRLEN la, lb;
-	  UV ca, cb;
-	  STRLEN ulen1, ulen2;
-	  U8 tmpbuf1[UTF8_MAXLEN*3+1];
-	  U8 tmpbuf2[UTF8_MAXLEN*3+1];
-	  
-	  while (len) {
+Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2, bool u2, register I32 len2)
+{
+     register U8 *a  = (U8*)s1;
+     register U8 *b  = (U8*)s2;
+     register U8 *ae = b + len1;
+     register U8 *be = b + len2;
+     STRLEN la, lb;
+     UV ca, cb;
+     STRLEN ulen1, ulen2;
+     U8 tmpbuf1[UTF8_MAXLEN*3+1];
+     U8 tmpbuf2[UTF8_MAXLEN*3+1];
+     
+     while (a < ae && b < be) {
+	  if (u1) {
+	       if (a + UTF8SKIP(a) > ae)
+		    break;
+	       ca = utf8_to_uvchr((U8*)a, &la);
+	  } else {
+	       ca = *a;
+	       la = 1;
+	  }
+	  if (u2) {
+	       if (b + UTF8SKIP(b) > be)
+		    break;
+	       cb = utf8_to_uvchr((U8*)b, &lb);
+	  } else {
+	       cb = *b;
+	       lb = 1;
+	  }
+	  if (ca != cb) {
 	       if (u1)
-		    ca = utf8_to_uvchr((U8*)a, &la);
-	       else {
-		    ca = *a;
-		    la = 1;
-	       }
+		    to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
+	       else
+		    ulen1 = 1;
 	       if (u2)
-		    cb = utf8_to_uvchr((U8*)b, &lb);
-	       else {
-		    cb = *b;
-		    lb = 1;
-	       }
-	       if (ca != cb) {
-		    if (u1)
-			 to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
-		    else
-			 ulen1 = 1;
-		    if (u2)
-			 to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
-		    else
-			 ulen2 = 1;
-		    if (ulen1 != ulen2
-			|| (ulen1 == 1 && PL_fold[ca] != PL_fold[cb])
-			|| memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
-			 return 1;
-	       }
-	       a += la;
-	       b += lb;
+		    to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
+	       else
+		    ulen2 = 1;
+	       if (ulen1 != ulen2
+		   || (ulen1 == 1 && PL_fold[ca] != PL_fold[cb])
+		   || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
+		    return 1; /* mismatch */
 	  }
-	  return 0;
+	  a += la;
+	  b += lb;
      }
-     else
-         return ibcmp(s1, s2, len);
+     return a == ae && b == be ? 0 : 1; /* 0 match, 1 mismatch */
 }
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-12-13 04:38:19 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-12-13 04:38:19 +0000
commit	332ddc25e33054764b72da8fd0b572ab1c6c6e65 (patch)
tree	11df6fb10d4e8bca0069f7509632e09304816644 /utf8.c
parent	c4fbe2471f42249bd57e1c071c99349d2331aea5 (diff)
download	perl-332ddc25e33054764b72da8fd0b572ab1c6c6e65.tar.gz