Tidy comments about UTF case-independence.

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@762 6239d852-aaf2-0410-a92c-79f79f948069
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-04-20 16:51:36 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-04-20 16:51:36 +0000
commit: 72100a6110129b4870e4484eb0f395c4c389cf22 (patch)
tree: b80a9c016d1fe149af6d51bbf79bac71927f25ce /src/pcre2_match.c
parent: 50e8ff74cdc215dd02751e61a37d0dba384799d7 (diff)
download: pcre2-72100a6110129b4870e4484eb0f395c4c389cf22.tar.gz
1 files changed, 13 insertions, 20 deletions
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 142a753..57743aa 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -929,7 +929,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     /* ===================================================================== */
     /* Match a single character, caselessly. If we are at the end of the
-    subject, give up immediately. */
+    subject, give up immediately. We get here only when the pattern character 
+    has at most one other case. Characters with more than two cases are coded 
+    as OP_PROP with the pseudo-property PT_CLIST. */
 
     case OP_CHARI:
     if (Feptr >= mb->end_subject)
@@ -945,10 +947,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       Fecode++;
       GETCHARLEN(fc, Fecode, Flength);
 
-      /* If the pattern character's value is < 128, we have only one byte, and
-      we know that its other case must also be one byte long, so we can use the
-      fast lookup table. We know that there is at least one byte left in the
-      subject. */
+      /* If the pattern character's value is < 128, we know that its other case
+      (if any) is also < 128 (and therefore only one code unit long in all 
+      code-unit widths), so we can use the fast lookup table. We checked above
+      that there is at least one character left in the subject. */
 
       if (fc < 128)
         {
@@ -958,32 +960,23 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
         Feptr++;
         }
 
-      /* Otherwise we must pick up the subject character. Note that we cannot
-      use the value of "Flength" to check for sufficient bytes left, because the
-      other case of the character may have more or fewer bytes.  */
+      /* Otherwise we must pick up the subject character and use Unicode
+      property support to test its other case. Note that we cannot use the
+      value of "Flength" to check for sufficient code units left, because the
+      other case of the character may have more or fewer code units. */
 
       else
         {
         uint32_t dc;
         GETCHARINC(dc, Feptr);
         Fecode += Flength;
-
-        /* If we have Unicode property support, we can use it to test the other
-        case of the character, if there is one. */
-
-        if (fc != dc)
-          {
-#ifdef SUPPORT_UNICODE
-          if (dc != UCD_OTHERCASE(fc))
-#endif
-            RRETURN(MATCH_NOMATCH);
-          }
+        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
         }
       }
     else
 #endif   /* SUPPORT_UNICODE */
 
-    /* Not UTF mode */
+    /* Not UTF mode; use the table for characters < 256. */
       {
       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
author	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2017-04-20 16:51:36 +0000
committer	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2017-04-20 16:51:36 +0000
commit	72100a6110129b4870e4484eb0f395c4c389cf22 (patch)
tree	b80a9c016d1fe149af6d51bbf79bac71927f25ce /src/pcre2_match.c
parent	50e8ff74cdc215dd02751e61a37d0dba384799d7 (diff)
download	pcre2-72100a6110129b4870e4484eb0f395c4c389cf22.tar.gz