summary | refs | log | tree | commit | diff
path: root/src/pcre2_match.c
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-04-20 16:51:36 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-04-20 16:51:36 +0000
commit 72100a6110129b4870e4484eb0f395c4c389cf22 (patch)
tree b80a9c016d1fe149af6d51bbf79bac71927f25ce /src/pcre2_match.c
parent 50e8ff74cdc215dd02751e61a37d0dba384799d7 (diff)
download pcre2-72100a6110129b4870e4484eb0f395c4c389cf22.tar.gz
Tidy comments about UTF case-independence.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@762 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_match.c')
-rw-r--r-- src/pcre2_match.c 33
1 files changed, 13 insertions, 20 deletions
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 142a753..57743aa 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -929,7 +929,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Match a single character, caselessly. If we are at the end of the
- subject, give up immediately. */
+ subject, give up immediately. We get here only when the pattern character
+ has at most one other case. Characters with more than two cases are coded
+ as OP_PROP with the pseudo-property PT_CLIST. */
case OP_CHARI:
if (Feptr >= mb->end_subject)
@@ -945,10 +947,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Fecode++;
GETCHARLEN(fc, Fecode, Flength);
- /* If the pattern character's value is < 128, we have only one byte, and
- we know that its other case must also be one byte long, so we can use the
- fast lookup table. We know that there is at least one byte left in the
- subject. */
+ /* If the pattern character's value is < 128, we know that its other case
+ (if any) is also < 128 (and therefore only one code unit long in all
+ code-unit widths), so we can use the fast lookup table. We checked above
+ that there is at least one character left in the subject. */
if (fc < 128)
{
@@ -958,32 +960,23 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Feptr++;
}
- /* Otherwise we must pick up the subject character. Note that we cannot
- use the value of "Flength" to check for sufficient bytes left, because the
- other case of the character may have more or fewer bytes. */
+ /* Otherwise we must pick up the subject character and use Unicode
+ property support to test its other case. Note that we cannot use the
+ value of "Flength" to check for sufficient bytes left, because the other
+ case of the character may have more or fewer code units. */
else
{
uint32_t dc;
GETCHARINC(dc, Feptr);
Fecode += Flength;
-
- /* If we have Unicode property support, we can use it to test the other
- case of the character, if there is one. */
-
- if (fc != dc)
- {
-#ifdef SUPPORT_UNICODE
- if (dc != UCD_OTHERCASE(fc))
-#endif
- RRETURN(MATCH_NOMATCH);
- }
+ if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
}
}
else
#endif /* SUPPORT_UNICODE */
- /* Not UTF mode */
+ /* Not UTF mode; use the table for characters < 256. */
{
if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
!= TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);