summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
authorAnatol Belski <ab@php.net>2013-03-03 19:06:22 +0100
committerAnatol Belski <ab@php.net>2013-03-03 19:06:22 +0100
commit357ab3cbada57374075ccf57c9ec25bbbbcb6948 (patch)
treef8c61f2068629b070bf82ada4ad2b2adc44516ba /ext/pcre/pcrelib/pcre_exec.c
parentc134f2a1a5ef56da72dbdc1096eddc91e4f5939e (diff)
downloadphp-git-357ab3cbada57374075ccf57c9ec25bbbbcb6948.tar.gz
merged PCRE 8.32
- defined HAVE_CONFIG_H to simplify the future merges - PCRE NEWS http://pcre.org/news.txt
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r--ext/pcre/pcrelib/pcre_exec.c900
1 files changed, 401 insertions, 499 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 4a8f1c268c..05d0e52d33 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -41,7 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
possible. There are also some static supporting functions. */
+#ifdef HAVE_CONFIG_H
#include "config.h"
+#endif
#define NLBLOCK md /* Block containing newline information */
#define PSSTART start_subject /* Field containing processed string start */
@@ -90,8 +92,6 @@ because the offset vector is always a multiple of 3 long. */
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
-
-
#ifdef PCRE_DEBUG
/*************************************************
* Debugging function to print chars *
@@ -112,10 +112,11 @@ Returns: nothing
static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
-unsigned int c;
+pcre_uint32 c;
+BOOL utf = md->utf;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
while (length-- > 0)
- if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
+ if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
}
#endif
@@ -148,6 +149,9 @@ match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
{
PCRE_PUCHAR eptr_start = eptr;
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
+#ifdef SUPPORT_UTF
+BOOL utf = md->utf;
+#endif
#ifdef PCRE_DEBUG
if (eptr >= md->end_subject)
@@ -175,24 +179,35 @@ if (caseless)
{
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
- if (md->utf)
+ if (utf)
{
/* Match characters up to the end of the reference. NOTE: the number of
- bytes matched may differ, because there are some characters whose upper and
- lower case versions code as different numbers of bytes. For example, U+023A
- (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
- a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
- the latter. It is important, therefore, to check the length along the
- reference, not along the subject (earlier code did this wrong). */
+ data units matched may differ, because in UTF-8 there are some characters
+ whose upper and lower case versions code have different numbers of bytes.
+ For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
+ (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
+ sequence of two of the latter. It is important, therefore, to check the
+ length along the reference, not along the subject (earlier code did this
+ wrong). */
PCRE_PUCHAR endptr = p + length;
while (p < endptr)
{
- int c, d;
+ pcre_uint32 c, d;
+ const ucd_record *ur;
if (eptr >= md->end_subject) return -2; /* Partial match */
GETCHARINC(c, eptr);
GETCHARINC(d, p);
- if (c != d && c != UCD_OTHERCASE(d)) return -1;
+ ur = GET_UCD(d);
+ if (c != d && c != d + ur->other_case)
+ {
+ const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
+ for (;;)
+ {
+ if (c < *pp) return -1;
+ if (c == *pp++) break;
+ }
+ }
}
}
else
@@ -204,8 +219,11 @@ if (caseless)
{
while (length-- > 0)
{
+ pcre_uchar cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
+ cc = RAWUCHARTEST(eptr);
+ cp = RAWUCHARTEST(p);
+ if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
p++;
eptr++;
}
@@ -220,7 +238,7 @@ else
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (*p++ != *eptr++) return -1;
+ if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
}
}
@@ -276,7 +294,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66 };
+ RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -294,7 +312,7 @@ actually used in this definition. */
}
#define RRETURN(ra) \
{ \
- printf("match() returned %d from line %d ", ra, __LINE__); \
+ printf("match() returned %d from line %d\n", ra, __LINE__); \
return ra; \
}
#else
@@ -385,7 +403,7 @@ typedef struct heapframe {
#ifdef SUPPORT_UCP
int Xprop_type;
- int Xprop_value;
+ unsigned int Xprop_value;
int Xprop_fail_result;
int Xoclength;
pcre_uchar Xocchars[6];
@@ -486,7 +504,7 @@ so they can be ordinary variables in all cases. Mark some of them with
register int rrc; /* Returns from recursive calls */
register int i; /* Used for loops not involving calls to RMATCH() */
-register unsigned int c; /* Character values not kept over RMATCH() calls */
+register pcre_uint32 c; /* Character values not kept over RMATCH() calls */
register BOOL utf; /* Local copy of UTF flag for speed */
BOOL minimize, possessive; /* Quantifier options */
@@ -603,7 +621,7 @@ BOOL prev_is_word;
#ifdef SUPPORT_UCP
int prop_type;
-int prop_value;
+unsigned int prop_value;
int prop_fail_result;
int oclength;
pcre_uchar occhars[6];
@@ -614,9 +632,9 @@ int ctype;
int length;
int max;
int min;
-int number;
+unsigned int number;
int offset;
-int op;
+pcre_uchar op;
int save_capture_last;
int save_offset1, save_offset2, save_offset3;
int stacksave[REC_STACK_SAVE_MAX];
@@ -735,7 +753,7 @@ for (;;)
unaltered. */
else if (rrc == MATCH_SKIP_ARG &&
- STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
+ STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
{
md->start_match_ptr = eptr;
RRETURN(MATCH_SKIP);
@@ -1260,10 +1278,12 @@ for (;;)
cb.version = 2; /* Version 1 of the callout block */
cb.callout_number = ecode[LINK_SIZE+2];
cb.offset_vector = md->offset_vector;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)md->start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)md->start_subject;
#endif
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
@@ -1293,7 +1313,7 @@ for (;;)
}
else
{
- int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
+ unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
/* If the test is for recursion into a specific subpattern, and it is
@@ -1365,7 +1385,7 @@ for (;;)
if (!condition && condcode == OP_NCREF)
{
- int refno = offset >> 1;
+ unsigned int refno = offset >> 1;
pcre_uchar *slotA = md->name_table;
for (i = 0; i < md->name_count; i++)
@@ -1683,10 +1703,12 @@ for (;;)
cb.version = 2; /* Version 1 of the callout block */
cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)md->start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)md->start_subject;
#endif
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
@@ -1723,7 +1745,7 @@ for (;;)
case OP_RECURSE:
{
recursion_info *ri;
- int recno;
+ unsigned int recno;
callpat = md->start_code + GET(ecode, 1);
recno = (callpat == md->start_code)? 0 :
@@ -2077,7 +2099,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2121,7 +2143,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2264,7 +2286,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2413,22 +2435,24 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x000d:
+ case CHAR_CR:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
}
- else if (*eptr == 0x0a) eptr++;
+ else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
break;
- case 0x000a:
+ case CHAR_LF:
break;
- case 0x000b:
- case 0x000c:
- case 0x0085:
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#ifndef EBCDIC
case 0x2028:
case 0x2029:
+#endif /* Not EBCDIC */
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
@@ -2444,27 +2468,8 @@ for (;;)
GETCHARINCTEST(c, eptr);
switch(c)
{
+ HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
default: break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- RRETURN(MATCH_NOMATCH);
}
ecode++;
break;
@@ -2478,27 +2483,8 @@ for (;;)
GETCHARINCTEST(c, eptr);
switch(c)
{
+ HSPACE_CASES: break; /* Byte and multibyte cases */
default: RRETURN(MATCH_NOMATCH);
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- break;
}
ecode++;
break;
@@ -2512,15 +2498,8 @@ for (;;)
GETCHARINCTEST(c, eptr);
switch(c)
{
+ VSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- RRETURN(MATCH_NOMATCH);
}
ecode++;
break;
@@ -2534,15 +2513,8 @@ for (;;)
GETCHARINCTEST(c, eptr);
switch(c)
{
+ VSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- break;
}
ecode++;
break;
@@ -2560,6 +2532,7 @@ for (;;)
}
GETCHARINCTEST(c, eptr);
{
+ const pcre_uint32 *cp;
const ucd_record *prop = GET_UCD(c);
switch(ecode[1])
@@ -2620,6 +2593,17 @@ for (;;)
RRETURN(MATCH_NOMATCH);
break;
+ case PT_CLIST:
+ cp = PRIV(ucd_caseless_sets) + ecode[2];
+ for (;;)
+ {
+ if (c < *cp)
+ { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
+ if (c == *cp++)
+ { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
+ }
+ break;
+
/* This should never occur */
default:
@@ -2639,19 +2623,25 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
- while (eptr < md->end_subject)
+ else
{
- int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr += len;
+ int lgb, rgb;
+ GETCHARINCTEST(c, eptr);
+ lgb = UCD_GRAPHBREAK(c);
+ while (eptr < md->end_subject)
+ {
+ int len = 1;
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ lgb = rgb;
+ eptr += len;
+ }
}
CHECK_PARTIAL();
ecode++;
break;
-#endif
+#endif /* SUPPORT_UCP */
/* Match a back reference, possibly repeatedly. Look past the end of the
@@ -3160,7 +3150,7 @@ for (;;)
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
+ while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
}
else
#endif
@@ -3200,8 +3190,8 @@ for (;;)
if (fc < 128)
{
- if (md->lcc[fc]
- != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
+ pcre_uchar cc = RAWUCHAR(eptr);
+ if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
}
@@ -3212,7 +3202,7 @@ for (;;)
else
{
- unsigned int dc;
+ pcre_uint32 dc;
GETCHARINC(dc, eptr);
ecode += length;
@@ -3322,7 +3312,7 @@ for (;;)
if (length > 1)
{
#ifdef SUPPORT_UCP
- unsigned int othercase;
+ pcre_uint32 othercase;
if (op >= OP_STARI && /* Caseless */
(othercase = UCD_OTHERCASE(fc)) != fc)
oclength = PRIV(ord2utf)(othercase, occhars);
@@ -3449,12 +3439,15 @@ for (;;)
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
if (min == max) continue;
@@ -3462,6 +3455,8 @@ for (;;)
{
for (fi = min;; fi++)
{
+ pcre_uchar cc;
+
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
@@ -3470,7 +3465,8 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
/* Control never gets here */
@@ -3480,12 +3476,15 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (fc != *eptr && foc != *eptr) break;
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) break;
eptr++;
}
@@ -3513,7 +3512,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
if (min == max) continue;
@@ -3530,7 +3529,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3544,7 +3543,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- if (fc != *eptr) break;
+ if (fc != RAWUCHARTEST(eptr)) break;
eptr++;
}
if (possessive) continue;
@@ -3573,7 +3572,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int ch, och;
+ register pcre_uint32 ch, och;
ecode++;
GETCHARINC(ch, ecode);
@@ -3600,7 +3599,7 @@ for (;;)
else
#endif
{
- register unsigned int ch = ecode[1];
+ register pcre_uint32 ch = ecode[1];
c = *eptr++;
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
RRETURN(MATCH_NOMATCH);
@@ -3714,7 +3713,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
@@ -3749,7 +3748,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (fi = min;; fi++)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
@@ -3794,7 +3793,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (i = min; i < max; i++)
{
int len = 1;
@@ -3851,7 +3850,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
@@ -3885,7 +3884,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (fi = min;; fi++)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
@@ -3929,7 +3928,7 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (i = min; i < max; i++)
{
int len = 1;
@@ -4205,6 +4204,27 @@ for (;;)
}
break;
+ case PT_CLIST:
+ for (i = 1; i <= min; i++)
+ {
+ const pcre_uint32 *cp;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ cp = PRIV(ucd_caseless_sets) + prop_value;
+ for (;;)
+ {
+ if (c < *cp)
+ { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
+ if (c == *cp++)
+ { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
+ }
+ }
+ break;
+
/* This should not occur */
default:
@@ -4224,14 +4244,20 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
- while (eptr < md->end_subject)
+ else
{
- int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr += len;
+ int lgb, rgb;
+ GETCHARINCTEST(c, eptr);
+ lgb = UCD_GRAPHBREAK(c);
+ while (eptr < md->end_subject)
+ {
+ int len = 1;
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ lgb = rgb;
+ eptr += len;
+ }
}
CHECK_PARTIAL();
}
@@ -4258,7 +4284,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4299,18 +4325,20 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x000d:
- if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
+ case CHAR_CR:
+ if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
break;
- case 0x000a:
+ case CHAR_LF:
break;
- case 0x000b:
- case 0x000c:
- case 0x0085:
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#ifndef EBCDIC
case 0x2028:
case 0x2029:
+#endif /* Not EBCDIC */
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
@@ -4328,27 +4356,8 @@ for (;;)
GETCHARINC(c, eptr);
switch(c)
{
+ HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
default: break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- RRETURN(MATCH_NOMATCH);
}
}
break;
@@ -4364,27 +4373,8 @@ for (;;)
GETCHARINC(c, eptr);
switch(c)
{
+ HSPACE_CASES: break; /* Byte and multibyte cases */
default: RRETURN(MATCH_NOMATCH);
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- break;
}
}
break;
@@ -4400,15 +4390,8 @@ for (;;)
GETCHARINC(c, eptr);
switch(c)
{
+ VSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- RRETURN(MATCH_NOMATCH);
}
}
break;
@@ -4424,15 +4407,8 @@ for (;;)
GETCHARINC(c, eptr);
switch(c)
{
+ VSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- break;
}
}
break;
@@ -4454,12 +4430,15 @@ for (;;)
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -4469,12 +4448,15 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
+ cc = RAWUCHAR(eptr);
+ if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
@@ -4484,12 +4466,15 @@ for (;;)
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -4499,12 +4484,15 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
+ cc = RAWUCHAR(eptr);
+ if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
@@ -4514,12 +4502,15 @@ for (;;)
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -4590,17 +4581,17 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x000d:
- if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
+ case CHAR_CR:
+ if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
break;
- case 0x000a:
+ case CHAR_LF:
break;
- case 0x000b:
- case 0x000c:
- case 0x0085:
-#ifdef COMPILE_PCRE16
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case 0x2028:
case 0x2029:
#endif
@@ -4621,26 +4612,9 @@ for (;;)
switch(*eptr++)
{
default: break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
-#ifdef COMPILE_PCRE16
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -4658,26 +4632,9 @@ for (;;)
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
-#ifdef COMPILE_PCRE16
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
break;
}
@@ -4694,17 +4651,12 @@ for (;;)
}
switch(*eptr++)
{
- default: break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
-#ifdef COMPILE_PCRE16
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
+ default: break;
}
}
break;
@@ -4720,14 +4672,9 @@ for (;;)
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
-#ifdef COMPILE_PCRE16
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
break;
}
@@ -5005,8 +4952,31 @@ for (;;)
}
/* Control never gets here */
- /* This should never occur */
+ case PT_CLIST:
+ for (fi = min;; fi++)
+ {
+ const pcre_uint32 *cp;
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ cp = PRIV(ucd_caseless_sets) + prop_value;
+ for (;;)
+ {
+ if (c < *cp)
+ { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
+ if (c == *cp++)
+ { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
+ }
+ }
+ /* Control never gets here */
+ /* This should never occur */
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
@@ -5027,14 +4997,20 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
- while (eptr < md->end_subject)
+ else
{
- int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr += len;
+ int lgb, rgb;
+ GETCHARINCTEST(c, eptr);
+ lgb = UCD_GRAPHBREAK(c);
+ while (eptr < md->end_subject)
+ {
+ int len = 1;
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ lgb = rgb;
+ eptr += len;
+ }
}
CHECK_PARTIAL();
}
@@ -5080,17 +5056,20 @@ for (;;)
switch(c)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x000d:
- if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
+ case CHAR_CR:
+ if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
break;
- case 0x000a:
+
+ case CHAR_LF:
break;
- case 0x000b:
- case 0x000c:
- case 0x0085:
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#ifndef EBCDIC
case 0x2028:
case 0x2029:
+#endif /* Not EBCDIC */
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
@@ -5099,84 +5078,32 @@ for (;;)
case OP_NOT_HSPACE:
switch(c)
{
+ HSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- RRETURN(MATCH_NOMATCH);
}
break;
case OP_HSPACE:
switch(c)
{
+ HSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- break;
}
break;
case OP_NOT_VSPACE:
switch(c)
{
+ VSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- RRETURN(MATCH_NOMATCH);
}
break;
case OP_VSPACE:
switch(c)
{
+ VSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- break;
}
break;
@@ -5254,17 +5181,17 @@ for (;;)
switch(c)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x000d:
- if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
+ case CHAR_CR:
+ if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
break;
- case 0x000a:
+ case CHAR_LF:
break;
- case 0x000b:
- case 0x000c:
- case 0x0085:
-#ifdef COMPILE_PCRE16
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case 0x2028:
case 0x2029:
#endif
@@ -5277,26 +5204,9 @@ for (;;)
switch(c)
{
default: break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
-#ifdef COMPILE_PCRE16
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -5306,26 +5216,9 @@ for (;;)
switch(c)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
-#ifdef COMPILE_PCRE16
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
break;
}
@@ -5335,14 +5228,9 @@ for (;;)
switch(c)
{
default: break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
-#ifdef COMPILE_PCRE16
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -5352,14 +5240,9 @@ for (;;)
switch(c)
{
default: RRETURN(MATCH_NOMATCH);
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
-#ifdef COMPILE_PCRE16
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
break;
}
@@ -5563,6 +5446,30 @@ for (;;)
}
break;
+ case PT_CLIST:
+ for (i = min; i < max; i++)
+ {
+ const pcre_uint32 *cp;
+ int len = 1;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLENTEST(c, eptr, len);
+ cp = PRIV(ucd_caseless_sets) + prop_value;
+ for (;;)
+ {
+ if (c < *cp)
+ { if (prop_fail_result) break; else goto GOT_MAX; }
+ if (c == *cp++)
+ { if (prop_fail_result) goto GOT_MAX; else break; }
+ }
+ eptr += len;
+ }
+ GOT_MAX:
+ break;
+
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
@@ -5586,21 +5493,25 @@ for (;;)
{
for (i = min; i < max; i++)
{
- int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- if (UCD_CATEGORY(c) == ucp_M) break;
- eptr += len;
- while (eptr < md->end_subject)
+ else
{
- len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- if (UCD_CATEGORY(c) != ucp_M) break;
- eptr += len;
+ int lgb, rgb;
+ GETCHARINCTEST(c, eptr);
+ lgb = UCD_GRAPHBREAK(c);
+ while (eptr < md->end_subject)
+ {
+ int len = 1;
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+ lgb = rgb;
+ eptr += len;
+ }
}
CHECK_PARTIAL();
}
@@ -5650,7 +5561,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5676,7 +5587,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5730,17 +5641,20 @@ for (;;)
break;
}
GETCHARLEN(c, eptr, len);
- if (c == 0x000d)
+ if (c == CHAR_CR)
{
if (++eptr >= md->end_subject) break;
- if (*eptr == 0x000a) eptr++;
+ if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
}
else
{
- if (c != 0x000a &&
+ if (c != CHAR_LF &&
(md->bsr_anycrlf ||
- (c != 0x000b && c != 0x000c &&
- c != 0x0085 && c != 0x2028 && c != 0x2029)))
+ (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
+#ifndef EBCDIC
+ && c != 0x2028 && c != 0x2029
+#endif /* Not EBCDIC */
+ )))
break;
eptr += len;
}
@@ -5761,28 +5675,8 @@ for (;;)
GETCHARLEN(c, eptr, len);
switch(c)
{
+ HSPACE_CASES: gotspace = TRUE; break;
default: gotspace = FALSE; break;
- case 0x09: /* HT */
- case 0x20: /* SPACE */
- case 0xa0: /* NBSP */
- case 0x1680: /* OGHAM SPACE MARK */
- case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
- case 0x2000: /* EN QUAD */
- case 0x2001: /* EM QUAD */
- case 0x2002: /* EN SPACE */
- case 0x2003: /* EM SPACE */
- case 0x2004: /* THREE-PER-EM SPACE */
- case 0x2005: /* FOUR-PER-EM SPACE */
- case 0x2006: /* SIX-PER-EM SPACE */
- case 0x2007: /* FIGURE SPACE */
- case 0x2008: /* PUNCTUATION SPACE */
- case 0x2009: /* THIN SPACE */
- case 0x200A: /* HAIR SPACE */
- case 0x202f: /* NARROW NO-BREAK SPACE */
- case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
- case 0x3000: /* IDEOGRAPHIC SPACE */
- gotspace = TRUE;
- break;
}
if (gotspace == (ctype == OP_NOT_HSPACE)) break;
eptr += len;
@@ -5803,16 +5697,8 @@ for (;;)
GETCHARLEN(c, eptr, len);
switch(c)
{
+ VSPACE_CASES: gotspace = TRUE; break;
default: gotspace = FALSE; break;
- case 0x0a: /* LF */
- case 0x0b: /* VT */
- case 0x0c: /* FF */
- case 0x0d: /* CR */
- case 0x85: /* NEL */
- case 0x2028: /* LINE SEPARATOR */
- case 0x2029: /* PARAGRAPH SEPARATOR */
- gotspace = TRUE;
- break;
}
if (gotspace == (ctype == OP_NOT_VSPACE)) break;
eptr += len;
@@ -5926,8 +5812,8 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr);
- if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
- eptr[-1] == '\r') eptr--;
+ if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
+ RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
}
}
else
@@ -5978,19 +5864,19 @@ for (;;)
break;
}
c = *eptr;
- if (c == 0x000d)
+ if (c == CHAR_CR)
{
if (++eptr >= md->end_subject) break;
- if (*eptr == 0x000a) eptr++;
+ if (*eptr == CHAR_LF) eptr++;
}
else
{
- if (c != 0x000a && (md->bsr_anycrlf ||
- (c != 0x000b && c != 0x000c && c != 0x0085
-#ifdef COMPILE_PCRE16
- && c != 0x2028 && c != 0x2029
+ if (c != CHAR_LF && (md->bsr_anycrlf ||
+ (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ && c != 0x2028 && c != 0x2029
#endif
- ))) break;
+ ))) break;
eptr++;
}
}
@@ -6004,15 +5890,17 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- c = *eptr;
- if (c == 0x09 || c == 0x20 || c == 0xa0
-#ifdef COMPILE_PCRE16
- || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
- || c == 0x202f || c == 0x205f || c == 0x3000
+ switch(*eptr)
+ {
+ default: eptr++; break;
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
- ) break;
- eptr++;
+ goto ENDLOOP00;
+ }
}
+ ENDLOOP00:
break;
case OP_HSPACE:
@@ -6023,15 +5911,17 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- c = *eptr;
- if (c != 0x09 && c != 0x20 && c != 0xa0
-#ifdef COMPILE_PCRE16
- && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
- && c != 0x202f && c != 0x205f && c != 0x3000
+ switch(*eptr)
+ {
+ default: goto ENDLOOP01;
+ HSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ HSPACE_MULTIBYTE_CASES:
#endif
- ) break;
- eptr++;
+ eptr++; break;
+ }
}
+ ENDLOOP01:
break;
case OP_NOT_VSPACE:
@@ -6042,14 +5932,17 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- c = *eptr;
- if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
-#ifdef COMPILE_PCRE16
- || c == 0x2028 || c == 0x2029
+ switch(*eptr)
+ {
+ default: eptr++; break;
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
- ) break;
- eptr++;
+ goto ENDLOOP02;
+ }
}
+ ENDLOOP02:
break;
case OP_VSPACE:
@@ -6060,14 +5953,17 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- c = *eptr;
- if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
-#ifdef COMPILE_PCRE16
- && c != 0x2028 && c != 0x2029
+ switch(*eptr)
+ {
+ default: goto ENDLOOP03;
+ VSPACE_BYTE_CASES:
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ VSPACE_MULTIBYTE_CASES:
#endif
- ) break;
- eptr++;
+ eptr++; break;
+ }
}
+ ENDLOOP03:
break;
case OP_NOT_DIGIT:
@@ -6164,8 +6060,8 @@ for (;;)
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
- if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
- eptr[-1] == '\r') eptr--;
+ if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
+ eptr[-1] == CHAR_CR) eptr--;
}
}
@@ -6215,14 +6111,11 @@ switch (frame->Xwhere)
LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
- LBL(59) LBL(60) LBL(61) LBL(62)
+ LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */
default:
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
-
-printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
-
return PCRE_ERROR_INTERNAL;
}
#undef LBL
@@ -6334,16 +6227,21 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+ PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount)
#endif
{
int rc, ocount, arg_offset_max;
@@ -6397,6 +6295,7 @@ if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
+if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not,
@@ -6434,19 +6333,22 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
offsets[0] = erroroffset;
offsets[1] = errorcode;
}
-#ifdef COMPILE_PCRE16
- return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
- PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
-#else
+#if defined COMPILE_PCRE8
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#elif defined COMPILE_PCRE16
+ return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
+ PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#elif defined COMPILE_PCRE32
+ return PCRE_ERROR_BADUTF32;
#endif
}
-
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
/* Check that a start_offset points to the start of a UTF character. */
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
+#endif
}
#endif
@@ -6460,17 +6362,15 @@ if (extra_data != NULL
&& (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
&& extra_data->executable_jit != NULL
- && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
- PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
- PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
+ && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
{
- rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
+ rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
start_offset, options, offsets, offsetcount);
/* PCRE_ERROR_NULL means that the selected normal or partial matching
mode is not compiled. In this case we simply fallback to interpreter. */
- if (rc != PCRE_ERROR_NULL) return rc;
+ if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
}
#endif
@@ -6754,12 +6654,14 @@ for(;;)
if (has_first_char)
{
+ pcre_uchar smc;
+
if (first_char != first_char2)
while (start_match < end_subject &&
- *start_match != first_char && *start_match != first_char2)
+ (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
start_match++;
else
- while (start_match < end_subject && *start_match != first_char)
+ while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
start_match++;
}
@@ -6791,7 +6693,7 @@ for(;;)
if (start_match[-1] == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
start_match < end_subject &&
- *start_match == CHAR_NL)
+ RAWUCHARTEST(start_match) == CHAR_NL)
start_match++;
}
}
@@ -6802,7 +6704,7 @@ for(;;)
{
while (start_match < end_subject)
{
- register unsigned int c = *start_match;
+ register pcre_uint32 c = RAWUCHARTEST(start_match);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
@@ -6870,7 +6772,7 @@ for(;;)
{
while (p < end_subject)
{
- register int pp = *p++;
+ register pcre_uint32 pp = RAWUCHARINCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -6878,7 +6780,7 @@ for(;;)
{
while (p < end_subject)
{
- if (*p++ == req_char) { p--; break; }
+ if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
}
}