summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-02 17:41:28 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-02 17:41:28 +0000
commit31a692c6bfca25feffa7cc96dab542080b0a9d0c (patch)
treece87e75e8ed049d97d5f667631fcaf7b30e86f70
parent62671ac7455a5eb508bc3f99e6f01585efd08c83 (diff)
downloadpcre-31a692c6bfca25feffa7cc96dab542080b0a9d0c.tar.gz
Revert RAWUCHAR macros, renaming them as UCHAR21 and adding an explanatory
comment. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1431 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog9
-rw-r--r--pcre_dfa_exec.c29
-rw-r--r--pcre_exec.c72
-rw-r--r--pcre_internal.h34
-rw-r--r--pcre_string_utils.c8
5 files changed, 85 insertions, 67 deletions
diff --git a/ChangeLog b/ChangeLog
index a610c40..568e779 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -31,10 +31,11 @@ Version 8.35-RC1 xx-xxxx-201x
must be bigger than the treshold as well. This function is useful, when
the characters above the treshold are handled in the same way.
-7. The macros RAWUCHAR and RAWUCHARTEST were identical (and the latter was not
- testing anything, contrary to its name and comment) and were not really
- fulfilling any useful function, so I have replaced their use by plain code.
- Similarly for RAWUCHARINC and RAWUCHARINCTEST.
+7. The macros whose names start with RAWUCHAR are placeholders for a future
+ mode in which only the bottom 21 bits of 32-bit data items are used. To
+ make this more memorable for those maintaining the code, the names have
+ been changed to start with UCHAR21, and an extensive comment has been added
+ to their definition.
Version 8.34 15-December-2013
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 2b6dd10..fb0c7e8 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1473,7 +1473,7 @@ for (;;)
goto ANYNL01;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL01:
@@ -1742,7 +1742,7 @@ for (;;)
goto ANYNL02;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL02:
@@ -2012,7 +2012,7 @@ for (;;)
goto ANYNL03;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL03:
@@ -2210,7 +2210,7 @@ for (;;)
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
- else if (ptr[1] == CHAR_LF)
+ else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
@@ -3474,12 +3474,12 @@ for (;;)
{
pcre_uchar csc;
while (current_subject < end_subject &&
- (csc = *current_subject) != first_char && csc != first_char2)
+ (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
current_subject++;
}
else
while (current_subject < end_subject &&
- *current_subject != first_char)
+ UCHAR21TEST(current_subject) != first_char)
current_subject++;
}
@@ -3509,9 +3509,10 @@ for (;;)
ANYCRLF, and we are now at a LF, advance the match position by one
more character. */
- if (current_subject[-1] == CHAR_CR &&
+ if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
- current_subject < end_subject && *current_subject == CHAR_NL)
+ current_subject < end_subject &&
+ UCHAR21TEST(current_subject) == CHAR_NL)
current_subject++;
}
}
@@ -3522,7 +3523,7 @@ for (;;)
{
while (current_subject < end_subject)
{
- register pcre_uint32 c = *current_subject;
+ register pcre_uint32 c = UCHAR21TEST(current_subject);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
@@ -3579,7 +3580,7 @@ for (;;)
{
while (p < end_subject)
{
- register pcre_uint32 pp = *p++;
+ register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -3587,7 +3588,7 @@ for (;;)
{
while (p < end_subject)
{
- if (*p++ == req_char) { p--; break; }
+ if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
@@ -3655,9 +3656,9 @@ for (;;)
not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */
- if (current_subject[-1] == CHAR_CR &&
+ if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&
- *current_subject == CHAR_NL &&
+ UCHAR21TEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||
diff --git a/pcre_exec.c b/pcre_exec.c
index beadc9c..7a59dbe 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -134,7 +134,7 @@ pcre_uint32 c;
BOOL utf = md->utf;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
while (length-- > 0)
- if (isprint(c = *p++)) printf("%c", (char)c); else printf("\\x{%02x}", c);
+ if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
}
#endif
@@ -237,8 +237,8 @@ if (caseless)
{
pcre_uint32 cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
- cc = *eptr;
- cp = *p;
+ cc = UCHAR21TEST(eptr);
+ cp = UCHAR21TEST(p);
if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
p++;
eptr++;
@@ -254,7 +254,7 @@ else
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (*p++ != *eptr++) return -1;
+ if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
}
}
@@ -2103,7 +2103,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2147,7 +2147,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2290,7 +2290,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2444,7 +2444,7 @@ for (;;)
{
SCHECK_PARTIAL();
}
- else if (*eptr == CHAR_LF) eptr++;
+ else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -3218,7 +3218,7 @@ for (;;)
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
+ while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
}
else
#endif
@@ -3258,7 +3258,7 @@ for (;;)
if (fc < 128)
{
- pcre_uint32 cc = *eptr;
+ pcre_uint32 cc = UCHAR21(eptr);
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
@@ -3527,7 +3527,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
@@ -3545,7 +3545,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
@@ -3562,7 +3562,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- cc = *eptr;
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) break;
eptr++;
}
@@ -3589,7 +3589,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
if (min == max) continue;
@@ -3606,7 +3606,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3620,7 +3620,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- if (fc != *eptr) break;
+ if (fc != UCHAR21TEST(eptr)) break;
eptr++;
}
if (possessive) continue; /* No backtracking */
@@ -4375,7 +4375,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4417,7 +4417,7 @@ for (;;)
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -4527,7 +4527,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4544,7 +4544,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4561,7 +4561,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4578,7 +4578,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4595,7 +4595,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = *eptr;
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -5156,7 +5156,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -5695,7 +5695,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5721,7 +5721,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5778,7 +5778,7 @@ for (;;)
if (c == CHAR_CR)
{
if (++eptr >= md->end_subject) break;
- if (*eptr == CHAR_LF) eptr++;
+ if (UCHAR21(eptr) == CHAR_LF) eptr++;
}
else
{
@@ -5941,8 +5941,8 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
BACKCHAR(eptr);
- if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_NL &&
- eptr[-1] == CHAR_CR) eptr--;
+ if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
+ UCHAR21(eptr - 1) == CHAR_CR) eptr--;
}
}
else
@@ -6789,10 +6789,10 @@ for(;;)
if (first_char != first_char2)
while (start_match < end_subject &&
- (smc = *start_match) != first_char && smc != first_char2)
+ (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
start_match++;
else
- while (start_match < end_subject && *start_match != first_char)
+ while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
start_match++;
}
@@ -6824,7 +6824,7 @@ for(;;)
if (start_match[-1] == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
start_match < end_subject &&
- *start_match == CHAR_NL)
+ UCHAR21TEST(start_match) == CHAR_NL)
start_match++;
}
}
@@ -6835,7 +6835,7 @@ for(;;)
{
while (start_match < end_subject)
{
- register pcre_uint32 c = *start_match;
+ register pcre_uint32 c = UCHAR21TEST(start_match);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
@@ -6893,7 +6893,7 @@ for(;;)
{
while (p < end_subject)
{
- register pcre_uint32 pp = *p++;
+ register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -6901,7 +6901,7 @@ for(;;)
{
while (p < end_subject)
{
- if (*p++ == req_char) { p--; break; }
+ if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
diff --git a/pcre_internal.h b/pcre_internal.h
index dd50095..7e07d63 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -316,7 +316,8 @@ start/end of string field names are. */
&(NLBLOCK->nllen), utf)) \
: \
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
- *p == NLBLOCK->nl[0] && (NLBLOCK->nllen == 1 || p[1] == NLBLOCK->nl[1]) \
+ UCHAR21TEST(p) == NLBLOCK->nl[0] && \
+ (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \
) \
)
@@ -329,8 +330,8 @@ start/end of string field names are. */
&(NLBLOCK->nllen), utf)) \
: \
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
- *(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
- (NLBLOCK->nllen == 1 || *(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
+ UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
+ (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
) \
)
@@ -581,12 +582,27 @@ changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
#define MAX_MARK ((1u << 8) - 1)
#endif
+/* There is a proposed future special "UTF-21" mode, in which only the lowest
+21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
+high-order bits available to the application for other uses. In preparation for
+the future implementation of this mode, there are macros that load a data item
+and, if in this special mode, mask it to 21 bits. These macros all have names
+starting with UCHAR21. In all other modes, including the normal 32-bit
+library, the macros all have the same simple definitions. When the new mode is
+implemented, it is expected that these definitions will be varied appropriately
+using #ifdef when compiling the library that supports the special mode. */
+
+#define UCHAR21(eptr) (*(eptr))
+#define UCHAR21TEST(eptr) (*(eptr))
+#define UCHAR21INC(eptr) (*(eptr)++)
+#define UCHAR21INCTEST(eptr) (*(eptr)++)
+
/* When UTF encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
-and other macros are not used when UTF is not supported, so they are not
-defined. To make sure they can never even appear when UTF support is omitted,
-we don't even define them. */
+byte in 8-bit mode or a single short in 16-bit mode. The macros for character
+handling generate simple sequences when used in the basic mode, and more
+complicated ones for UTF characters. GETCHARLENTEST and other macros are not
+used when UTF is not supported. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
#ifndef SUPPORT_UTF
diff --git a/pcre_string_utils.c b/pcre_string_utils.c
index 61a9b4b..25eacc8 100644
--- a/pcre_string_utils.c
+++ b/pcre_string_utils.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -91,8 +91,8 @@ pcre_uchar c2;
while (*str1 != '\0' || *str2 != '\0')
{
- c1 = *str1++;
- c2 = *str2++;
+ c1 = UCHAR21INC(str1);
+ c2 = UCHAR21INC(str2);
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
@@ -131,7 +131,7 @@ pcre_uchar c2;
while (*str1 != '\0' || *ustr2 != '\0')
{
- c1 = *str1++;
+ c1 = UCHAR21INC(str1);
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;