diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-01-02 17:41:28 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-01-02 17:41:28 +0000 |
commit | 31a692c6bfca25feffa7cc96dab542080b0a9d0c (patch) | |
tree | ce87e75e8ed049d97d5f667631fcaf7b30e86f70 | |
parent | 62671ac7455a5eb508bc3f99e6f01585efd08c83 (diff) | |
download | pcre-31a692c6bfca25feffa7cc96dab542080b0a9d0c.tar.gz |
Revert RAWUCHAR macros, renaming them as UCHAR21 and adding an explanatory
comment.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1431 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 29 | ||||
-rw-r--r-- | pcre_exec.c | 72 | ||||
-rw-r--r-- | pcre_internal.h | 34 | ||||
-rw-r--r-- | pcre_string_utils.c | 8 |
5 files changed, 85 insertions, 67 deletions
@@ -31,10 +31,11 @@ Version 8.35-RC1 xx-xxxx-201x must be bigger than the treshold as well. This function is useful, when the characters above the treshold are handled in the same way. -7. The macros RAWUCHAR and RAWUCHARTEST were identical (and the latter was not - testing anything, contrary to its name and comment) and were not really - fulfilling any useful function, so I have replaced their use by plain code. - Similarly for RAWUCHARINC and RAWUCHARINCTEST. +7. The macros whose names start with RAWUCHAR are placeholders for a future + mode in which only the bottom 21 bits of 32-bit data items are used. To + make this more memorable for those maintaining the code, the names have + been changed to start with UCHAR21, and an extensive comment has been added + to their definition. Version 8.34 15-December-2013 diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 2b6dd10..fb0c7e8 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see below for why this module is different). Written by Philip Hazel - Copyright (c) 1997-2013 University of Cambridge + Copyright (c) 1997-2014 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -1473,7 +1473,7 @@ for (;;) goto ANYNL01; case CHAR_CR: - if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1; + if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; /* Fall through */ ANYNL01: @@ -1742,7 +1742,7 @@ for (;;) goto ANYNL02; case CHAR_CR: - if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1; + if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; /* Fall through */ ANYNL02: @@ -2012,7 +2012,7 @@ for (;;) goto ANYNL03; case CHAR_CR: - if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1; + if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; /* Fall through */ ANYNL03: @@ -2210,7 +2210,7 @@ for (;;) if ((md->moptions & PCRE_PARTIAL_HARD) != 0) reset_could_continue = TRUE; } - else if (ptr[1] == CHAR_LF) + else if (UCHAR21TEST(ptr + 1) == CHAR_LF) { ADD_NEW_DATA(-(state_offset + 1), 0, 1); } @@ -3474,12 +3474,12 @@ for (;;) { pcre_uchar csc; while (current_subject < end_subject && - (csc = *current_subject) != first_char && csc != first_char2) + (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2) current_subject++; } else while (current_subject < end_subject && - *current_subject != first_char) + UCHAR21TEST(current_subject) != first_char) current_subject++; } @@ -3509,9 +3509,10 @@ for (;;) ANYCRLF, and we are now at a LF, advance the match position by one more character. */ - if (current_subject[-1] == CHAR_CR && + if (UCHAR21TEST(current_subject - 1) == CHAR_CR && (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && - current_subject < end_subject && *current_subject == CHAR_NL) + current_subject < end_subject && + UCHAR21TEST(current_subject) == CHAR_NL) current_subject++; } } @@ -3522,7 +3523,7 @@ for (;;) { while (current_subject < end_subject) { - register pcre_uint32 c = *current_subject; + register pcre_uint32 c = UCHAR21TEST(current_subject); #ifndef COMPILE_PCRE8 if (c > 255) c = 255; #endif @@ -3579,7 +3580,7 @@ for (;;) { while (p < end_subject) { - register pcre_uint32 pp = *p++; + register pcre_uint32 pp = UCHAR21INCTEST(p); if (pp == req_char || pp == req_char2) { p--; break; } } } @@ -3587,7 +3588,7 @@ for (;;) { while (p < end_subject) { - if (*p++ == req_char) { p--; break; } + if (UCHAR21INCTEST(p) == req_char) { p--; break; } } } @@ -3655,9 +3656,9 @@ for (;;) not contain any explicit matches for \r or \n, and the newline option is CRLF or ANY or ANYCRLF, advance the match position by one more character. */ - if (current_subject[-1] == CHAR_CR && + if (UCHAR21TEST(current_subject - 1) == CHAR_CR && current_subject < end_subject && - *current_subject == CHAR_NL && + UCHAR21TEST(current_subject) == CHAR_NL && (re->flags & PCRE_HASCRORLF) == 0 && (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF || diff --git a/pcre_exec.c b/pcre_exec.c index beadc9c..7a59dbe 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2013 University of Cambridge + Copyright (c) 1997-2014 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -134,7 +134,7 @@ pcre_uint32 c; BOOL utf = md->utf; if (is_subject && length > md->end_subject - p) length = md->end_subject - p; while (length-- > 0) - if (isprint(c = *p++)) printf("%c", (char)c); else printf("\\x{%02x}", c); + if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c); } #endif @@ -237,8 +237,8 @@ if (caseless) { pcre_uint32 cc, cp; if (eptr >= md->end_subject) return -2; /* Partial match */ - cc = *eptr; - cp = *p; + cc = UCHAR21TEST(eptr); + cp = UCHAR21TEST(p); if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1; p++; eptr++; @@ -254,7 +254,7 @@ else while (length-- > 0) { if (eptr >= md->end_subject) return -2; /* Partial match */ - if (*p++ != *eptr++) return -1; + if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; } } @@ -2103,7 +2103,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21TEST(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -2147,7 +2147,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21TEST(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -2290,7 +2290,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21TEST(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -2444,7 +2444,7 @@ for (;;) { SCHECK_PARTIAL(); } - else if (*eptr == CHAR_LF) eptr++; + else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++; break; case CHAR_LF: @@ -3218,7 +3218,7 @@ for (;;) CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ RRETURN(MATCH_NOMATCH); } - while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); + while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH); } else #endif @@ -3258,7 +3258,7 @@ for (;;) if (fc < 128) { - pcre_uint32 cc = *eptr; + pcre_uint32 cc = UCHAR21(eptr); if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH); ecode++; eptr++; @@ -3527,7 +3527,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21TEST(eptr); if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); eptr++; } @@ -3545,7 +3545,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21TEST(eptr); if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH); eptr++; } @@ -3562,7 +3562,7 @@ for (;;) SCHECK_PARTIAL(); break; } - cc = *eptr; + cc = UCHAR21TEST(eptr); if (fc != cc && foc != cc) break; eptr++; } @@ -3589,7 +3589,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc != *eptr++) RRETURN(MATCH_NOMATCH); + if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH); } if (min == max) continue; @@ -3606,7 +3606,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc != *eptr++) RRETURN(MATCH_NOMATCH); + if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -3620,7 +3620,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if (fc != *eptr) break; + if (fc != UCHAR21TEST(eptr)) break; eptr++; } if (possessive) continue; /* No backtracking */ @@ -4375,7 +4375,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -4417,7 +4417,7 @@ for (;;) default: RRETURN(MATCH_NOMATCH); case CHAR_CR: - if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; + if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++; break; case CHAR_LF: @@ -4527,7 +4527,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); eptr++; @@ -4544,7 +4544,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21(eptr); if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); eptr++; @@ -4561,7 +4561,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); eptr++; @@ -4578,7 +4578,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21(eptr); if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); eptr++; @@ -4595,7 +4595,7 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - cc = *eptr; + cc = UCHAR21(eptr); if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); eptr++; @@ -5156,7 +5156,7 @@ for (;;) { default: RRETURN(MATCH_NOMATCH); case CHAR_CR: - if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++; + if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++; break; case CHAR_LF: @@ -5695,7 +5695,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -5721,7 +5721,7 @@ for (;;) eptr + 1 >= md->end_subject && NLBLOCK->nltype == NLTYPE_FIXED && NLBLOCK->nllen == 2 && - *eptr == NLBLOCK->nl[0]) + UCHAR21(eptr) == NLBLOCK->nl[0]) { md->hitend = TRUE; if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); @@ -5778,7 +5778,7 @@ for (;;) if (c == CHAR_CR) { if (++eptr >= md->end_subject) break; - if (*eptr == CHAR_LF) eptr++; + if (UCHAR21(eptr) == CHAR_LF) eptr++; } else { @@ -5941,8 +5941,8 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; BACKCHAR(eptr); - if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_NL && - eptr[-1] == CHAR_CR) eptr--; + if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL && + UCHAR21(eptr - 1) == CHAR_CR) eptr--; } } else @@ -6789,10 +6789,10 @@ for(;;) if (first_char != first_char2) while (start_match < end_subject && - (smc = *start_match) != first_char && smc != first_char2) + (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2) start_match++; else - while (start_match < end_subject && *start_match != first_char) + while (start_match < end_subject && UCHAR21TEST(start_match) != first_char) start_match++; } @@ -6824,7 +6824,7 @@ for(;;) if (start_match[-1] == CHAR_CR && (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && start_match < end_subject && - *start_match == CHAR_NL) + UCHAR21TEST(start_match) == CHAR_NL) start_match++; } } @@ -6835,7 +6835,7 @@ for(;;) { while (start_match < end_subject) { - register pcre_uint32 c = *start_match; + register pcre_uint32 c = UCHAR21TEST(start_match); #ifndef COMPILE_PCRE8 if (c > 255) c = 255; #endif @@ -6893,7 +6893,7 @@ for(;;) { while (p < end_subject) { - register pcre_uint32 pp = *p++; + register pcre_uint32 pp = UCHAR21INCTEST(p); if (pp == req_char || pp == req_char2) { p--; break; } } } @@ -6901,7 +6901,7 @@ for(;;) { while (p < end_subject) { - if (*p++ == req_char) { p--; break; } + if (UCHAR21INCTEST(p) == req_char) { p--; break; } } } diff --git a/pcre_internal.h b/pcre_internal.h index dd50095..7e07d63 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2013 University of Cambridge + Copyright (c) 1997-2014 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -316,7 +316,8 @@ start/end of string field names are. */ &(NLBLOCK->nllen), utf)) \ : \ ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ - *p == NLBLOCK->nl[0] && (NLBLOCK->nllen == 1 || p[1] == NLBLOCK->nl[1]) \ + UCHAR21TEST(p) == NLBLOCK->nl[0] && \ + (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \ ) \ ) @@ -329,8 +330,8 @@ start/end of string field names are. */ &(NLBLOCK->nllen), utf)) \ : \ ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ - *(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ - (NLBLOCK->nllen == 1 || *(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ + UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ + (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ ) \ ) @@ -581,12 +582,27 @@ changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */ #define MAX_MARK ((1u << 8) - 1) #endif +/* There is a proposed future special "UTF-21" mode, in which only the lowest +21 bits of a 32-bit character are interpreted as UTF, with the remaining 11 +high-order bits available to the application for other uses. In preparation for +the future implementation of this mode, there are macros that load a data item +and, if in this special mode, mask it to 21 bits. These macros all have names +starting with UCHAR21. In all other modes, including the normal 32-bit +library, the macros all have the same simple definitions. When the new mode is +implemented, it is expected that these definitions will be varied appropriately +using #ifdef when compiling the library that supports the special mode. */ + +#define UCHAR21(eptr) (*(eptr)) +#define UCHAR21TEST(eptr) (*(eptr)) +#define UCHAR21INC(eptr) (*(eptr)++) +#define UCHAR21INCTEST(eptr) (*(eptr)++) + /* When UTF encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -character-mode, and more complicated ones for UTF characters. GETCHARLENTEST -and other macros are not used when UTF is not supported, so they are not -defined. To make sure they can never even appear when UTF support is omitted, -we don't even define them. */ +byte in 8-bit mode or a single short in 16-bit mode. The macros for character +handling generate simple sequences when used in the basic mode, and more +complicated ones for UTF characters. GETCHARLENTEST and other macros are not +used when UTF is not supported. To make sure they can never even appear when +UTF support is omitted, we don't even define them. */ #ifndef SUPPORT_UTF diff --git a/pcre_string_utils.c b/pcre_string_utils.c index 61a9b4b..25eacc8 100644 --- a/pcre_string_utils.c +++ b/pcre_string_utils.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2013 University of Cambridge + Copyright (c) 1997-2014 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -91,8 +91,8 @@ pcre_uchar c2; while (*str1 != '\0' || *str2 != '\0') { - c1 = *str1++; - c2 = *str2++; + c1 = UCHAR21INC(str1); + c2 = UCHAR21INC(str2); if (c1 != c2) return ((c1 > c2) << 1) - 1; } @@ -131,7 +131,7 @@ pcre_uchar c2; while (*str1 != '\0' || *ustr2 != '\0') { - c1 = *str1++; + c1 = UCHAR21INC(str1); c2 = (pcre_uchar)*ustr2++; if (c1 != c2) return ((c1 > c2) << 1) - 1; |