diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-01-10 17:09:12 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-01-10 17:09:12 +0000 |
commit | 7b7bdb999dc27e3d66d39da7520cebb63e895cb0 (patch) | |
tree | 744f813e78021293b337cd7bf85cb544f277691d | |
parent | ef49ade6a6462bb4a9f673b20b28d79da9cea129 (diff) | |
download | pcre-7b7bdb999dc27e3d66d39da7520cebb63e895cb0.tar.gz |
Tidies for the 7.5 release.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@298 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 50 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | pcre_compile.c | 36 | ||||
-rw-r--r-- | pcre_exec.c | 4 | ||||
-rw-r--r-- | pcre_internal.h | 2 |
6 files changed, 49 insertions, 49 deletions
@@ -1,7 +1,7 @@ ChangeLog for PCRE ------------------ -Version 7.5 04-Jan-08 +Version 7.5 10-Jan-08 --------------------- 1. Applied a patch from Craig: "This patch makes it possible to 'ignore' @@ -99,9 +99,9 @@ Version 7.5 04-Jan-08 20. In pcrecpp.cc, the variable 'count' was incremented twice in RE::GlobalReplace(). As a result, the number of replacements returned was - double what it should be. I removed one of the increments, but Craig sent a - later patch that removed the other one (the right fix) and added unit tests - that check the return values (which was not done before). + double what it should be. I removed one of the increments, but Craig sent a + later patch that removed the other one (the right fix) and added unit tests + that check the return values (which was not done before). 21. Several CMake things: @@ -112,28 +112,28 @@ Version 7.5 04-Jan-08 linked with the newly-built libraries, not previously installed ones. (3) Added PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, PCRE_SUPPORT_LIBBZ2. - -22. In UTF-8 mode, with newline set to "any", a pattern such as .*a.*=.b.* - crashed when matching a string such as a\x{2029}b (note that \x{2029} is a - UTF-8 newline character). The key issue is that the pattern starts .*; - this means that the match must be either at the beginning, or after a - newline. The bug was in the code for advancing after a failed match and - checking that the new position followed a newline. It was not taking + +22. In UTF-8 mode, with newline set to "any", a pattern such as .*a.*=.b.* + crashed when matching a string such as a\x{2029}b (note that \x{2029} is a + UTF-8 newline character). The key issue is that the pattern starts .*; + this means that the match must be either at the beginning, or after a + newline. The bug was in the code for advancing after a failed match and + checking that the new position followed a newline. It was not taking account of UTF-8 characters correctly. - -23. 
PCRE was behaving differently from Perl in the way it recognized POSIX - character classes. PCRE was not treating the sequence [:...:] as a - character class unless the ... were all letters. Perl, however, seems to - allow any characters between [: and :], though of course it rejects as - unknown any "names" that contain non-letters, because all the known class - names consist only of letters. Thus, Perl gives an error for [[:1234:]], - for example, whereas PCRE did not - it did not recognize a POSIX character - class. This seemed a bit dangerous, so the code has been changed to be - closer to Perl. The behaviour is not identical to Perl, because PCRE will - diagnose an unknown class for, for example, [[:l\ower:]] where Perl will - treat it as [[:lower:]]. However, PCRE does now give "unknown" errors where - Perl does, and where it didn't before. - + +23. PCRE was behaving differently from Perl in the way it recognized POSIX + character classes. PCRE was not treating the sequence [:...:] as a + character class unless the ... were all letters. Perl, however, seems to + allow any characters between [: and :], though of course it rejects as + unknown any "names" that contain non-letters, because all the known class + names consist only of letters. Thus, Perl gives an error for [[:1234:]], + for example, whereas PCRE did not - it did not recognize a POSIX character + class. This seemed a bit dangerous, so the code has been changed to be + closer to Perl. The behaviour is not identical to Perl, because PCRE will + diagnose an unknown class for, for example, [[:l\ower:]] where Perl will + treat it as [[:lower:]]. However, PCRE does now give "unknown" errors where + Perl does, and where it didn't before. + 24. Rewrite so as to remove the single use of %n from pcregrep because in some Windows environments %n is disabled by default. 
@@ -1,7 +1,7 @@ News about PCRE releases ------------------------ -Release 7.5 27-Dec-07 +Release 7.5 10-Jan-08 --------------------- This is mainly a bug-fix release. However the ability to link pcregrep with diff --git a/configure.ac b/configure.ac index 1fc0cef..19eaf54 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,8 @@ dnl empty. m4_define(pcre_major, [7]) m4_define(pcre_minor, [5]) -m4_define(pcre_prerelease, [-RC2]) -m4_define(pcre_date, [2007-12-27]) +m4_define(pcre_prerelease, []) +m4_define(pcre_date, [2008-01-10]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) diff --git a/pcre_compile.c b/pcre_compile.c index 33b2c48..d4840cb 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -1738,22 +1738,22 @@ return TRUE; /* This function is called when the sequence "[:" or "[." or "[=" is encountered in a character class. It checks whether this is followed by a -sequence of characters terminated by a matching ":]" or ".]" or "=]". If we +sequence of characters terminated by a matching ":]" or ".]" or "=]". If we reach an unescaped ']' without the special preceding character, return FALSE. -Originally, this function only recognized a sequence of letters between the -terminators, but it seems that Perl recognizes any sequence of characters, -though of course unknown POSIX names are subsequently rejected. Perl gives an -"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE -didn't consider this to be a POSIX class. Likewise for [:1234:]. - -The problem in trying to be exactly like Perl is in the handling of escapes. We -have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX -class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code -below handles the special case of \], but does not try to do any other escape -processing. 
This makes it different from Perl for cases such as [:l\ower:] +Originally, this function only recognized a sequence of letters between the +terminators, but it seems that Perl recognizes any sequence of characters, +though of course unknown POSIX names are subsequently rejected. Perl gives an +"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE +didn't consider this to be a POSIX class. Likewise for [:1234:]. + +The problem in trying to be exactly like Perl is in the handling of escapes. We +have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX +class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code +below handles the special case of \], but does not try to do any other escape +processing. This makes it different from Perl for cases such as [:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize -"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does, +"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does, I think. Arguments: @@ -1771,15 +1771,15 @@ terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ for (++ptr; *ptr != 0; ptr++) { if (*ptr == '\\' && ptr[1] == ']') ptr++; else - { - if (*ptr == ']') return FALSE; + { + if (*ptr == ']') return FALSE; if (*ptr == terminator && ptr[1] == ']') { *endptr = ptr; return TRUE; - } - } - } + } + } + } return FALSE; } diff --git a/pcre_exec.c b/pcre_exec.c index 23b8bd8..04c6803 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -4704,8 +4704,8 @@ for(;;) while (start_match < end_subject) { register unsigned int c = *start_match; - if ((start_bits[c/8] & (1 << (c&7))) == 0) - { NEXTCHAR(start_match); } + if ((start_bits[c/8] & (1 << (c&7))) == 0) + { NEXTCHAR(start_match); } else break; } } diff --git a/pcre_internal.h b/pcre_internal.h index b10e81b..0b2603b 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -378,7 +378,7 @@ in UTF-8 mode. 
*/ #define NEXTCHAR(p) \ p++; \ - if (utf8) { while((*p & 0xc0) == 0x80) p++; } + if (utf8) { while((*p & 0xc0) == 0x80) p++; } /* Get the next UTF-8 character, not advancing the pointer. This is called when we know we are in UTF-8 mode. */ |