summaryrefslogtreecommitdiff
path: root/pcre_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'pcre_compile.c')
-rw-r--r--pcre_compile.c47
1 files changed, 33 insertions, 14 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index e962d19..33b2c48 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1737,30 +1737,49 @@ return TRUE;
*************************************************/
/* This function is called when the sequence "[:" or "[." or "[=" is
-encountered in a character class. It checks whether this is followed by an
-optional ^ and then a sequence of letters, terminated by a matching ":]" or
-".]" or "=]".
+encountered in a character class. It checks whether this is followed by a
+sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
+reach an unescaped ']' without the special preceding character, return FALSE.
+
+Originally, this function only recognized a sequence of letters between the
+terminators, but it seems that Perl recognizes any sequence of characters,
+though of course unknown POSIX names are subsequently rejected. Perl gives an
+"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
+didn't consider this to be a POSIX class. Likewise for [:1234:].
+
+The problem in trying to be exactly like Perl is in the handling of escapes. We
+have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
+class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
+below handles the special case of \], but does not try to do any other escape
+processing. This makes it different from Perl for cases such as [:l\ower:]
+where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
+"l\ower". This is a lesser evil that not diagnosing bad classes when Perl does,
+I think.
-Argument:
+Arguments:
ptr pointer to the initial [
endptr where to return the end pointer
- cd pointer to compile data
Returns: TRUE or FALSE
*/
static BOOL
-check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
+check_posix_syntax(const uschar *ptr, const uschar **endptr)
{
int terminator; /* Don't combine these lines; the Solaris cc */
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
-if (*(++ptr) == '^') ptr++;
-while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
-if (*ptr == terminator && ptr[1] == ']')
+for (++ptr; *ptr != 0; ptr++)
{
- *endptr = ptr;
- return TRUE;
- }
+ if (*ptr == '\\' && ptr[1] == ']') ptr++; else
+ {
+ if (*ptr == ']') return FALSE;
+ if (*ptr == terminator && ptr[1] == ']')
+ {
+ *endptr = ptr;
+ return TRUE;
+ }
+ }
+ }
return FALSE;
}
@@ -2620,7 +2639,7 @@ for (;; ptr++)
they are encountered at the top level, so we'll do that too. */
if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
- check_posix_syntax(ptr, &tempptr, cd))
+ check_posix_syntax(ptr, &tempptr))
{
*errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
goto FAILED;
@@ -2706,7 +2725,7 @@ for (;; ptr++)
if (c == '[' &&
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
- check_posix_syntax(ptr, &tempptr, cd))
+ check_posix_syntax(ptr, &tempptr))
{
BOOL local_negate = FALSE;
int posix_class, taboffset, tabopt;