summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-28 16:07:52 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-28 16:07:52 +0000
commitd75f0b6fc01e5289720c7daf67b3ce2a827173b9 (patch)
tree93c3e1c9dfa126c87b28817ae5d357e68c1d28a5
parent7492c1ceb7dd157d6c351723ad12398146789d08 (diff)
downloadpcre-d75f0b6fc01e5289720c7daf67b3ce2a827173b9.tar.gz
Fix an infinite fast-forward newline on invalid UTF input.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1452 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog4
-rw-r--r--pcre_jit_compile.c20
-rw-r--r--pcre_jit_test.c27
3 files changed, 36 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index 616e184..66ea129 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -96,6 +96,10 @@ Version 8.35-RC1 xx-xxxx-201x
19. Little endian PowerPC systems are supported now by the JIT compiler.
+20. The fast forward newline mechanism could enter to an infinite loop on
+ certain invalid UTF-8 input. Although we don't support these cases
+ this issue can be fixed by a performance optimization.
+
Version 8.34 15-December-2013
-----------------------------
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 96eb728..5a588cf 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -398,6 +398,7 @@ typedef struct compiler_common {
struct sljit_label *quit_label;
struct sljit_label *forced_quit_label;
struct sljit_label *accept_label;
+ struct sljit_label *ff_newline_shortcut;
stub_list *stubs;
label_addr_list *label_addrs;
recurse_entry *entries;
@@ -3871,7 +3872,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
JUMPHERE(lastchar);
if (firstline)
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
return;
}
@@ -3881,6 +3882,8 @@ firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);
loop = LABEL();
+common->ff_newline_shortcut = loop;
+
read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
@@ -10133,10 +10136,19 @@ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
if ((re->options & PCRE_ANCHORED) == 0)
{
- if ((re->options & PCRE_FIRSTLINE) == 0)
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+ if (common->ff_newline_shortcut != NULL)
+ {
+ if ((re->options & PCRE_FIRSTLINE) == 0)
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
+ /* There cannot be more newlines here. */
+ }
else
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+ {
+ if ((re->options & PCRE_FIRSTLINE) == 0)
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+ else
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+ }
}
/* No more remaining characters. */
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 4ec6f9d..7a01cdf 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -392,6 +392,10 @@ static struct regression_test_case regression_test_cases[] = {
{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
{ PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
+ { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
+ { MUA, 1, "^", "\r\n" },
+ { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
+ { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
/* Any character except newline or any newline. */
{ PCRE_NEWLINE_CRLF, 0, ".", "\r" },
@@ -650,6 +654,7 @@ static struct regression_test_case regression_test_cases[] = {
{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
{ PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
+ { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
/* Recurse. */
{ MUA, 0, "(a)(?1)", "aa" },
@@ -980,7 +985,7 @@ static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *o
if (offsetmap)
*offsetmap++ = (int)(iptr - (unsigned char*)input);
- if (!(*iptr & 0x80))
+ if (*iptr < 0xc0)
c = *iptr++;
else if (!(*iptr & 0x20)) {
c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1052,7 +1057,7 @@ static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *o
if (offsetmap)
*offsetmap++ = (int)(iptr - (unsigned char*)input);
- if (!(*iptr & 0x80))
+ if (*iptr < 0xc0)
c = *iptr++;
else if (!(*iptr & 0x20)) {
c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1326,10 +1331,10 @@ static int regression_tests(void)
if ((counter & 0x1) != 0) {
setstack8(extra8);
return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
} else
return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
memset(&dummy_extra8, 0, sizeof(pcre_extra));
dummy_extra8.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1338,7 +1343,7 @@ static int regression_tests(void)
}
dummy_extra8.mark = &mark8_2;
return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
}
#endif
@@ -1360,10 +1365,10 @@ static int regression_tests(void)
if ((counter & 0x1) != 0) {
setstack16(extra16);
return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
} else
return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
memset(&dummy_extra16, 0, sizeof(pcre16_extra));
dummy_extra16.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1372,7 +1377,7 @@ static int regression_tests(void)
}
dummy_extra16.mark = &mark16_2;
return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
}
#endif
@@ -1394,10 +1399,10 @@ static int regression_tests(void)
if ((counter & 0x1) != 0) {
setstack32(extra32);
return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
} else
return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
memset(&dummy_extra32, 0, sizeof(pcre32_extra));
dummy_extra32.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1406,7 +1411,7 @@ static int regression_tests(void)
}
dummy_extra32.mark = &mark32_2;
return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
}
#endif