summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2013-10-16 13:59:12 +0100
committerDavid Mitchell <davem@iabyn.com>2013-10-17 10:57:35 +0100
commit0c2990d652e985784f095bba4bc356481a66aa06 (patch)
tree64cab82a59f1227210631ff65000b84534308458
parent508c1ff603f9bbfcf34bc663c9f028d6acf1f6e9 (diff)
downloadperl-0c2990d652e985784f095bba4bc356481a66aa06.tar.gz
[perl #119505] Segfault from bad backreference
The code that parses regex backrefs (or ambiguous backref/octal) such as \123, did a simple atoi(), which could wrap round to negative values on long digit strings and cause seg faults. Include a check on the length of the digit string, and if greater than 9 digits, assume it can never be a valid backref (obviating the need for the atoi() call). I've also simplified the code a bit, putting most of the \g handling code into a single block, rather than doing multiple "if (isg) {...}".
-rw-r--r--regcomp.c64
-rw-r--r--t/re/re_tests41
2 files changed, 87 insertions, 18 deletions
diff --git a/regcomp.c b/regcomp.c
index 0ec6a761aa..317094c819 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10594,6 +10594,22 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32
}
}
+
+/* return atoi(p), unless it's too big to sensibly be a backref,
+ * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
+
+static I32
+S_backref_value(char *p)
+{
+ char *q = p;
+
+ for (;isDIGIT(*q); q++); /* calculate length of num */
+ if (q - p == 0 || q - p > 9)
+ return I32_MAX;
+ return atoi(p);
+}
+
+
/*
- regatom - the lowest level
@@ -11027,10 +11043,11 @@ tryagain:
case '5': case '6': case '7': case '8': case '9':
{
I32 num;
- bool isg = *RExC_parse == 'g';
- bool isrel = 0;
bool hasbrace = 0;
- if (isg) {
+
+ if (*RExC_parse == 'g') {
+ bool isrel = 0;
+
RExC_parse++;
if (*RExC_parse == '{') {
RExC_parse++;
@@ -11044,25 +11061,36 @@ tryagain:
if (isrel) RExC_parse--;
RExC_parse -= 2;
goto parse_named_seq;
- } }
- num = atoi(RExC_parse);
- if (isg && num == 0) {
- if (*RExC_parse == '0') {
+ }
+
+ num = S_backref_value(RExC_parse);
+ if (num == 0)
vFAIL("Reference to invalid group 0");
+ else if (num == I32_MAX) {
+ if (isDIGIT(*RExC_parse))
+ vFAIL("Reference to nonexistent group");
+ else
+ vFAIL("Unterminated \\g... pattern");
}
- else {
- vFAIL("Unterminated \\g... pattern");
+
+ if (isrel) {
+ num = RExC_npar - num;
+ if (num < 1)
+ vFAIL("Reference to nonexistent or unclosed group");
}
}
- if (isrel) {
- num = RExC_npar - num;
- if (num < 1)
- vFAIL("Reference to nonexistent or unclosed group");
+ else {
+ num = S_backref_value(RExC_parse);
+ /* bare \NNN might be backref or octal */
+ if (num == I32_MAX || (num > 9 && num >= RExC_npar
+ && *RExC_parse != '8' && *RExC_parse != '9'))
+ /* Probably a character specified in octal, e.g. \35 */
+ goto defchar;
}
- if (!isg && num > 9 && num >= RExC_npar && *RExC_parse != '8' && *RExC_parse != '9')
- /* Probably a character specified in octal, e.g. \35 */
- goto defchar;
- else {
+
+ /* at this point RExC_parse definitely points to a backref
+ * number */
+ {
#ifdef RE_TRACK_PATTERN_OFFSETS
char * const parse_start = RExC_parse - 1; /* MJD */
#endif
@@ -11358,7 +11386,7 @@ tryagain:
* 118 OR as "\11" . "8" depending on whether there
* were 118 capture buffers defined already in the
* pattern. */
- if ( !isDIGIT(p[1]) || atoi(p) <= RExC_npar )
+ if ( !isDIGIT(p[1]) || S_backref_value(p) <= RExC_npar)
{ /* Not to be treated as an octal constant, go
find backref */
--p;
diff --git a/t/re/re_tests b/t/re/re_tests
index cf3291d476..4ea072aae1 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1493,6 +1493,47 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
a\87 a87 c - Reference to nonexistent group in regex
a\97 a97 c - Reference to nonexistent group in regex
+# avoid problems with 32-bit signed integer overflow
+
+(.)\g2147483648} x c - Reference to nonexistent group in regex
+(.)\g2147483649} x c - Reference to nonexistent group in regex
+(.)\g2147483650} x c - Reference to nonexistent group in regex
+(.)\g4294967296} x c - Reference to nonexistent group in regex
+(.)\g4294967297} x c - Reference to nonexistent group in regex
+(.)\g4294967298} x c - Reference to nonexistent group in regex
+a(.)\g2147483648} x c - Reference to nonexistent group in regex
+a(.)\g2147483649} x c - Reference to nonexistent group in regex
+a(.)\g2147483650} x c - Reference to nonexistent group in regex
+a(.)\g4294967296} x c - Reference to nonexistent group in regex
+a(.)\g4294967297} x c - Reference to nonexistent group in regex
+a(.)\g4294967298} x c - Reference to nonexistent group in regex
+
+(.)\g{2147483648} x c - Reference to nonexistent group in regex
+(.)\g{2147483649} x c - Reference to nonexistent group in regex
+(.)\g{2147483650} x c - Reference to nonexistent group in regex
+(.)\g{4294967296} x c - Reference to nonexistent group in regex
+(.)\g{4294967297} x c - Reference to nonexistent group in regex
+(.)\g{4294967298} x c - Reference to nonexistent group in regex
+a(.)\g{2147483648} x c - Reference to nonexistent group in regex
+a(.)\g{2147483649} x c - Reference to nonexistent group in regex
+a(.)\g{2147483650} x c - Reference to nonexistent group in regex
+a(.)\g{4294967296} x c - Reference to nonexistent group in regex
+a(.)\g{4294967297} x c - Reference to nonexistent group in regex
+a(.)\g{4294967298} x c - Reference to nonexistent group in regex
+
+(.)\2147483648 b\o{214}7483648 y $1 b
+(.)\2147483649 b\o{214}7483649 y $1 b
+(.)\2147483650 b\o{214}7483650 y $1 b
+(.)\4294967296 b\o{42}94967296 y $1 b
+(.)\4294967297 b\o{42}94967297 y $1 b
+(.)\4294967298 b\o{42}94967298 y $1 b
+a(.)\2147483648 ab\o{214}7483648 y $1 b
+a(.)\2147483649 ab\o{214}7483649 y $1 b
+a(.)\2147483650 ab\o{214}7483650 y $1 b
+a(.)\4294967296 ab\o{42}94967296 y $1 b
+a(.)\4294967297 ab\o{42}94967297 y $1 b
+a(.)\4294967298 ab\o{42}94967298 y $1 b
+
# The below was inserting a NULL into the character class.
[\8\9] \000 Sn - -
[\8\9] - sc $& Unrecognized escape \\8 in character class