summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-03 20:09:39 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-03 20:09:39 +0000
commit 83b2b44d38f1afd36c4b19e1afceea68e6216fbd (patch)
tree 591d90b315e50d3c1ee7a3ae6d22b3b6d8e14543
parent 7887b1dc1c1c7276d3f95aa3920e1c5997ea3480 (diff)
download pcre-83b2b44d38f1afd36c4b19e1afceea68e6216fbd.tar.gz
Add some checks for the eint vector size and the list of compile-time error
texts.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@499 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 9
-rw-r--r-- pcre_compile.c 15
-rw-r--r-- pcre_internal.h 7
-rw-r--r-- pcreposix.c 43
4 files changed, 52 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 1dd1232..33d6ea0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,15 @@ Version 8.02 01-Mar-2010
in pcre_dfa_exec.c. This could lead to memory accesses outside the vectors.
I've fixed the data, and added a kludgy way of testing at compile time that
the lengths are correct (equal to the number of opcodes).
+
+8. Following on from 7, I added a similar kludge to check the length of the
+ eint vector in pcreposix.c.
+
+9. Error texts for pcre_compile() are held as one long string to avoid too
+ much relocation at load time. To find a text, the string is searched,
+ counting zeros. There was no check for running off the end of the string,
+ which could happen if a new error number was added without updating the
+ string.
Version 8.01 19-Jan-2010
diff --git a/pcre_compile.c b/pcre_compile.c
index 6ca1862..e89819a 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -263,7 +263,11 @@ the number of relocations needed when a shared library is loaded dynamically,
it is now one long string. We cannot use a table of offsets, because the
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
simply count through to the one we want - this isn't a performance issue
-because these strings are used only when there is a compilation error. */
+because these strings are used only when there is a compilation error.
+
+Each substring ends with \0 to insert a null character. This includes the final
+substring, so that the whole string ends with \0\0, which can be detected when
+counting through. */
static const char error_texts[] =
"no error\0"
@@ -344,8 +348,7 @@ static const char error_texts[] =
"digit expected after (?+\0"
"] is an invalid data character in JavaScript compatibility mode\0"
/* 65 */
- "different names for subpatterns of the same number are not allowed";
-
+ "different names for subpatterns of the same number are not allowed\0";
/* Table to identify digits and hex digits. This is used when compiling
patterns. Note that the tables in chartables are dependent on the locale, and
@@ -503,7 +506,11 @@ static const char *
find_error_text(int n)
{
const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0) {};
+for (; n > 0; n--)
+ {
+ while (*s++ != 0) {};
+ if (*s == 0) return "Error text not found (please report)";
+ }
return s;
}
diff --git a/pcre_internal.h b/pcre_internal.h
index 388688f..3cb8b46 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1502,8 +1502,9 @@ condition. */
#define RREF_ANY 0xffff
-/* Error code numbers. They are given names so that they can more easily be
-tracked. */
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the table called eint in
+pcreposix.c must be updated. */
enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
@@ -1511,7 +1512,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
- ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };
+ ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
diff --git a/pcreposix.c b/pcreposix.c
index b30378c..44c3ff9 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -344,6 +344,8 @@ rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
+/* Successful match */
+
if (rc >= 0)
{
size_t i;
@@ -360,22 +362,33 @@ if (rc >= 0)
return 0;
}
-else
+/* Unsuccessful match */
+
+if (allocated_ovector) free(ovector);
+switch(rc)
{
- if (allocated_ovector) free(ovector);
- switch(rc)
- {
- case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
- case PCRE_ERROR_NULL: return REG_INVARG;
- case PCRE_ERROR_BADOPTION: return REG_INVARG;
- case PCRE_ERROR_BADMAGIC: return REG_INVARG;
- case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
- case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
- case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
- case PCRE_ERROR_BADUTF8: return REG_INVARG;
- case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
- default: return REG_ASSERT;
- }
+/* ========================================================================== */
+ /* These cases are never obeyed. This is a fudge that causes a compile-time
+ error if the vector eint, which is indexed by compile-time error number, is
+ not the correct length. It seems to be the only way to do such a check at
+ compile time, as the sizeof() operator does not work in the C preprocessor.
+ As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
+
+ case 0:
+ case (sizeof(eint)/sizeof(int) == ERRCOUNT):
+ return REG_ASSERT;
+/* ========================================================================== */
+
+ case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
+ case PCRE_ERROR_NULL: return REG_INVARG;
+ case PCRE_ERROR_BADOPTION: return REG_INVARG;
+ case PCRE_ERROR_BADMAGIC: return REG_INVARG;
+ case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
+ case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
+ case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
+ case PCRE_ERROR_BADUTF8: return REG_INVARG;
+ case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+ default: return REG_ASSERT;
}
}