diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-03 20:09:39 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-03 20:09:39 +0000 |
commit | 83b2b44d38f1afd36c4b19e1afceea68e6216fbd (patch) | |
tree | 591d90b315e50d3c1ee7a3ae6d22b3b6d8e14543 | |
parent | 7887b1dc1c1c7276d3f95aa3920e1c5997ea3480 (diff) | |
download | pcre-83b2b44d38f1afd36c4b19e1afceea68e6216fbd.tar.gz |
Add some checks for the eint vector size and the list of compile-time error
texts.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@499 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | pcre_compile.c | 15 | ||||
-rw-r--r-- | pcre_internal.h | 7 | ||||
-rw-r--r-- | pcreposix.c | 43 |
4 files changed, 52 insertions, 22 deletions
@@ -29,6 +29,15 @@ Version 8.02 01-Mar-2010 in pcre_dfa_exec.c. This could lead to memory accesses outside the vectors. I've fixed the data, and added a kludgy way of testing at compile time that the lengths are correct (equal to the number of opcodes). + +8. Following on from 7, I added a similar kludge to check the length of the + eint vector in pcreposix.c. + +9. Error texts for pcre_compile() are held as one long string to avoid too + much relocation at load time. To find a text, the string is searched, + counting zeros. There was no check for running off the end of the string, + which could happen if a new error number was added without updating the + string. Version 8.01 19-Jan-2010 diff --git a/pcre_compile.c b/pcre_compile.c index 6ca1862..e89819a 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -263,7 +263,11 @@ the number of relocations needed when a shared library is loaded dynamically, it is now one long string. We cannot use a table of offsets, because the lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we simply count through to the one we want - this isn't a performance issue -because these strings are used only when there is a compilation error. */ +because these strings are used only when there is a compilation error. + +Each substring ends with \0 to insert a null character. This includes the final +substring, so that the whole string ends with \0\0, which can be detected when +counting through. */ static const char error_texts[] = "no error\0" @@ -344,8 +348,7 @@ static const char error_texts[] = "digit expected after (?+\0" "] is an invalid data character in JavaScript compatibility mode\0" /* 65 */ - "different names for subpatterns of the same number are not allowed"; - + "different names for subpatterns of the same number are not allowed\0"; /* Table to identify digits and hex digits. This is used when compiling patterns. 
Note that the tables in chartables are dependent on the locale, and @@ -503,7 +506,11 @@ static const char * find_error_text(int n) { const char *s = error_texts; -for (; n > 0; n--) while (*s++ != 0) {}; +for (; n > 0; n--) + { + while (*s++ != 0) {}; + if (*s == 0) return "Error text not found (please report)"; + } return s; } diff --git a/pcre_internal.h b/pcre_internal.h index 388688f..3cb8b46 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1502,8 +1502,9 @@ condition. */ #define RREF_ANY 0xffff -/* Error code numbers. They are given names so that they can more easily be -tracked. */ +/* Compile time error code numbers. They are given names so that they can more +easily be tracked. When a new number is added, the table called eint in +pcreposix.c must be updated. */ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, @@ -1511,7 +1512,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, - ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 }; + ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. 
We store an explicit diff --git a/pcreposix.c b/pcreposix.c index b30378c..44c3ff9 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -344,6 +344,8 @@ rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so), if (rc == 0) rc = nmatch; /* All captured slots were filled in */ +/* Successful match */ + if (rc >= 0) { size_t i; @@ -360,22 +362,33 @@ if (rc >= 0) return 0; } -else +/* Unsuccessful match */ + +if (allocated_ovector) free(ovector); +switch(rc) { - if (allocated_ovector) free(ovector); - switch(rc) - { - case PCRE_ERROR_NOMATCH: return REG_NOMATCH; - case PCRE_ERROR_NULL: return REG_INVARG; - case PCRE_ERROR_BADOPTION: return REG_INVARG; - case PCRE_ERROR_BADMAGIC: return REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; - case PCRE_ERROR_NOMEMORY: return REG_ESPACE; - case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE; - case PCRE_ERROR_BADUTF8: return REG_INVARG; - case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG; - default: return REG_ASSERT; - } +/* ========================================================================== */ + /* These cases are never obeyed. This is a fudge that causes a compile-time + error if the vector eint, which is indexed by compile-time error number, is + not the correct length. It seems to be the only way to do such a check at + compile time, as the sizeof() operator does not work in the C preprocessor. + As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. 
*/ + + case 0: + case (sizeof(eint)/sizeof(int) == ERRCOUNT): + return REG_ASSERT; +/* ========================================================================== */ + + case PCRE_ERROR_NOMATCH: return REG_NOMATCH; + case PCRE_ERROR_NULL: return REG_INVARG; + case PCRE_ERROR_BADOPTION: return REG_INVARG; + case PCRE_ERROR_BADMAGIC: return REG_INVARG; + case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; + case PCRE_ERROR_NOMEMORY: return REG_ESPACE; + case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE; + case PCRE_ERROR_BADUTF8: return REG_INVARG; + case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG; + default: return REG_ASSERT; } } |