Add some checks for the eint vector size and the list of compile-time error
texts.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@499 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-03 20:09:39 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-03 20:09:39 +0000
commit: 83b2b44d38f1afd36c4b19e1afceea68e6216fbd (patch)
tree: 591d90b315e50d3c1ee7a3ae6d22b3b6d8e14543
parent: 7887b1dc1c1c7276d3f95aa3920e1c5997ea3480 (diff)
download: pcre-83b2b44d38f1afd36c4b19e1afceea68e6216fbd.tar.gz
4 files changed, 52 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 1dd1232..33d6ea0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,15 @@ Version 8.02 01-Mar-2010
     in pcre_dfa_exec.c. This could lead to memory accesses outsize the vectors.
     I've fixed the data, and added a kludgy way of testing at compile time that 
     the lengths are correct (equal to the number of opcodes).  
+    
+8.  Following on from 7, I added a similar kludge to check the length of the 
+    eint vector in pcreposix.c. 
+    
+9.  Error texts for pcre_compile() are held as one long string to avoid too 
+    much relocation at load time. To find a text, the string is searched, 
+    counting zeros. There was no check for running off the end of the string,
+    which could happen if a new error number was added without updating the
+    string. 
 
 
 Version 8.01 19-Jan-2010
diff --git a/pcre_compile.c b/pcre_compile.c
index 6ca1862..e89819a 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -263,7 +263,11 @@ the number of relocations needed when a shared library is loaded dynamically,
 it is now one long string. We cannot use a table of offsets, because the
 lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
 simply count through to the one we want - this isn't a performance issue
-because these strings are used only when there is a compilation error. */
+because these strings are used only when there is a compilation error. 
+
+Each substring ends with \0 to insert a null character. This includes the final 
+substring, so that the whole string ends with \0\0, which can be detected when 
+counting through. */
 
 static const char error_texts[] =
   "no error\0"
@@ -344,8 +348,7 @@ static const char error_texts[] =
   "digit expected after (?+\0"
   "] is an invalid data character in JavaScript compatibility mode\0"
   /* 65 */
-  "different names for subpatterns of the same number are not allowed";
-
+  "different names for subpatterns of the same number are not allowed\0";
 
 /* Table to identify digits and hex digits. This is used when compiling
 patterns. Note that the tables in chartables are dependent on the locale, and
@@ -503,7 +506,11 @@ static const char *
 find_error_text(int n)
 {
 const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0) {};
+for (; n > 0; n--) 
+  {
+  while (*s++ != 0) {};
+  if (*s == 0) return "Error text not found (please report)";
+  } 
 return s;
 }
 
diff --git a/pcre_internal.h b/pcre_internal.h
index 388688f..3cb8b46 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1502,8 +1502,9 @@ condition. */
 
 #define RREF_ANY  0xffff
 
-/* Error code numbers. They are given names so that they can more easily be
-tracked. */
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the table called eint in 
+pcreposix.c must be updated. */
 
 enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
@@ -1511,7 +1512,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
-       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };
+       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };
 
 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
diff --git a/pcreposix.c b/pcreposix.c
index b30378c..44c3ff9 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -344,6 +344,8 @@ rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
 
 if (rc == 0) rc = nmatch;    /* All captured slots were filled in */
 
+/* Successful match */
+
 if (rc >= 0)
   {
   size_t i;
@@ -360,22 +362,33 @@ if (rc >= 0)
   return 0;
   }
 
-else
+/* Unsuccessful match */
+
+if (allocated_ovector) free(ovector);
+switch(rc)
   {
-  if (allocated_ovector) free(ovector);
-  switch(rc)
-    {
-    case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
-    case PCRE_ERROR_NULL: return REG_INVARG;
-    case PCRE_ERROR_BADOPTION: return REG_INVARG;
-    case PCRE_ERROR_BADMAGIC: return REG_INVARG;
-    case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
-    case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
-    case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
-    case PCRE_ERROR_BADUTF8: return REG_INVARG;
-    case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
-    default: return REG_ASSERT;
-    }
+/* ========================================================================== */
+  /* These cases are never obeyed. This is a fudge that causes a compile-time
+  error if the vector eint, which is indexed by compile-time error number, is
+  not the correct length. It seems to be the only way to do such a check at
+  compile time, as the sizeof() operator does not work in the C preprocessor.
+  As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */ 
+
+  case 0:
+  case (sizeof(eint)/sizeof(int) == ERRCOUNT):
+  return REG_ASSERT;
+/* ========================================================================== */
+
+  case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
+  case PCRE_ERROR_NULL: return REG_INVARG;
+  case PCRE_ERROR_BADOPTION: return REG_INVARG;
+  case PCRE_ERROR_BADMAGIC: return REG_INVARG;
+  case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
+  case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
+  case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
+  case PCRE_ERROR_BADUTF8: return REG_INVARG;
+  case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+  default: return REG_ASSERT;
   }
 }
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2010-03-03 20:09:39 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2010-03-03 20:09:39 +0000
commit	83b2b44d38f1afd36c4b19e1afceea68e6216fbd (patch)
tree	591d90b315e50d3c1ee7a3ae6d22b3b6d8e14543
parent	7887b1dc1c1c7276d3f95aa3920e1c5997ea3480 (diff)
download	pcre-83b2b44d38f1afd36c4b19e1afceea68e6216fbd.tar.gz