Commit all the changes for JIT support, but without any documentation yet.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@667 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-08-22 14:57:32 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-08-22 14:57:32 +0000
commit: ff911da489a2d0eb1aa777cc2f8740c65b080f32 (patch)
tree: 69c0e76810e5db789573348eb099f2ce55d5a42c /pcre_exec.c
parent: b205b9285d0feca53c32d7258fc02ecb926ca16b (diff)
download: pcre-ff911da489a2d0eb1aa777cc2f8740c65b080f32.tar.gz
1 file changed, 62 insertions, 40 deletions
diff --git a/pcre_exec.c b/pcre_exec.c
index b1ab387..05fe0b7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -5761,7 +5761,7 @@ pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   int offsetcount)
 {
-int rc, ocount;
+int rc, ocount, arg_offset_max;
 int first_byte = -1;
 int req_byte = -1;
 int req_byte2 = -1;
@@ -5797,8 +5797,59 @@ if (re == NULL || subject == NULL ||
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 
-/* This information is for finding all the numbers associated with a given
-name, for condition testing. */
+/* These two settings are used in the code for checking a UTF-8 string that
+follows immediately afterwards. Other values in the md block are used only
+during "normal" pcre_exec() processing, not when the JIT support is in use,
+so they are set up later. */
+
+utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
+md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
+              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
+
+/* Check a UTF-8 string if required. Pass back the character offset and error
+code for an invalid string if a results vector is available. */
+
+#ifdef SUPPORT_UTF8
+if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
+  {
+  int erroroffset;
+  int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
+  if (errorcode != 0)
+    {
+    if (offsetcount >= 2)
+      {
+      offsets[0] = erroroffset;
+      offsets[1] = errorcode;
+      }
+    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
+      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+    }
+
+  /* Check that a start_offset points to the start of a UTF-8 character. */
+  if (start_offset > 0 && start_offset < length &&
+      (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
+    return PCRE_ERROR_BADUTF8_OFFSET;
+  }
+#endif
+
+/* If the pattern was successfully studied with JIT support, run the JIT
+executable instead of the rest of this function. Most options must be set at
+compile time for the JIT code to be usable. Fall back to the normal code path if
+an unsupported flag is set. In particular, JIT does not support partial
+matching. */
+
+#ifdef SUPPORT_JIT
+if (extra_data != NULL
+    && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
+    && extra_data->executable_jit != NULL
+    && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
+                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
+  return _pcre_jit_exec(re, extra_data->executable_jit, subject, length, 
+    start_offset, options, offsets, offsetcount);
+#endif
+
+/* Carry on with non-JIT matching. This information is for finding all the
+numbers associated with a given name, for condition testing. */
 
 md->name_table = (uschar *)re + re->name_table_offset;
 md->name_count = re->name_count;
@@ -5865,7 +5916,6 @@ md->end_subject = md->start_subject + length;
 end_subject = md->end_subject;
 
 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
-utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
 md->use_ucp = (re->options & PCRE_UCP) != 0;
 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
 
@@ -5876,9 +5926,6 @@ md->notbol = (options & PCRE_NOTBOL) != 0;
 md->noteol = (options & PCRE_NOTEOL) != 0;
 md->notempty = (options & PCRE_NOTEMPTY) != 0;
 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
-md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
-              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
-
 
 md->hitend = FALSE;
 md->mark = NULL;                        /* In case never set */
@@ -5961,39 +6008,13 @@ defined (though never set). So there's no harm in leaving this code. */
 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
   return PCRE_ERROR_BADPARTIAL;
 
-/* Check a UTF-8 string if required. Pass back the character offset and error
-code for an invalid string if a results vector is available. */
-
-#ifdef SUPPORT_UTF8
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
-  {
-  int erroroffset;
-  int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
-  if (errorcode != 0)
-    {
-    if (offsetcount >= 2)
-      {
-      offsets[0] = erroroffset;
-      offsets[1] = errorcode;
-      }
-    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
-      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
-    }
-
-  /* Check that a start_offset points to the start of a UTF-8 character. */
-
-  if (start_offset > 0 && start_offset < length &&
-      (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
-    return PCRE_ERROR_BADUTF8_OFFSET;
-  }
-#endif
-
 /* If the expression has got more back references than the offsets supplied can
 hold, we get a temporary chunk of working store to use during the matching.
 Otherwise, we can use the vector supplied, rounding down its size to a multiple
 of 3. */
 
 ocount = offsetcount - (offsetcount % 3);
+arg_offset_max = (2*ocount)/3;
 
 if (re->top_backref > 0 && re->top_backref >= ocount/3)
   {
@@ -6368,21 +6389,22 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   {
   if (using_temporary_offsets)
     {
-    if (offsetcount >= 4)
+    if (arg_offset_max >= 4)
       {
       memcpy(offsets + 2, md->offset_vector + 2,
-        (offsetcount - 2) * sizeof(int));
+        (arg_offset_max - 2) * sizeof(int));
       DPRINTF(("Copied offsets from temporary memory\n"));
       }
-    if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
+    if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
     DPRINTF(("Freeing temporary memory\n"));
     (pcre_free)(md->offset_vector);
     }
 
-  /* Set the return code to the number of captured strings, or 0 if there are
+  /* Set the return code to the number of captured strings, or 0 if there were
   too many to fit into the vector. */
-
-  rc = md->offset_overflow? 0 : md->end_offset_top/2;
+  
+  rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
+    0 : md->end_offset_top/2;
 
   /* If there is space in the offset vector, set any unused pairs at the end of
   the pattern to -1 for backwards compatibility. It is documented that this
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-08-22 14:57:32 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-08-22 14:57:32 +0000
commit	ff911da489a2d0eb1aa777cc2f8740c65b080f32 (patch)
tree	69c0e76810e5db789573348eb099f2ce55d5a42c /pcre_exec.c
parent	b205b9285d0feca53c32d7258fc02ecb926ca16b (diff)
download	pcre-ff911da489a2d0eb1aa777cc2f8740c65b080f32.tar.gz