summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-20 14:03:16 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-20 14:03:16 +0000
commit d5be172106f6ee6bb083da3758f13f34d8bd2f3a (patch)
tree 598aa95db1eabe59ec71306de532de53cc5c2acd
parent c5cf628e95d7622f92212b03fa5aad9d2fcab5c2 (diff)
download pcre-d5be172106f6ee6bb083da3758f13f34d8bd2f3a.tar.gz
pcretest can now save/restore byte-swapped 16-bit patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@813 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre_byte_order.c 4
-rw-r--r-- pcretest.c 393
2 files changed, 299 insertions, 98 deletions
diff --git a/pcre_byte_order.c b/pcre_byte_order.c
index 10373b8..c73ac61 100644
--- a/pcre_byte_order.c
+++ b/pcre_byte_order.c
@@ -240,7 +240,9 @@ while(TRUE)
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
- utf16_char = TRUE;
+#ifdef SUPPORT_UTF
+ if (utf) utf16_char = TRUE;
+#endif
length = PRIV(OP_lengths)[*ptr] - 1;
break;
diff --git a/pcretest.c b/pcretest.c
index 618e704..d656ba3 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -4,7 +4,7 @@
/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather, er, *very* untidy in places. The
+been extended and consequently is now rather, er, *very* untidy in places. The
addition of 16-bit support has made it even worse. :-(
-----------------------------------------------------------------------------
@@ -181,6 +181,7 @@ SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
use these in the definitions of generic macros. */
#ifdef SUPPORT_PCRE8
+
#define PCHARS8(lv, p, len, f) \
lv = pchars((pcre_uint8 *)p, len, f)
@@ -195,19 +196,20 @@ use these in the definitions of generic macros. */
count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
offsets, size_offsets)
-#define PCRE_STUDY8(extra, re, options, error) \
- extra = pcre_study(re, options, error)
-
#define PCRE_FREE_STUDY8(extra) \
pcre_free_study(extra)
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
pcre_pattern_to_host_byte_order(re, extra, tables)
+#define PCRE_STUDY8(extra, re, options, error) \
+ extra = pcre_study(re, options, error)
+
#endif /* SUPPORT_PCRE8 */
#ifdef SUPPORT_PCRE16
+
#define PCHARS16(lv, p, len, f) \
lv = pchars16((PCRE_SPTR16)p, len, f)
@@ -217,20 +219,20 @@ use these in the definitions of generic macros. */
#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
+#define PCRE_FREE_STUDY16(extra) \
+ pcre16_free_study(extra)
+
#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets) \
count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
options, offsets, size_offsets)
-#define PCRE_FREE_STUDY16(extra) \
- pcre16_free_study(extra)
+#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
+ pcre16_pattern_to_host_byte_order(re, extra, tables)
#define PCRE_STUDY16(extra, re, options, error) \
extra = pcre16_study(re, options, error)
-#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
- pcre16_pattern_to_host_byte_order(re, extra, tables)
-
#endif /* SUPPORT_PCRE16 */
@@ -256,6 +258,12 @@ use these in the definitions of generic macros. */
else \
PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
+#define PCRE_FREE_STUDY(extra) \
+ if (use_pcre16) \
+ PCRE_FREE_STUDY16(extra); \
+ else \
+ PCRE_FREE_STUDY8(extra)
+
#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets) \
if (use_pcre16) \
@@ -265,11 +273,11 @@ use these in the definitions of generic macros. */
PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets)
-#define PCRE_FREE_STUDY(extra) \
+#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
if (use_pcre16) \
- PCRE_FREE_STUDY16(extra); \
+ PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
else \
- PCRE_FREE_STUDY8(extra)
+ PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
#define PCRE_STUDY(extra, re, options, error) \
if (use_pcre16) \
@@ -277,12 +285,6 @@ use these in the definitions of generic macros. */
else \
PCRE_STUDY8(extra, re, options, error)
-#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
- if (use_pcre16) \
- PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
- else \
- PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
-
/* ----- Only 8-bit mode is supported ----- */
#elif defined SUPPORT_PCRE8
@@ -291,8 +293,8 @@ use these in the definitions of generic macros. */
#define PCRE_COMPILE PCRE_COMPILE8
#define PCRE_EXEC PCRE_EXEC8
#define PCRE_FREE_STUDY PCRE_FREE_STUDY8
-#define PCRE_STUDY PCRE_STUDY8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
+#define PCRE_STUDY PCRE_STUDY8
/* ----- Only 16-bit mode is supported ----- */
@@ -302,8 +304,8 @@ use these in the definitions of generic macros. */
#define PCRE_COMPILE PCRE_COMPILE16
#define PCRE_EXEC PCRE_EXEC16
#define PCRE_FREE_STUDY PCRE_FREE_STUDY16
-#define PCRE_STUDY PCRE_STUDY16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
+#define PCRE_STUDY PCRE_STUDY16
#endif
/* ----- End of mode-specific function call macros ----- */
@@ -347,11 +349,37 @@ static pcre_uint8 *buffer = NULL;
static pcre_uint8 *dbuffer = NULL;
static pcre_uint8 *pbuffer = NULL;
+/* Another buffer is needed for translation to 16-bit character strings. It will
+be obtained and extended as required. */
+
#ifdef SUPPORT_PCRE16
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
+
+/* We need the table of operator lengths that is used for 16-bit compiling, in
+order to swap bytes in a pattern for saving/reloading testing. Luckily, the
+data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
+appropriately for the 16-bit world. Just as a safety check, make sure that
+COMPILE_PCRE16 is *not* set. */
+
+#ifdef COMPILE_PCRE16
+#error COMPILE_PCRE16 must not be set when compiling pcretest.c
+#endif
+
+#if LINK_SIZE == 2
+#undef LINK_SIZE
+#define LINK_SIZE 1
+#elif LINK_SIZE == 3 || LINK_SIZE == 4
+#undef LINK_SIZE
+#define LINK_SIZE 2
+#else
+#error LINK_SIZE must be either 2, 3, or 4
#endif
+static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
+
+#endif /* SUPPORT_PCRE16 */
+
/* If we have 8-bit support, default use_pcre16 to false; if there is also
16-bit support, it can be changed by an option. If there is no 8-bit support,
there must be 16-bit support, so default it to 1. */
@@ -862,15 +890,15 @@ return i + 1;
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
-result is always left in buffer16.
+result is always left in buffer16.
Arguments:
p points to a byte string
utf true if UTF-8 (to be converted to UTF-16)
len number of bytes in the string (excluding trailing zero)
-
+
Returns: number of 16-bit data items used (excluding trailing zero)
- OR -1 if a UTF-8 string is malformed
+ OR -1 if a UTF-8 string is malformed
*/
static int
@@ -905,7 +933,7 @@ else
int chlen = utf82ord(p, &c);
if (chlen <= 0) return -1;
p += chlen;
- len -= chlen;
+ len -= chlen;
if (c < 0x10000) *pp++ = c; else
{
c -= 0x10000;
@@ -1067,21 +1095,21 @@ if (PRINTOK(c))
if (f != NULL) fprintf(f, "%c", c);
return 1;
}
-
+
if (c < 0x100)
{
if (use_utf)
- {
+ {
if (f != NULL) fprintf(f, "\\x{%02x}", c);
return 6;
- }
- else
+ }
+ else
{
if (f != NULL) fprintf(f, "\\x%02x", c);
- return 4;
- }
+ return 4;
+ }
}
-
+
if (f != NULL) fprintf(f, "\\x{%02x}", c);
return (c <= 0x000000ff)? 6 :
(c <= 0x00000fff)? 7 :
@@ -1115,7 +1143,7 @@ while (length-- > 0)
length -= rc - 1;
p += rc;
yield += pchar(c, f);
- continue;
+ continue;
}
}
#endif
@@ -1152,9 +1180,9 @@ while (length-- > 0)
{
c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
length--;
- p++;
+ p++;
}
- }
+ }
#endif
yield += pchar(c, f);
}
@@ -1343,19 +1371,226 @@ if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
/*************************************************
-* Byte flipping function *
+* Swap byte functions *
*************************************************/
-static unsigned long int
-byteflip(unsigned long int value, int n)
+/* The following functions swap the bytes of a pcre_uint16
+and pcre_uint32 value.
+
+Arguments:
+ value any number
+
+Returns: the byte swapped value
+*/
+
+static pcre_uint32
+swap_uint32(pcre_uint32 value)
{
-if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
((value & 0x0000ff00) << 8) |
((value & 0x00ff0000) >> 8) |
- ((value & 0xff000000) >> 24);
+ (value >> 24);
}
+static pcre_uint16
+swap_uint16(pcre_uint16 value)
+{
+return (value >> 8) | (value << 8);
+}
+
+
+
+/*************************************************
+* Flip bytes in a compiled pattern *
+*************************************************/
+
+/* This function is called if the 'F' option was present on a pattern that is
+to be written to a file. We flip the bytes of all the integer fields in the
+regex data block and the study block. In 16-bit mode this also flips relevant
+bytes in the pattern itself. This is to make it possible to test PCRE's
+ability to reload byte-flipped patterns, e.g. those compiled on a different
+architecture. */
+
+static void
+regexflip(pcre *ere, pcre_extra *extra)
+{
+real_pcre *re = (real_pcre *)ere;
+int op;
+
+#ifdef SUPPORT_PCRE16
+pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
+int length = re->name_count * re->name_entry_size;
+#ifdef SUPPORT_UTF
+BOOL utf = (re->options & PCRE_UTF16) != 0;
+BOOL utf16_char = FALSE;
+#endif /* SUPPORT_UTF */
+#endif /* SUPPORT_PCRE16 */
+
+/* Always flip the bytes in the main data block and study blocks. */
+
+re->magic_number = REVERSED_MAGIC_NUMBER;
+re->size = swap_uint32(re->size);
+re->options = swap_uint32(re->options);
+re->flags = swap_uint16(re->flags);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
+re->first_char = swap_uint16(re->first_char);
+re->req_char = swap_uint16(re->req_char);
+re->name_table_offset = swap_uint16(re->name_table_offset);
+re->name_entry_size = swap_uint16(re->name_entry_size);
+re->name_count = swap_uint16(re->name_count);
+
+if (extra != NULL)
+ {
+ pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
+ rsd->size = swap_uint32(rsd->size);
+ rsd->flags = swap_uint32(rsd->flags);
+ rsd->minlength = swap_uint32(rsd->minlength);
+ }
+
+/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
+in the name table, if present, and then in the pattern itself. */
+
+#ifdef SUPPORT_PCRE16
+if (!use_pcre16) return;
+
+while(TRUE)
+ {
+ /* Swap previous characters. */
+ while (length-- > 0)
+ {
+ *ptr = swap_uint16(*ptr);
+ ptr++;
+ }
+#ifdef SUPPORT_UTF
+ if (utf16_char)
+ {
+ if ((ptr[-1] & 0xfc00) == 0xd800)
+ {
+ /* We know that there is only one extra character in UTF-16. */
+ *ptr = swap_uint16(*ptr);
+ ptr++;
+ }
+ }
+ utf16_char = FALSE;
+#endif /* SUPPORT_UTF */
+
+ /* Get next opcode. */
+
+ length = 0;
+ op = *ptr;
+ *ptr++ = swap_uint16(op);
+
+ switch (op)
+ {
+ case OP_END:
+ return;
+
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_QUERY:
+ case OP_MINQUERY:
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_EXACT:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+ case OP_POSQUERY:
+ case OP_POSUPTO:
+ case OP_STARI:
+ case OP_MINSTARI:
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_QUERYI:
+ case OP_MINQUERYI:
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_EXACTI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+ case OP_POSQUERYI:
+ case OP_POSUPTOI:
+ case OP_NOTSTAR:
+ case OP_NOTMINSTAR:
+ case OP_NOTPLUS:
+ case OP_NOTMINPLUS:
+ case OP_NOTQUERY:
+ case OP_NOTMINQUERY:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTEXACT:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
+ case OP_NOTSTARI:
+ case OP_NOTMINSTARI:
+ case OP_NOTPLUSI:
+ case OP_NOTMINPLUSI:
+ case OP_NOTQUERYI:
+ case OP_NOTMINQUERYI:
+ case OP_NOTUPTOI:
+ case OP_NOTMINUPTOI:
+ case OP_NOTEXACTI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ case OP_NOTPOSQUERYI:
+ case OP_NOTPOSUPTOI:
+#ifdef SUPPORT_UTF
+ if (utf) utf16_char = TRUE;
+#endif
+ length = OP_lengths16[op] - 1;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ /* Skip the character bit map. */
+ ptr += 32/sizeof(pcre_uint16);
+ length = 0;
+ break;
+
+ case OP_XCLASS:
+ /* Reverse the size of the XCLASS instance. */
+ ptr++;
+ *ptr = swap_uint16(*ptr);
+ if (LINK_SIZE > 1)
+ {
+ /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
+ ptr++;
+ *ptr = swap_uint16(*ptr);
+ }
+ ptr++;
+
+ if (LINK_SIZE > 1)
+ length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
+ (1 + LINK_SIZE + 1);
+ else
+ length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
+
+ op = *ptr;
+ *ptr = swap_uint16(op);
+ if ((op & XCL_MAP) != 0)
+ {
+ /* Skip the character bit map. */
+ ptr += 32/sizeof(pcre_uint16);
+ length -= 32/sizeof(pcre_uint16);
+ }
+ break;
+
+ default:
+ length = OP_lengths16[op] - 1;
+ break;
+ }
+ }
+/* Control should never reach here in 16 bit mode. */
+#endif /* SUPPORT_PCRE16 */
+}
@@ -1662,7 +1897,7 @@ are set, either both UTFs are supported or both are not supported. */
(void)pcre_config(PCRE_CONFIG_UTF8, &rc);
if (rc)
printf(" UTF-8 and UTF-16 support\n");
- else
+ else
printf(" No UTF-8 or UTF-16 support\n");
#elif defined SUPPORT_PCRE8
printf(" 8-bit support only\n");
@@ -1854,7 +2089,7 @@ while (!done)
magic = ((real_pcre *)re)->magic_number;
if (magic != MAGIC_NUMBER)
{
- if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
+ if (swap_uint32(magic) == MAGIC_NUMBER)
{
do_flip = 1;
}
@@ -1899,7 +2134,7 @@ while (!done)
else fprintf(outfile, "No study data\n");
/* Flip the necessary bytes. */
- if (do_flip != 0)
+ if (do_flip)
{
PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
}
@@ -2139,16 +2374,16 @@ while (!done)
/* In 16-bit mode, convert the input. */
#ifdef SUPPORT_PCRE16
- if (use_pcre16)
+ if (use_pcre16)
{
if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
{
fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
- "converted to UTF-16\n");
- goto SKIP_DATA;
- }
- p = (pcre_uint8 *)buffer16;
- }
+ "converted to UTF-16\n");
+ goto SKIP_DATA;
+ }
+ p = (pcre_uint8 *)buffer16;
+ }
#endif
/* Compile many times when timing */
@@ -2366,7 +2601,7 @@ while (!done)
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
all_options = ((real_pcre *)re)->options;
- if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
+ if (do_flip) all_options = swap_uint32(all_options);
if (get_options == 0) fprintf(outfile, "No options\n");
else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
@@ -2429,15 +2664,15 @@ while (!done)
const char *caseless =
((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
"" : " (caseless)";
-
+
if (PRINTOK(first_char))
fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
else
- {
+ {
fprintf(outfile, "First char = ");
- pchar(first_char, outfile);
+ pchar(first_char, outfile);
fprintf(outfile, "%s\n", caseless);
- }
+ }
}
if (need_char < 0)
@@ -2533,43 +2768,6 @@ while (!done)
if (to_file != NULL)
{
- /* If the 'F' option was present, we flip the bytes of all the integer
- fields in the regex data block and the study block. This is to make it
- possible to test PCRE's handling of byte-flipped patterns, e.g. those
- compiled on a different architecture. */
-
- if (do_flip)
- {
- real_pcre *rre = (real_pcre *)re;
- rre->magic_number =
- byteflip(rre->magic_number, sizeof(rre->magic_number));
- rre->size = byteflip(rre->size, sizeof(rre->size));
- rre->options = byteflip(rre->options, sizeof(rre->options));
- rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
- rre->top_bracket =
- (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
- rre->top_backref =
- (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
- rre->first_char =
- (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
- rre->req_char =
- (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
- rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
- sizeof(rre->name_table_offset));
- rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
- sizeof(rre->name_entry_size));
- rre->name_count = (pcre_uint16)byteflip(rre->name_count,
- sizeof(rre->name_count));
-
- if (extra != NULL)
- {
- pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
- rsd->size = byteflip(rsd->size, sizeof(rsd->size));
- rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
- rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
- }
- }
-
FILE *f = fopen((char *)to_file, "wb");
if (f == NULL)
{
@@ -2578,11 +2776,12 @@ while (!done)
else
{
pcre_uint8 sbuf[8];
+
+ if (do_flip) regexflip(re, extra);
sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
sbuf[3] = (pcre_uint8)((true_size) & 255);
-
sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
@@ -2614,7 +2813,7 @@ while (!done)
}
new_free(re);
- if (extra != NULL)
+ if (extra != NULL)
{
PCRE_FREE_STUDY(extra);
}
@@ -3080,17 +3279,17 @@ while (!done)
/* Handle matching via the native interface - repeats for /g and /G */
#ifdef SUPPORT_PCRE16
- if (use_pcre16)
+ if (use_pcre16)
{
len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
if (len < 0)
{
fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
- "converted to UTF-16\n");
- goto NEXT_DATA;
- }
+ "converted to UTF-16\n");
+ goto NEXT_DATA;
+ }
bptr = (pcre_uint8 *)buffer16;
- }
+ }
#endif
for (;; gmatched++) /* Loop for /g or /G */