summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-18 11:11:48 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-18 11:11:48 +0000
commita8cda0f054f0037a0a961b3e78ce0c5a00ebf63b (patch)
tree8a46d44a09df59d8a1f920c9c48ea5f4fce7b88a
parentedeb17f511c2d107140e8dadfb8755d70901e90b (diff)
downloadpcre-a8cda0f054f0037a0a961b3e78ce0c5a00ebf63b.tar.gz
Commit progress so far on pcretest (runs test 1 on interpreter).
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@808 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog10
-rwxr-xr-xRunTest111
-rw-r--r--configure.ac2
-rw-r--r--pcre_compile.c12
-rw-r--r--pcretest.c662
5 files changed, 512 insertions, 285 deletions
diff --git a/ChangeLog b/ChangeLog
index 81eaf17..b410d0f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,16 @@
ChangeLog for PCRE
------------------
+Version 8.22
+------------
+
+1. Renamed "isnumber" as "is_a_number" because in some Mac environments this
+ name is defined in ctype.h.
+
+2. Fixed a bug in the code for calculating the fixed length of lookbehind
+ assertions.
+
+
Version 8.21 12-Dec-2011
------------------------
diff --git a/RunTest b/RunTest
index b98bb96..49cce8d 100755
--- a/RunTest
+++ b/RunTest
@@ -100,8 +100,29 @@ case `$sim ./pcretest -C | $sim ./pcregrep 'Internal link size'` in
*) echo "Failed to find internal link size"; exit 1;;
esac
-$sim ./pcretest -C | $sim ./pcregrep 'No UTF-8 support' >/dev/null
-utf8=$?
+# Both 8-bit and 16-bit character strings may be supported, but only one
+# need be.
+
+$sim ./pcretest -C | $sim ./pcregrep '8-bit and 16-bit support' >/dev/null
+if [ $? -eq 0 ] ; then
+ test8=
+ test16=-16
+else
+ $sim ./pcretest -C | $sim ./pcregrep '8-bit support' >/dev/null
+ if [ $? -eq 0 ] ; then
+ test8=
+ test16=skip
+ else
+ test8=skip
+ test16=-16
+ fi
+fi
+
+# UTF support always applies to both bit sizes if both are supported; we can't
+# have UTF-8 support without UTF-16 support (for example).
+
+$sim ./pcretest -C | $sim ./pcregrep 'No UTF-(.+?) support' >/dev/null
+utf=$?
$sim ./pcretest -C | $sim ./pcregrep 'No Unicode properties support' >/dev/null
ucp=$?
@@ -114,21 +135,21 @@ if [ $jit -ne 0 ] ; then
jitopt=-s+
fi
-if [ $utf8 -eq 0 ] ; then
+if [ $utf -eq 0 ] ; then
if [ $do4 = yes ] ; then
- echo "Can't run test 4 because UTF-8 support is not configured"
+ echo "Can't run test 4 because UTF support is not configured"
exit 1
fi
if [ $do5 = yes ] ; then
- echo "Can't run test 5 because UTF-8 support is not configured"
+ echo "Can't run test 5 because UTF support is not configured"
exit 1
fi
if [ $do8 = yes ] ; then
- echo "Can't run test 8 because UTF-8 support is not configured"
+ echo "Can't run test 8 because UTF support is not configured"
exit 1
fi
if [ $do12 = yes ] ; then
- echo "Can't run test 12 because UTF-8 support is not configured"
+ echo "Can't run test 12 because UTF support is not configured"
exit 1
fi
fi
@@ -201,12 +222,20 @@ echo ""
echo PCRE C library tests using test data from $testdata
$sim ./pcretest /dev/null
+for bmode in "$test8" "$test16"; do
+ case "$bmode" in
+ skip) continue;;
+ -16) if [ "$test8" != "skip" ] ; then echo ""; fi
+ echo "---- Testing 16-bit library ----"; echo "";;
+ *) echo "---- Testing 8-bit library ----"; echo "";;
+ esac
+
# Primary test, compatible with JIT and all versions of Perl >= 5.8
if [ $do1 = yes ] ; then
echo "Test 1: main functionality (Compatible with Perl >= 5.8)"
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput1 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput1 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput1 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -222,9 +251,9 @@ fi
# PCRE tests that are not JIT or Perl-compatible: API, errors, internals
if [ $do2 = yes ] ; then
- echo "Test 2: API, errors, internals, and non-Perl stuff (not UTF-8)"
+ echo "Test 2: API, errors, internals, and non-Perl stuff (not UTF-8/16)"
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput2 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput2 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -278,7 +307,7 @@ if [ $do3 = yes ] ; then
if [ "$locale" != "" ] ; then
echo "Test 3: locale-specific features (using '$locale' locale)"
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $infile testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $infile testtry
if [ $? = 0 ] ; then
$cf $outfile testtry
if [ $? != 0 ] ; then
@@ -304,15 +333,15 @@ if [ $do3 = yes ] ; then
fi
fi
-# Additional tests for UTF8 support
+# Additional tests for UTF support
if [ $do4 = yes ] ; then
- echo "Test 4: UTF-8 support (Compatible with Perl >= 5.8)"
- if [ $utf8 -eq 0 ] ; then
- echo " Skipped because UTF-8 support is not available"
+ echo "Test 4: UTF-8/16 support (Compatible with Perl >= 5.8)"
+ if [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF support is not available"
else
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput4 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput4 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput4 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -327,12 +356,12 @@ if [ $do4 = yes ] ; then
fi
if [ $do5 = yes ] ; then
- echo "Test 5: API, internals, and non-Perl stuff for UTF-8 support"
- if [ $utf8 -eq 0 ] ; then
- echo " Skipped because UTF-8 support is not available"
+ echo "Test 5: API, internals, and non-Perl stuff for UTF-8/16 support"
+ if [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF support is not available"
else
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput5 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput5 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput5 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -348,11 +377,11 @@ fi
if [ $do6 = yes ] ; then
echo "Test 6: Unicode property support (Compatible with Perl >= 5.10)"
- if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+ if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
echo " Skipped because Unicode property support is not available"
else
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput6 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput6 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput6 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -371,7 +400,7 @@ fi
if [ $do7 = yes ] ; then
echo "Test 7: DFA matching"
for opt in "" "-s"; do
- $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput7 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput7 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput7 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -382,12 +411,12 @@ if [ $do7 = yes ] ; then
fi
if [ $do8 = yes ] ; then
- echo "Test 8: DFA matching with UTF-8"
- if [ $utf8 -eq 0 ] ; then
- echo " Skipped because UTF-8 support is not available"
+ echo "Test 8: DFA matching with UTF-8 or UTF-16"
+ if [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF support is not available"
else
for opt in "" "-s"; do
- $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput8 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput8 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput8 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -400,11 +429,11 @@ fi
if [ $do9 = yes ] ; then
echo "Test 9: DFA matching with Unicode properties"
- if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+ if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
echo " Skipped because Unicode property support is not available"
else
for opt in "" "-s"; do
- $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput9 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput9 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput9 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -429,7 +458,7 @@ if [ $do10 = yes ] ; then
echo " Skipped because Unicode property support is not available"
else
for opt in "" "-s"; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput10 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput10 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput10 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -445,7 +474,7 @@ fi
if [ $do11 = yes ] ; then
echo "Test 11: Features from Perl >= 5.10 without UTF8 support"
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput11 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput11 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput11 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -461,12 +490,12 @@ fi
# Test of Perl >= 5.10 features with UTF8 support
if [ $do12 = yes ] ; then
- echo "Test 12: Features from Perl >= 5.10 with UTF8 support"
- if [ $utf8 -eq 0 ] ; then
- echo " Skipped because UTF-8 support is not available"
+ echo "Test 12: Features from Perl >= 5.10 with UTF-8 or UTF-16 support"
+ if [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF support is not available"
else
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput12 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput12 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput12 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -484,11 +513,11 @@ fi
if [ $do13 = yes ] ; then
echo "Test 13: API, internals, and non-Perl stuff for Unicode property support"
- if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+ if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
echo " Skipped because Unicode property support is not available"
else
for opt in "" "-s" $jitopt; do
- $sim $valgrind ./pcretest -q $opt $testdata/testinput13 testtry
+ $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput13 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput13 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -509,7 +538,7 @@ if [ $do14 = yes ] ; then
if [ $jit -eq 0 ] ; then
echo " Skipped because JIT is not available or not usable"
else
- $sim $valgrind ./pcretest -q $testdata/testinput14 testtry
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput14 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput14 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -526,7 +555,7 @@ if [ $do15 = yes ] ; then
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
- $sim $valgrind ./pcretest -q $testdata/testinput15 testtry
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput15 testtry
if [ $? = 0 ] ; then
$cf $testdata/testoutput15 testtry
if [ $? != 0 ] ; then exit 1; fi
@@ -536,4 +565,8 @@ if [ $do15 = yes ] ; then
fi
fi
+# End of loop for 8-bit/16-bit tests
+
+done
+
# End
diff --git a/configure.ac b/configure.ac
index a7e71ce..69943cb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,7 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre_major, [8])
-m4_define(pcre_minor, [21])
+m4_define(pcre_minor, [22])
m4_define(pcre_prerelease, [])
m4_define(pcre_date, [2011-12-12])
diff --git a/pcre_compile.c b/pcre_compile.c
index bcc10f3..6267376 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1709,6 +1709,7 @@ for (;;)
int d;
pcre_uchar *ce, *cs;
register int op = *cc;
+
switch (op)
{
/* We only need to continue for OP_CBRA (normal capturing bracket) and
@@ -1768,7 +1769,8 @@ for (;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
do cc += GET(cc, 1); while (*cc == OP_ALT);
- /* Fall through */
+ cc += PRIV(OP_lengths)[*cc];
+ break;
/* Skip over things that don't match chars */
@@ -6661,11 +6663,11 @@ for (;; ptr++)
if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
{
- BOOL isnumber = TRUE;
+ BOOL is_a_number = TRUE;
for (p = ptr + 1; *p != 0 && *p != terminator; p++)
{
- if (!MAX_255(*p)) { isnumber = FALSE; break; }
- if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
+ if (!MAX_255(*p)) { is_a_number = FALSE; break; }
+ if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
if ((cd->ctypes[*p] & ctype_word) == 0) break;
}
if (*p != terminator)
@@ -6673,7 +6675,7 @@ for (;; ptr++)
*errorcodeptr = ERR57;
break;
}
- if (isnumber)
+ if (is_a_number)
{
ptr++;
goto HANDLE_NUMERICAL_RECURSION;
diff --git a/pcretest.c b/pcretest.c
index 64cd065..db5b817 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -4,7 +4,8 @@
/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather, er, *very* untidy in places.
+been extended and consequently is now rather, er, *very* untidy in places. The
+addition of 16-bit support has made it even worse. :-(
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -107,9 +108,9 @@ appropriately for an application, not for building PCRE. */
#include "pcre.h"
#include "pcre_internal.h"
-/* The pcre_printint() function, which prints the internal form of a compiled
-regex, is held in a separate file so that (a) it can be compiled in either
-8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
+/* The pcre_printint() function, which prints the internal form of a compiled
+regex, is held in a separate file so that (a) it can be compiled in either
+8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode. */
#ifdef SUPPORT_PCRE8
@@ -149,7 +150,7 @@ that differ in their output from isprint() even in the "C" locale. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
-#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
+#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
@@ -159,11 +160,11 @@ Makefile. */
#include "pcreposix.h"
#endif
-/* It is also possible, for the benefit of the version currently imported into
-Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
-interface to the DFA matcher (NODFA), and without the doublecheck of the old
-"info" function (define NOINFOCHECK). In fact, we automatically cut out the
-UTF8 support if PCRE is built without it. */
+/* It is also possible, originally for the benefit of a version that was
+imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
+without the interface to the DFA matcher (NODFA), and without the doublecheck
+of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
+out the UTF8 support if PCRE is built without it. */
#ifndef SUPPORT_UTF8
#ifndef NOUTF8
@@ -171,6 +172,112 @@ UTF8 support if PCRE is built without it. */
#endif
#endif
+/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
+for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
+only from one place and is handled differently). I couldn't dream up any way of
+using a single macro to do this in a generic way, because of the many different
+argument requirements. We know that at least one of SUPPORT_PCRE8 and
+SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
+use these in the definitions of generic macros. */
+
+#ifdef SUPPORT_PCRE8
+#define PCHARS8(lv, p, len, f) \
+ lv = pchars((pcre_uint8 *)p, len, f)
+
+#define PCHARSV8(p, len, f) \
+ (void)pchars((pcre_uint8 *)p, len, f)
+
+#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
+ re = pcre_compile((char *)pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets) \
+ count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
+ offsets, size_offsets)
+
+#define PCRE_STUDY8(extra, re, options, error) \
+ extra = pcre_study(re, options, error)
+#endif
+
+
+#ifdef SUPPORT_PCRE16
+#define PCHARS16(lv, p, len, f) \
+ lv = pchars16((PCRE_SPTR16)p, len, f)
+
+#define PCHARSV16(p, len, f) \
+ (void)pchars16((PCRE_SPTR16)p, len, f)
+
+#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
+ re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets) \
+ count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
+ options, offsets, size_offsets)
+
+#define PCRE_STUDY16(extra, re, options, error) \
+ extra = pcre16_study(re, options, error)
+#endif
+
+
+/* ----- Both modes are supported; a runtime test is needed ----- */
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+
+#define PCHARS(lv, p, len, f) \
+ if (use_pcre16) \
+ PCHARS16(lv, p, len, f); \
+ else \
+ PCHARS8(lv, p, len, f)
+
+#define PCHARSV(p, len, f) \
+ if (use_pcre16) \
+ PCHARSV16(p, len, f); \
+ else \
+ PCHARSV8(p, len, f)
+
+#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
+ if (use_pcre16) \
+ PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
+ else \
+ PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets) \
+ if (use_pcre16) \
+ PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets); \
+ else \
+ PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets)
+
+#define PCRE_STUDY(extra, re, options, error) \
+ if (use_pcre16) \
+ PCRE_STUDY16(extra, re, options, error); \
+ else \
+ PCRE_STUDY8(extra, re, options, error)
+
+/* ----- Only 8-bit mode is supported ----- */
+
+#elif defined SUPPORT_PCRE8
+#define PCHARS PCHARS8
+#define PCHARSV PCHARSV8
+#define PCRE_COMPILE PCRE_COMPILE8
+#define PCRE_EXEC PCRE_EXEC8
+#define PCRE_STUDY PCRE_STUDY8
+
+/* ----- Only 16-bit mode is supported ----- */
+
+#else
+#define PCHARS PCHARS16
+#define PCHARSV PCHARSV16
+#define PCRE_COMPILE PCRE_COMPILE16
+#define PCRE_EXEC PCRE_EXEC16
+#define PCRE_STUDY PCRE_STUDY16
+#endif
+
+/* ----- End of mode-specific function call macros ----- */
+
/* Other parameters */
@@ -203,8 +310,6 @@ static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;
-static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
-
/* The buffers grow automatically if very long input lines are encountered. */
static int buffer_size = 50000;
@@ -217,6 +322,16 @@ static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
#endif
+/* If we have 8-bit support, default use_pcre16 to false; if there is also
+16-bit support, it can be changed by an option. If there is no 8-bit support,
+there must be 16-bit support, so default it to 1. */
+
+#ifdef SUPPORT_PCRE8
+static int use_pcre16 = 0;
+#else
+static int use_pcre16 = 1;
+#endif
+
/* Textual explanations for runtime error codes */
static const char *errtexts[] = {
@@ -248,7 +363,7 @@ static const char *errtexts[] = {
NULL, /* SHORTUTF8 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode (8-bit/16-bit error)"
+ "pattern compiled in wrong mode (8-bit/16-bit error)"
};
@@ -264,7 +379,7 @@ the L (locale) option also adjusts the tables. */
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. */
-static const unsigned char tables0[] = {
+static const pcre_uint8 tables0[] = {
/* This table is a lower casing table. */
@@ -437,7 +552,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */
-static const unsigned char tables1[] = {
+static const pcre_uint8 tables1[] = {
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
@@ -610,47 +725,159 @@ return (pcre_jit_stack *)arg;
}
+/*************************************************
+* Convert UTF-8 string to value *
+*************************************************/
+
+/* This function takes one or more bytes that represents a UTF-8 character,
+and returns the value of the character.
+
+Argument:
+ utf8bytes a pointer to the byte vector
+ vptr a pointer to an int to receive the value
+
+Returns: > 0 => the number of bytes consumed
+ -6 to 0 => malformed UTF-8 character at offset = (-return)
+*/
+
+#if !defined NOUTF8
+
+static int
+utf82ord(pcre_uint8 *utf8bytes, int *vptr)
+{
+int c = *utf8bytes++;
+int d = c;
+int i, j, s;
+
+for (i = -1; i < 6; i++) /* i is number of additional bytes */
+ {
+ if ((d & 0x80) == 0) break;
+ d <<= 1;
+ }
+
+if (i == -1) { *vptr = c; return 1; } /* ascii character */
+if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
+
+/* i now has a value in the range 1-5 */
+
+s = 6*i;
+d = (c & utf8_table3[i]) << s;
+
+for (j = 0; j < i; j++)
+ {
+ c = *utf8bytes++;
+ if ((c & 0xc0) != 0x80) return -(j+1);
+ s -= 6;
+ d |= (c & 0x3f) << s;
+ }
+
+/* Check that encoding was the correct unique one */
+
+for (j = 0; j < utf8_table1_size; j++)
+ if (d <= utf8_table1[j]) break;
+if (j != i) return -(i+1);
+
+/* Valid value */
+
+*vptr = d;
+return i+1;
+}
+
+#endif
+
+
+
+/*************************************************
+* Convert character value to UTF-8 *
+*************************************************/
+
+/* This function takes an integer value in the range 0 - 0x7fffffff
+and encodes it as a UTF-8 character in 0 to 6 bytes.
+
+Arguments:
+ cvalue the character value
+ utf8bytes pointer to buffer for result - at least 6 bytes long
+
+Returns: number of characters placed in the buffer
+*/
+
+#if !defined NOUTF8
+
+static int
+ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
+{
+register int i, j;
+for (i = 0; i < utf8_table1_size; i++)
+ if (cvalue <= utf8_table1[i]) break;
+utf8bytes += i;
+for (j = i; j > 0; j--)
+ {
+ *utf8bytes-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
+*utf8bytes = utf8_table2[i] | cvalue;
+return i + 1;
+}
+
+#endif
+
+
+
#ifdef SUPPORT_PCRE16
/*************************************************
* Convert a string to 16-bit *
*************************************************/
-/* The result is always left in buffer16. */
+/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
+8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
+double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
+in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
+result is always left in buffer16. */
static int
-to16(unsigned char *p, int utf)
+to16(pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;
-int len = (int)strlen((char *)p) + 1;
-if (buffer16_size < 2*len)
+if (buffer16_size < 2*len + 2)
{
if (buffer16 != NULL) free(buffer16);
- buffer16_size = 2*len;
+ buffer16_size = 2*len + 2;
buffer16 = (pcre_uint16 *)malloc(buffer16_size);
- if (buffer16 == NULL)
+ if (buffer16 == NULL)
{
fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
exit(1);
}
}
-
+
pp = buffer16;
if (!utf)
{
- while (*p != 0) *pp++ = *p++;
- *pp++ = 0;
+ while (len-- > 0) *pp++ = *p++;
}
-
+
else
{
-fprintf(stderr, "pcretest: no support yet for UTF-16\n");
-exit(1);
- }
-
+ int c;
+ while (len > 0)
+ {
+ int chlen = utf82ord(p, &c);
+ p += chlen;
+ len -= chlen;
+ if (c < 0x10000) *pp++ = c; else
+ {
+ c -= 0x10000;
+ *pp++ = 0xD800 | (c >> 10);
+ *pp++ = 0xDC00 | (c & 0x3ff);
+ }
+ }
+ }
+
+*pp = 0;
return pp - buffer16;
-}
+}
#endif
@@ -727,9 +954,9 @@ for (;;)
else
{
int new_buffer_size = 2*buffer_size;
- pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
- pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
- pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
+ pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
+ pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
+ pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
{
@@ -760,10 +987,6 @@ return NULL; /* Control never gets here */
-
-
-
-
/*************************************************
* Read number from string *
*************************************************/
@@ -780,7 +1003,7 @@ Returns: the unsigned long
*/
static int
-get_value(unsigned char *str, unsigned char **endptr)
+get_value(pcre_uint8 *str, pcre_uint8 **endptr)
{
int result = 0;
while(*str != 0 && isspace(*str)) str++;
@@ -791,114 +1014,15 @@ return(result);
-
-/*************************************************
-* Convert UTF-8 string to value *
-*************************************************/
-
-/* This function takes one or more bytes that represents a UTF-8 character,
-and returns the value of the character.
-
-Argument:
- utf8bytes a pointer to the byte vector
- vptr a pointer to an int to receive the value
-
-Returns: > 0 => the number of bytes consumed
- -6 to 0 => malformed UTF-8 character at offset = (-return)
-*/
-
-#if !defined NOUTF8
-
-static int
-utf82ord(unsigned char *utf8bytes, int *vptr)
-{
-int c = *utf8bytes++;
-int d = c;
-int i, j, s;
-
-for (i = -1; i < 6; i++) /* i is number of additional bytes */
- {
- if ((d & 0x80) == 0) break;
- d <<= 1;
- }
-
-if (i == -1) { *vptr = c; return 1; } /* ascii character */
-if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
-
-/* i now has a value in the range 1-5 */
-
-s = 6*i;
-d = (c & utf8_table3[i]) << s;
-
-for (j = 0; j < i; j++)
- {
- c = *utf8bytes++;
- if ((c & 0xc0) != 0x80) return -(j+1);
- s -= 6;
- d |= (c & 0x3f) << s;
- }
-
-/* Check that encoding was the correct unique one */
-
-for (j = 0; j < utf8_table1_size; j++)
- if (d <= utf8_table1[j]) break;
-if (j != i) return -(i+1);
-
-/* Valid value */
-
-*vptr = d;
-return i+1;
-}
-
-#endif
-
-
-
-/*************************************************
-* Convert character value to UTF-8 *
-*************************************************/
-
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
-
-Arguments:
- cvalue the character value
- utf8bytes pointer to buffer for result - at least 6 bytes long
-
-Returns: number of characters placed in the buffer
-*/
-
-#if !defined NOUTF8
-
-static int
-ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
-{
-register int i, j;
-for (i = 0; i < utf8_table1_size; i++)
- if (cvalue <= utf8_table1[i]) break;
-utf8bytes += i;
-for (j = i; j > 0; j--)
- {
- *utf8bytes-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
-*utf8bytes = utf8_table2[i] | cvalue;
-return i + 1;
-}
-
-#endif
-
-
-
+#ifdef SUPPORT_PCRE8
/*************************************************
-* Print character string *
+* Print 8-bit character string *
*************************************************/
-/* Character string printing function. Must handle UTF-8 strings in utf8
-mode. Yields number of characters printed. If handed a NULL file, just counts
-chars without printing. */
+/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
+If handed a NULL file, just counts chars without printing. */
-static int pchars(unsigned char *p, int length, FILE *f)
+static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int yield = 0;
@@ -914,7 +1038,7 @@ while (length-- > 0)
{
length -= rc - 1;
p += rc;
- if (PRINTHEX(c))
+ if (PRINTOK(c))
{
if (f != NULL) fprintf(f, "%c", c);
yield++;
@@ -936,7 +1060,7 @@ while (length-- > 0)
/* Not UTF-8, or malformed UTF-8 */
c = *p++;
- if (PRINTHEX(c))
+ if (PRINTOK(c))
{
if (f != NULL) fprintf(f, "%c", c);
yield++;
@@ -950,6 +1074,65 @@ while (length-- > 0)
return yield;
}
+#endif
+
+
+
+#ifdef SUPPORT_PCRE16
+/*************************************************
+* Print 16-bit character string *
+*************************************************/
+
+/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
+If handed a NULL file, just counts chars without printing. */
+
+static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
+{
+int yield = 0;
+
+while (length-- > 0)
+ {
+ int c = *p++ & 0xffff;
+
+#if !defined NOUTF8
+ if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
+ {
+ int d = *p & 0xffff;
+ if (d >= 0xDC00 && d < 0xDFFF)
+ {
+ c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
+ length--;
+ p++;
+ }
+ }
+#endif
+
+ if (PRINTOK(c))
+ {
+ if (f != NULL) fprintf(f, "%c", c);
+ yield++;
+ }
+ else
+ {
+ yield += 4;
+ if (c < 0x100)
+ {
+ if (f != NULL) fprintf(f, "\\x%02x", c);
+ }
+ else
+ {
+ if (f != NULL) fprintf(f, "\\x{%02x}", c);
+ yield += (c <= 0x000000ff)? 2 :
+ (c <= 0x00000fff)? 3 :
+ (c <= 0x0000ffff)? 4 :
+ (c <= 0x000fffff)? 5 : 6;
+ }
+ }
+ }
+
+return yield;
+}
+#endif
@@ -978,7 +1161,7 @@ if (callout_extra)
else
{
fprintf(f, "%2d: ", i/2);
- (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
+ PCHARSV(cb->subject + cb->offset_vector[i],
cb->offset_vector[i+1] - cb->offset_vector[i], f);
fprintf(f, "\n");
}
@@ -991,13 +1174,13 @@ printed lengths of the substrings. */
if (f != NULL) fprintf(f, "--->");
-pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
-post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
+PCHARS(pre_start, cb->subject, cb->start_match, f);
+PCHARS(post_start, cb->subject + cb->start_match,
cb->current_position - cb->start_match, f);
-subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
+PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
-(void)pchars((unsigned char *)(cb->subject + cb->current_position),
+PCHARSV(cb->subject + cb->current_position,
cb->subject_length - cb->current_position, f);
if (f != NULL) fprintf(f, "\n");
@@ -1103,13 +1286,29 @@ free(block);
* Call pcre_fullinfo() *
*************************************************/
-/* Get one piece of information from the pcre_fullinfo() function */
+/* Get one piece of information from the pcre_fullinfo() function. When only
+one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
+value, but the code is defensive. */
static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
int rc;
-if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
- fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
+
+if (use_pcre16)
+#ifdef SUPPORT_PCRE16
+ rc = pcre16_fullinfo(re, study, option, ptr);
+#else
+ rc = PCRE_ERROR_BADMODE;
+#endif
+else
+#ifdef SUPPORT_PCRE8
+ rc = pcre_fullinfo(re, study, option, ptr);
+#else
+ rc = PCRE_ERROR_BADMODE;
+#endif
+
+if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
+ use_pcre16? "16" : "", option);
}
@@ -1151,7 +1350,7 @@ for (;;)
{
*limit = mid;
- count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
+ PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
use_offsets, use_size_offsets);
if (count == errnumber)
@@ -1313,7 +1512,6 @@ int posix = 0;
int debug = 0;
int done = 0;
int all_use_dfa = 0;
-int use_pcre16 = 0;
int yield = 0;
int stack_size;
@@ -1329,7 +1527,7 @@ pcre_uchar *copynamesptr;
pcre_uchar *getnamesptr;
/* Get buffers from malloc() so that valgrind will check their misuse when
-debugging. They grow automatically when very long lines are read. The 16-bit
+debugging. They grow automatically when very long lines are read. The 16-bit
buffer (buffer16) is obtained only if needed. */
buffer = (pcre_uint8 *)malloc(buffer_size);
@@ -1353,16 +1551,19 @@ _setmode( _fileno( stdout ), _O_BINARY );
while (argc > 1 && argv[op][0] == '-')
{
- unsigned char *endptr;
+ pcre_uint8 *endptr;
- if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
- else if (strcmp(argv[op], "-m") == 0) showstore = 1;
+ if (strcmp(argv[op], "-m") == 0) showstore = 1;
else if (strcmp(argv[op], "-s") == 0) force_study = 0;
else if (strcmp(argv[op], "-s+") == 0)
{
force_study = 1;
force_study_options = PCRE_STUDY_JIT_COMPILE;
}
+#ifdef SUPPORT_PCRE16
+ else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
+#endif
+
else if (strcmp(argv[op], "-q") == 0) quiet = 1;
else if (strcmp(argv[op], "-b") == 0) debug = 1;
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
@@ -1372,7 +1573,7 @@ while (argc > 1 && argv[op][0] == '-')
else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
#endif
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
- ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
+ ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
*endptr == 0))
{
op++;
@@ -1382,7 +1583,7 @@ while (argc > 1 && argv[op][0] == '-')
{
int both = argv[op][2] == 0;
int temp;
- if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
+ if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
*endptr == 0))
{
timeitm = temp;
@@ -1393,7 +1594,7 @@ while (argc > 1 && argv[op][0] == '-')
if (both) timeit = timeitm;
}
else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
- ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
+ ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
*endptr == 0))
{
#if defined(_WIN32) || defined(WIN32) || defined(__minix)
@@ -1423,25 +1624,27 @@ while (argc > 1 && argv[op][0] == '-')
unsigned long int lrc;
printf("PCRE version %s\n", pcre_version());
printf("Compiled with\n");
-
-/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
+
+/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
+are set, either both UTFs are supported or both are not supported. */
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
printf(" 8-bit and 16-bit support\n");
(void)pcre_config(PCRE_CONFIG_UTF8, &rc);
- printf(" %sUTF-8 support\n", rc? "" : "No ");
- (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
- printf(" %sUTF-16 support\n", rc? "" : "No ");
+ if (rc)
+ printf(" UTF-8 and UTF-16 support\n");
+ else
+ printf(" No UTF-8 or UTF-16 support\n");
#elif defined SUPPORT_PCRE8
printf(" 8-bit support only\n");
(void)pcre_config(PCRE_CONFIG_UTF8, &rc);
printf(" %sUTF-8 support\n", rc? "" : "No ");
-#else
+#else
printf(" 16-bit support only\n");
(void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
printf(" %sUTF-16 support\n", rc? "" : "No ");
-#endif
-
+#endif
+
(void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
printf(" %sUnicode properties support\n", rc? "" : "No ");
(void)pcre_config(PCRE_CONFIG_JIT, &rc);
@@ -1488,10 +1691,6 @@ while (argc > 1 && argv[op][0] == '-')
argc--;
}
-/* Select which fullinfo function to use. */
-
-fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
-
/* Get the store for the offsets vector, and remember what it was */
size_offsets_max = size_offsets;
@@ -1561,10 +1760,10 @@ while (!done)
#endif
const char *error;
- unsigned char *markptr;
- unsigned char *p, *pp, *ppp;
- unsigned char *to_file = NULL;
- const unsigned char *tables = NULL;
+ pcre_uint8 *markptr;
+ pcre_uint8 *p, *pp, *ppp;
+ pcre_uint8 *to_file = NULL;
+ const pcre_uint8 *tables = NULL;
unsigned long int true_size, true_study_size = 0;
size_t size, regex_gotten_store;
int do_allcaps = 0;
@@ -1898,15 +2097,15 @@ while (!done)
{
unsigned long int get_options;
-
- /* In 16-bit mode, convert the input. The space needed for a non-UTF string
- is exactly double the 8-bit size. For a UTF-8 string, the size needed for
- UTF-16 is no more than double, because up to 0xffff uses no more than 3
- bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8
- and up to 4 bytes in UTF-16. */
-
+
+ /* In 16-bit mode, convert the input. */
+
#ifdef SUPPORT_PCRE16
- if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
+ if (use_pcre16)
+ {
+ (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
+ p = (pcre_uint8 *)buffer16;
+ }
#endif
/* Compile many times when timing */
@@ -1918,12 +2117,7 @@ while (!done)
clock_t start_time = clock();
for (i = 0; i < timeit; i++)
{
-#ifdef SUPPORT_PCRE16
- if (use_pcre16)
- re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
- else
-#endif
- re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+ PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
if (re != NULL) free(re);
}
time_taken = clock() - start_time;
@@ -1933,13 +2127,7 @@ while (!done)
}
first_gotten_store = 0;
-
-#ifdef SUPPORT_PCRE16
- if (use_pcre16)
- re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
- else
-#endif
- re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+ PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
/* Compilation failed; go back for another re, skipping to blank line
if non-interactive. */
@@ -2001,21 +2189,15 @@ while (!done)
clock_t start_time = clock();
for (i = 0; i < timeit; i++)
{
- if (use_pcre16)
- extra = pcre16_study(re, study_options | force_study_options, &error);
- else
- extra = pcre_study(re, study_options | force_study_options, &error);
- }
+ PCRE_STUDY(extra, re, study_options | force_study_options, &error);
+ }
time_taken = clock() - start_time;
if (extra != NULL) pcre_free_study(extra);
fprintf(outfile, " Study time %.4f milliseconds\n",
(((double)time_taken * 1000.0) / (double)timeit) /
(double)CLOCKS_PER_SEC);
}
- if (use_pcre16)
- extra = pcre16_study(re, study_options | force_study_options, &error);
- else
- extra = pcre_study(re, study_options | force_study_options, &error);
+ PCRE_STUDY(extra, re, study_options | force_study_options, &error);
if (error != NULL)
fprintf(outfile, "Failed to study: %s\n", error);
else if (extra != NULL)
@@ -2090,7 +2272,7 @@ while (!done)
fprintf(outfile, "------------------------------------------------------------------\n");
if (use_pcre16)
pcre16_printint(re, outfile, debug_lengths);
- else
+ else
pcre_printint(re, outfile, debug_lengths);
}
@@ -2121,10 +2303,10 @@ while (!done)
/* The old, obsolete function pcre_info() works only in 8-bit mode. Check
that it gives the same results as the new function. */
-
+
#if !defined NOINFOCHECK
if (!use_pcre16)
- {
+ {
old_count = pcre_info(re, &old_options, &old_first_char);
if (count < 0) fprintf(outfile,
"Error %d from pcre_info()\n", count);
@@ -2133,16 +2315,16 @@ while (!done)
if (old_count != count) fprintf(outfile,
"Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
old_count);
-
+
if (old_first_char != first_char) fprintf(outfile,
"First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
first_char, old_first_char);
-
+
if (old_options != (int)get_options) fprintf(outfile,
"Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
get_options, old_options);
}
- }
+ }
#endif
if (size != regex_gotten_store) fprintf(outfile,
@@ -2233,7 +2415,7 @@ while (!done)
((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
"" : " (caseless)";
- if (PRINTHEX(first_char))
+ if (PRINTOK(first_char))
fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
else
fprintf(outfile, "First char = %d%s\n", first_char, caseless);
@@ -2249,7 +2431,7 @@ while (!done)
((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
"" : " (caseless)";
- if (PRINTHEX(need_char))
+ if (PRINTOK(need_char))
fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
else
fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
@@ -2292,7 +2474,7 @@ while (!done)
fprintf(outfile, "\n ");
c = 2;
}
- if (PRINTHEX(i) && i != ' ')
+ if (PRINTOK(i) && i != ' ')
{
fprintf(outfile, "%c ", i);
c += 2;
@@ -2479,7 +2661,7 @@ while (!done)
#if !defined NOUTF8
if (use_utf8 && c > 255)
{
- unsigned char buff8[8];
+ pcre_uint8 buff8[8];
int ii, utn;
utn = ord2utf8(c, buff8);
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
@@ -2495,7 +2677,7 @@ while (!done)
#if !defined NOUTF8
if (*p == '{')
{
- unsigned char *pt = p;
+ pcre_uint8 *pt = p;
c = 0;
/* We used to have "while (isxdigit(*(++pt)))" here, but it fails
@@ -2507,7 +2689,7 @@ while (!done)
c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
if (*pt == '}')
{
- unsigned char buff8[8];
+ pcre_uint8 buff8[8];
int ii, utn;
if (use_utf8)
{
@@ -2817,13 +2999,13 @@ while (!done)
if (pmatch[i].rm_so >= 0)
{
fprintf(outfile, "%2d: ", (int)i);
- (void)pchars(dbuffer + pmatch[i].rm_so,
+ PCHARSV(dbuffer + pmatch[i].rm_so,
pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
fprintf(outfile, "\n");
if (do_showcaprest || (i == 0 && do_showrest))
{
fprintf(outfile, "%2d+ ", (int)i);
- (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
+ PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
outfile);
fprintf(outfile, "\n");
}
@@ -2831,12 +3013,20 @@ while (!done)
}
}
free(pmatch);
+ goto NEXT_DATA;
}
+#endif /* !defined NOPOSIX */
+
/* Handle matching via the native interface - repeats for /g and /G */
- else
-#endif /* !defined NOPOSIX */
+#ifdef SUPPORT_PCRE16
+ if (use_pcre16)
+ {
+ len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
+ bptr = (pcre_uint8 *)buffer16;
+ }
+#endif
for (;; gmatched++) /* Loop for /g or /G */
{
@@ -2847,11 +3037,6 @@ while (!done)
register int i;
clock_t time_taken;
clock_t start_time = clock();
-
-#ifdef SUPPORT_PCRE16
- if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
-#endif
-
#if !defined NODFA
if (all_use_dfa || use_dfa)
@@ -2866,9 +3051,10 @@ while (!done)
#endif
for (i = 0; i < timeitm; i++)
- count = pcre_exec(re, extra, (char *)bptr, len,
+ {
+ PCRE_EXEC(count, re, extra, bptr, len,
start_offset, options | g_notempty, use_offsets, use_size_offsets);
-
+ }
time_taken = clock() - start_time;
fprintf(outfile, "Execute time %.4f milliseconds\n",
(((double)time_taken * 1000.0) / (double)timeitm) /
@@ -2913,7 +3099,7 @@ while (!done)
}
extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
extra->callout_data = &callout_data;
- count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
+ PCRE_EXEC(count, re, extra, bptr, len, start_offset,
options | g_notempty, use_offsets, use_size_offsets);
extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
}
@@ -2938,12 +3124,8 @@ while (!done)
else
{
- if (use_pcre16)
- count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
- else
- count = pcre_exec(re, extra, (char *)bptr, len,
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ PCRE_EXEC(count, re, extra, bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets);
if (count == 0)
{
fprintf(outfile, "Matched, but too many substrings\n");
@@ -3004,13 +3186,13 @@ while (!done)
else
{
fprintf(outfile, "%2d: ", i/2);
- (void)pchars(bptr + use_offsets[i],
+ PCHARSV(bptr + use_offsets[i],
use_offsets[i+1] - use_offsets[i], outfile);
fprintf(outfile, "\n");
if (do_showcaprest || (i == 0 && do_showrest))
{
fprintf(outfile, "%2d+ ", i/2);
- (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
+ PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
outfile);
fprintf(outfile, "\n");
}
@@ -3106,7 +3288,7 @@ while (!done)
if (use_size_offsets > 1)
{
fprintf(outfile, ": ");
- pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
+ PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
outfile);
}
fprintf(outfile, "\n");