summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-10-16 15:53:30 +0000
committerchpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-10-16 15:53:30 +0000
commit62c2f93fe63ee94ff2692091a42a7d594f5d4fe3 (patch)
tree3d1739b24c57943c20fa880eed55ab341db96a81
parent3f6d05379ea067a3b4f4a61e4be268ee8c37e7a6 (diff)
downloadpcre-62c2f93fe63ee94ff2692091a42a7d594f5d4fe3.tar.gz
pcre32: Add 32-bit library
Create libpcre32 that operates on 32-bit characters (UTF-32). This turned out to be surprisingly simple after the UTF-16 support was introduced; mostly just extra ifdefs and adjusting and adding some tests. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1055 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--CMakeLists.txt72
-rw-r--r--HACKING35
-rw-r--r--Makefile.am87
-rw-r--r--NON-AUTOTOOLS-BUILD39
-rwxr-xr-xPrepareRelease10
-rw-r--r--README134
-rwxr-xr-xRunTest190
-rw-r--r--configure.ac48
-rw-r--r--doc/index.html.src13
-rw-r--r--doc/pcre-config.113
-rw-r--r--doc/pcre.329
-rw-r--r--doc/pcre16.313
-rw-r--r--doc/pcre32.3388
-rw-r--r--doc/pcre_assign_jit_stack.314
-rw-r--r--doc/pcre_compile.318
-rw-r--r--doc/pcre_compile2.320
-rw-r--r--doc/pcre_config.312
-rw-r--r--doc/pcre_copy_named_substring.314
-rw-r--r--doc/pcre_copy_substring.312
-rw-r--r--doc/pcre_dfa_exec.321
-rw-r--r--doc/pcre_exec.315
-rw-r--r--doc/pcre_free_study.36
-rw-r--r--doc/pcre_free_substring.36
-rw-r--r--doc/pcre_free_substring_list.36
-rw-r--r--doc/pcre_fullinfo.39
-rw-r--r--doc/pcre_get_named_substring.318
-rw-r--r--doc/pcre_get_stringnumber.310
-rw-r--r--doc/pcre_get_stringtable_entries.38
-rw-r--r--doc/pcre_get_substring.316
-rw-r--r--doc/pcre_get_substring_list.314
-rw-r--r--doc/pcre_jit_stack_alloc.310
-rw-r--r--doc/pcre_jit_stack_free.36
-rw-r--r--doc/pcre_maketables.38
-rw-r--r--doc/pcre_pattern_to_host_byte_order.39
-rw-r--r--doc/pcre_refcount.34
-rw-r--r--doc/pcre_study.310
-rw-r--r--doc/pcre_utf32_to_host_byte_order.346
-rw-r--r--doc/pcre_version.310
-rw-r--r--doc/pcreapi.381
-rw-r--r--doc/pcrebuild.335
-rw-r--r--doc/pcrecallout.335
-rw-r--r--doc/pcrecompat.37
-rw-r--r--doc/pcrecpp.32
-rw-r--r--doc/pcrejit.315
-rw-r--r--doc/pcrelimits.315
-rw-r--r--doc/pcrematching.323
-rw-r--r--doc/pcrepartial.328
-rw-r--r--doc/pcrepattern.369
-rw-r--r--doc/pcreperform.32
-rw-r--r--doc/pcreposix.32
-rw-r--r--doc/pcreprecompile.336
-rw-r--r--doc/pcrestack.340
-rw-r--r--doc/pcresyntax.35
-rw-r--r--doc/pcretest.1194
-rw-r--r--doc/pcreunicode.354
-rw-r--r--libpcre32.pc.in12
-rwxr-xr-xmaint/ManyConfigTests11
-rw-r--r--maint/README3
-rw-r--r--pcre-config.in11
-rw-r--r--pcre.h.in133
-rw-r--r--pcre32_byte_order.c45
-rw-r--r--pcre32_chartables.c45
-rw-r--r--pcre32_compile.c45
-rw-r--r--pcre32_config.c45
-rw-r--r--pcre32_dfa_exec.c45
-rw-r--r--pcre32_exec.c45
-rw-r--r--pcre32_fullinfo.c45
-rw-r--r--pcre32_get.c45
-rw-r--r--pcre32_globals.c45
-rw-r--r--pcre32_jit_compile.c45
-rw-r--r--pcre32_maketables.c45
-rw-r--r--pcre32_newline.c45
-rw-r--r--pcre32_ord2utf32.c90
-rw-r--r--pcre32_printint.c45
-rw-r--r--pcre32_refcount.c45
-rw-r--r--pcre32_string_utils.c45
-rw-r--r--pcre32_study.c45
-rw-r--r--pcre32_tables.c45
-rw-r--r--pcre32_ucd.c45
-rw-r--r--pcre32_utf32_utils.c138
-rw-r--r--pcre32_valid_utf32.c126
-rw-r--r--pcre32_version.c45
-rw-r--r--pcre32_xclass.c45
-rw-r--r--pcre_byte_order.c48
-rw-r--r--pcre_compile.c151
-rw-r--r--pcre_config.c24
-rw-r--r--pcre_dfa_exec.c42
-rw-r--r--pcre_exec.c64
-rw-r--r--pcre_fullinfo.c8
-rw-r--r--pcre_get.c143
-rw-r--r--pcre_internal.h259
-rw-r--r--pcre_jit_compile.c306
-rw-r--r--pcre_jit_test.c522
-rw-r--r--pcre_maketables.c7
-rw-r--r--pcre_newline.c4
-rw-r--r--pcre_printint.c50
-rw-r--r--pcre_refcount.c7
-rw-r--r--pcre_study.c77
-rw-r--r--pcre_tables.c6
-rw-r--r--pcre_version.c7
-rw-r--r--pcreposix.c7
-rw-r--r--pcretest.c887
-rw-r--r--testdata/saved32bin0 -> 100 bytes
-rw-r--r--testdata/saved32BE-1bin0 -> 544 bytes
-rw-r--r--testdata/saved32BE-2bin0 -> 448 bytes
-rw-r--r--testdata/saved32LE-1bin0 -> 544 bytes
-rw-r--r--testdata/saved32LE-2bin0 -> 448 bytes
-rw-r--r--testdata/testinput142
-rw-r--r--testdata/testinput1711
-rw-r--r--testdata/testinput1813
-rw-r--r--testdata/testinput192
-rw-r--r--testdata/testinput202
-rw-r--r--testdata/testinput214
-rw-r--r--testdata/testinput226
-rw-r--r--testdata/testinput2310
-rw-r--r--testdata/testinput243
-rw-r--r--testdata/testinput2523
-rw-r--r--testdata/testinput263
-rw-r--r--testdata/testoutput11-32713
-rw-r--r--testdata/testoutput146
-rw-r--r--testdata/testoutput1713
-rw-r--r--testdata/testoutput18-16 (renamed from testdata/testoutput18)20
-rw-r--r--testdata/testoutput18-321011
-rw-r--r--testdata/testoutput192
-rw-r--r--testdata/testoutput202
-rw-r--r--testdata/testoutput21-16 (renamed from testdata/testoutput21)12
-rw-r--r--testdata/testoutput21-3290
-rw-r--r--testdata/testoutput22-16 (renamed from testdata/testoutput22)14
-rw-r--r--testdata/testoutput22-3271
-rw-r--r--testdata/testoutput2312
-rw-r--r--testdata/testoutput243
-rw-r--r--testdata/testoutput2537
-rw-r--r--testdata/testoutput263
133 files changed, 6972 insertions, 1195 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 092f225..e56eebc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,6 +58,7 @@
# of the configure.ac file
# 2012-02-26 PH added support for libedit
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
+# 2012-09-08 ChPe added PCRE32 support
PROJECT(PCRE C CXX)
@@ -113,6 +114,8 @@ OPTION(PCRE_BUILD_PCRE8 "Build 8 bit PCRE library" ON)
OPTION(PCRE_BUILD_PCRE16 "Build 16 bit PCRE library" OFF)
+OPTION(PCRE_BUILD_PCRE32 "Build 32 bit PCRE library" OFF)
+
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
SET(PCRE_EBCDIC OFF CACHE BOOL
@@ -149,7 +152,7 @@ SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcregrep.")
SET(PCRE_SUPPORT_UTF OFF CACHE BOOL
- "Enable support for Unicode Transformation Format (UTF-8 and/or UTF-16) encoding.")
+ "Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
"Enable support for Unicode properties (if set, UTF support will be enabled as well).")
@@ -231,9 +234,9 @@ IF(NOT BUILD_SHARED_LIBS)
SET(PCRE_STATIC 1)
ENDIF(NOT BUILD_SHARED_LIBS)
-IF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16)
- MESSAGE(FATAL_ERROR "Either PCRE_BUILD_PCRE8 or PCRE_BUILD_PCRE16 must be enabled")
-ENDIF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16)
+IF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
+ MESSAGE(FATAL_ERROR "Either PCRE_BUILD_PCRE8, PCRE_BUILD_PCRE16 or PCRE_BUILD_PCRE32 must be enabled")
+ENDIF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
IF(PCRE_BUILD_PCRE8)
SET(SUPPORT_PCRE8 1)
@@ -243,6 +246,10 @@ IF(PCRE_BUILD_PCRE16)
SET(SUPPORT_PCRE16 1)
ENDIF(PCRE_BUILD_PCRE16)
+IF(PCRE_BUILD_PCRE32)
+ SET(SUPPORT_PCRE32 1)
+ENDIF(PCRE_BUILD_PCRE32)
+
IF(PCRE_BUILD_PCRECPP AND NOT PCRE_BUILD_PCRE8)
MESSAGE(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the C++ library support")
SET(PCRE_BUILD_PCRECPP OFF)
@@ -477,6 +484,33 @@ SET(PCRE16_SOURCES
)
ENDIF(PCRE_BUILD_PCRE16)
+IF(PCRE_BUILD_PCRE32)
+SET(PCRE32_SOURCES
+ pcre32_byte_order.c
+ pcre32_chartables.c
+ pcre32_compile.c
+ pcre32_config.c
+ pcre32_dfa_exec.c
+ pcre32_exec.c
+ pcre32_fullinfo.c
+ pcre32_get.c
+ pcre32_globals.c
+ pcre32_jit_compile.c
+ pcre32_maketables.c
+ pcre32_newline.c
+ pcre32_ord2utf32.c
+ pcre32_refcount.c
+ pcre32_string_utils.c
+ pcre32_study.c
+ pcre32_tables.c
+ pcre32_ucd.c
+ pcre32_utf32_utils.c
+ pcre32_valid_utf32.c
+ pcre32_version.c
+ pcre32_xclass.c
+)
+ENDIF(PCRE_BUILD_PCRE32)
+
IF(MINGW AND NOT PCRE_STATIC)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre.o
@@ -575,6 +609,26 @@ ENDIF(MINGW AND NOT PCRE_STATIC)
ENDIF(PCRE_BUILD_PCRE16)
+IF(PCRE_BUILD_PCRE32)
+ADD_LIBRARY(pcre32 ${PCRE_HEADERS} ${PCRE32_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+SET(targets ${targets} pcre32)
+
+IF(MINGW AND NOT PCRE_STATIC)
+ IF(NON_STANDARD_LIB_PREFIX)
+ SET_TARGET_PROPERTIES(pcre32
+ PROPERTIES PREFIX ""
+ )
+ ENDIF(NON_STANDARD_LIB_PREFIX)
+
+ IF(NON_STANDARD_LIB_SUFFIX)
+ SET_TARGET_PROPERTIES(pcre32
+ PROPERTIES SUFFIX "-0.dll"
+ )
+ ENDIF(NON_STANDARD_LIB_SUFFIX)
+ENDIF(MINGW AND NOT PCRE_STATIC)
+
+ENDIF(PCRE_BUILD_PCRE32)
+
# pcrecpp
IF(PCRE_BUILD_PCRECPP)
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
@@ -625,6 +679,9 @@ IF(PCRE_BUILD_TESTS)
IF(PCRE_BUILD_PCRE16)
LIST(APPEND PCRETEST_SOURCES pcre16_printint.c)
ENDIF(PCRE_BUILD_PCRE16)
+ IF(PCRE_BUILD_PCRE32)
+ LIST(APPEND PCRETEST_SOURCES pcre32_printint.c)
+ ENDIF(PCRE_BUILD_PCRE32)
ADD_EXECUTABLE(pcretest ${PCRETEST_SOURCES})
SET(targets ${targets} pcretest)
@@ -634,6 +691,9 @@ IF(PCRE_BUILD_TESTS)
IF(PCRE_BUILD_PCRE16)
LIST(APPEND PCRETEST_LIBS pcre16)
ENDIF(PCRE_BUILD_PCRE16)
+ IF(PCRE_BUILD_PCRE32)
+ LIST(APPEND PCRETEST_LIBS pcre32)
+ ENDIF(PCRE_BUILD_PCRE32)
TARGET_LINK_LIBRARIES(pcretest ${PCRETEST_LIBS})
IF(PCRE_SUPPORT_JIT)
@@ -646,6 +706,9 @@ IF(PCRE_BUILD_TESTS)
IF(PCRE_BUILD_PCRE16)
LIST(APPEND PCRE_JIT_TEST_LIBS pcre16)
ENDIF(PCRE_BUILD_PCRE16)
+ IF(PCRE_BUILD_PCRE32)
+ LIST(APPEND PCRE_JIT_TEST_LIBS pcre32)
+ ENDIF(PCRE_BUILD_PCRE32)
TARGET_LINK_LIBRARIES(pcre_jit_test ${PCRE_JIT_TEST_LIBS})
ENDIF(PCRE_SUPPORT_JIT)
@@ -823,6 +886,7 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS "")
MESSAGE(STATUS " Build 8 bit PCRE library ........ : ${PCRE_BUILD_PCRE8}")
MESSAGE(STATUS " Build 16 bit PCRE library ....... : ${PCRE_BUILD_PCRE16}")
+ MESSAGE(STATUS " Build 32 bit PCRE library ....... : ${PCRE_BUILD_PCRE32}")
MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE_SUPPORT_UTF}")
diff --git a/HACKING b/HACKING
index 87b8819..b4ee1c9 100644
--- a/HACKING
+++ b/HACKING
@@ -49,16 +49,17 @@ complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons.
-Support for 16-bit data strings
--------------------------------
+Support for 16-bit and 32-bit data strings
+-------------------------------------------
-From release 8.30, PCRE supports 16-bit as well as 8-bit data strings, by being
-compilable in either 8-bit or 16-bit modes, or both. Thus, two different
-libraries can be created. In the description that follows, the word "short" is
+From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
+release 8.32, PCRE supports 32-bit data strings. The library can be compiled
+in any combination of 8-bit, 16-bit or 32-bit modes, creating different
+libraries. In the description that follows, the word "short" is
used for a 16-bit data quantity, and the word "unit" is used for a quantity
-that is a byte in 8-bit mode and a short in 16-bit mode. However, so as not to
-over-complicate the text, the names of PCRE functions are given in 8-bit form
-only.
+that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
+integer in 32-bit mode. However, so as not to over-complicate the text, the
+names of PCRE functions are given in 8-bit form only.
Computing the memory requirement: how it was
@@ -138,9 +139,10 @@ Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
-shorts in 16-bit mode), containing items of variable length. The first unit in
-an item contains an opcode, and the length of the item is either implicit in
-the opcode or contained in the data that follows it.
+shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
+items of variable length. The first unit in an item contains an opcode, and
+the length of the item is either implicit in the opcode or contained in the
+data that follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
@@ -207,7 +209,8 @@ Matching literal characters
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
-the character may be more than one unit long.
+the character may be more than one unit long. In UTF-32 mode, characters
+are always exactly one unit long.
Repeating single characters
@@ -228,7 +231,8 @@ following opcodes, which come in caseful and caseless versions:
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII mode,
-these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable.
+these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
+UTF-32 mode these are one-unit items.
Those with "MIN" in their names are the minimizing versions. Those with "POS"
in their names are possessive versions. Other repeats make use of these
opcodes:
@@ -299,7 +303,7 @@ bit map containing a 1 bit for every character that is acceptable. The bits are
counted from the least significant end of each unit. In caseless mode, bits for
both cases are set.
-The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16 mode,
+The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
subject characters with values greater than 255 can be handled correctly. For
OP_CLASS they do not match, whereas for OP_NCLASS they do.
@@ -412,7 +416,8 @@ OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a two byte (one short) count of the number of
characters to move back the pointer in the subject string. In ASCII mode, the
count is a number of units, but in UTF-8/16 mode each character may occupy more
-than one unit. A separate count is present in each alternative of a lookbehind
+than one unit; in UTF-32 mode each character occupies exactly one unit.
+A separate count is present in each alternative of a lookbehind
assertion, allowing them to have different fixed lengths.
diff --git a/Makefile.am b/Makefile.am
index 2dd553b..798acd7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -64,6 +64,10 @@ dist_html_DATA = \
doc/html/pcretest.html \
doc/html/pcreunicode.html
+# doc/html/pcre32.html \
+# doc/html/pcre_utf32_to_host_byte_order.html \
+#
+
pcrecpp_html = doc/html/pcrecpp.html
dist_noinst_DATA = $(pcrecpp_html)
@@ -81,7 +85,8 @@ check_SCRIPTS =
dist_noinst_SCRIPTS =
# Some of the binaries we make are to be installed, and others are
-# (non-user-visible) helper programs needed to build libpcre or libpcre16.
+# (non-user-visible) helper programs needed to build libpcre, libpcre16
+# or libpcre32.
bin_PROGRAMS =
noinst_PROGRAMS =
@@ -246,6 +251,39 @@ nodist_libpcre16_la_SOURCES = \
endif # WITH_PCRE16
+# Build the 32 bit library if it is enabled.
+if WITH_PCRE32
+lib_LTLIBRARIES += libpcre32.la
+libpcre32_la_SOURCES = \
+ pcre32_byte_order.c \
+ pcre32_chartables.c \
+ pcre32_compile.c \
+ pcre32_config.c \
+ pcre32_dfa_exec.c \
+ pcre32_exec.c \
+ pcre32_fullinfo.c \
+ pcre32_get.c \
+ pcre32_globals.c \
+ pcre32_jit_compile.c \
+ pcre32_maketables.c \
+ pcre32_newline.c \
+ pcre32_ord2utf32.c \
+ pcre32_refcount.c \
+ pcre32_string_utils.c \
+ pcre32_study.c \
+ pcre32_tables.c \
+ pcre32_ucd.c \
+ pcre32_utf32_utils.c \
+ pcre32_valid_utf32.c \
+ pcre32_version.c \
+ pcre32_xclass.c
+
+## This file is generated as part of the building process, so don't distribute.
+nodist_libpcre32_la_SOURCES = \
+ pcre_chartables.c
+
+endif # WITH_PCRE32
+
# The pcre_chartables.c.dist file is the default version of pcre_chartables.c,
# used unless --enable-rebuild-chartables is specified.
EXTRA_DIST += pcre_chartables.c.dist
@@ -276,6 +314,9 @@ endif # WITH_PCRE8
if WITH_PCRE16
libpcre16_la_LDFLAGS = $(EXTRA_LIBPCRE16_LDFLAGS)
endif # WITH_PCRE16
+if WITH_PCRE32
+libpcre32_la_LDFLAGS = $(EXTRA_LIBPCRE32_LDFLAGS)
+endif # WITH_PCRE32
CLEANFILES += pcre_chartables.c
@@ -291,6 +332,9 @@ endif # WITH_PCRE8
if WITH_PCRE16
pcre_jit_test_LDADD += libpcre16.la
endif # WITH_PCRE16
+if WITH_PCRE32
+pcre_jit_test_LDADD += libpcre32.la
+endif # WITH_PCRE32
endif # WITH_JIT
## A version of the main pcre library that has a posix re API.
@@ -350,6 +394,10 @@ if WITH_PCRE16
pcretest_SOURCES += pcre16_printint.c
pcretest_LDADD += libpcre16.la
endif # WITH_PCRE16
+if WITH_PCRE32
+pcretest_SOURCES += pcre32_printint.c
+pcretest_LDADD += libpcre32.la
+endif # WITH_PCRE32
if WITH_PCRE8
TESTS += RunGrepTest
@@ -378,6 +426,11 @@ EXTRA_DIST += \
testdata/saved16BE-2 \
testdata/saved16LE-1 \
testdata/saved16LE-2 \
+ testdata/saved32 \
+ testdata/saved32BE-1 \
+ testdata/saved32BE-2 \
+ testdata/saved32LE-1 \
+ testdata/saved32LE-2 \
testdata/saved8 \
testdata/testinput1 \
testdata/testinput2 \
@@ -477,6 +530,9 @@ pkgconfig_DATA = libpcre.pc libpcreposix.pc
if WITH_PCRE16
pkgconfig_DATA += libpcre16.pc
endif
+if WITH_PCRE32
+pkgconfig_DATA += libpcre32.pc
+endif
if WITH_PCRE_CPP
pkgconfig_DATA += libpcrecpp.pc
endif
@@ -484,6 +540,7 @@ endif
dist_man_MANS = \
doc/pcre.3 \
doc/pcre16.3 \
+ doc/pcre32.3 \
doc/pcre-config.1 \
doc/pcre_assign_jit_stack.3 \
doc/pcre_compile.3 \
@@ -509,6 +566,7 @@ dist_man_MANS = \
doc/pcre_refcount.3 \
doc/pcre_study.3 \
doc/pcre_utf16_to_host_byte_order.3 \
+ doc/pcre_utf32_to_host_byte_order.3 \
doc/pcre_version.3 \
doc/pcreapi.3 \
doc/pcrebuild.3 \
@@ -529,7 +587,7 @@ dist_man_MANS = \
doc/pcretest.1 \
doc/pcreunicode.3
-# Arrange for the per-function man pages to have 16-bit names as well.
+# Arrange for the per-function man pages to have 16- and 32-bit names as well.
install-data-hook:
ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre16_assign_jit_stack.3
ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre16_compile.3
@@ -556,6 +614,31 @@ install-data-hook:
ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre16_study.3
ln -sf pcre_utf16_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_utf16_to_host_byte_order.3
ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre16_version.3
+ ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre32_assign_jit_stack.3
+ ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre32_compile.3
+ ln -sf pcre_compile2.3 $(DESTDIR)$(man3dir)/pcre32_compile2.3
+ ln -sf pcre_config.3 $(DESTDIR)$(man3dir)/pcre32_config.3
+ ln -sf pcre_copy_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_named_substring.3
+ ln -sf pcre_copy_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_substring.3
+ ln -sf pcre_dfa_exec.3 $(DESTDIR)$(man3dir)/pcre32_dfa_exec.3
+ ln -sf pcre_exec.3 $(DESTDIR)$(man3dir)/pcre32_exec.3
+ ln -sf pcre_free_study.3 $(DESTDIR)$(man3dir)/pcre32_free_study.3
+ ln -sf pcre_free_substring.3 $(DESTDIR)$(man3dir)/pcre32_free_substring.3
+ ln -sf pcre_free_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_free_substring_list.3
+ ln -sf pcre_fullinfo.3 $(DESTDIR)$(man3dir)/pcre32_fullinfo.3
+ ln -sf pcre_get_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_named_substring.3
+ ln -sf pcre_get_stringnumber.3 $(DESTDIR)$(man3dir)/pcre32_get_stringnumber.3
+ ln -sf pcre_get_stringtable_entries.3 $(DESTDIR)$(man3dir)/pcre32_get_stringtable_entries.3
+ ln -sf pcre_get_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_substring.3
+ ln -sf pcre_get_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_get_substring_list.3
+ ln -sf pcre_jit_stack_alloc.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_alloc.3
+ ln -sf pcre_jit_stack_free.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_free.3
+ ln -sf pcre_maketables.3 $(DESTDIR)$(man3dir)/pcre32_maketables.3
+ ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_pattern_to_host_byte_order.3
+ ln -sf pcre_refcount.3 $(DESTDIR)$(man3dir)/pcre32_refcount.3
+ ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre32_study.3
+ ln -sf pcre_utf32_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_utf32_to_host_byte_order.3
+ ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre32_version.3
pcrecpp_man = doc/pcrecpp.3
EXTRA_DIST += $(pcrecpp_man)
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index 4f9e0af..bbe349d 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -142,7 +142,7 @@ can skip ahead to the CMake section.
once for each type.
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
- library) repeat steps 5-6 with the following files:
+ or 32-bit libraries) repeat steps 5-6 with the following files:
pcre16_byte_order.c
pcre16_chartables.c
@@ -167,13 +167,39 @@ can skip ahead to the CMake section.
pcre16_version.c
pcre16_xclass.c
+ (7') If you want to build a 32-bit library (as well as, or instead of the 8-bit
+ or 16-bit libraries) repeat steps 5-6 with the following files:
+
+ pcre32_byte_order.c
+ pcre32_chartables.c
+ pcre32_compile.c
+ pcre32_config.c
+ pcre32_dfa_exec.c
+ pcre32_exec.c
+ pcre32_fullinfo.c
+ pcre32_get.c
+ pcre32_globals.c
+ pcre32_jit_compile.c
+ pcre32_maketables.c
+ pcre32_newline.c
+ pcre32_ord2utf32.c
+ pcre32_refcount.c
+ pcre32_string_utils.c
+ pcre32_study.c
+ pcre32_tables.c
+ pcre32_ucd.c
+ pcre32_utf32_utils.c
+ pcre32_valid_utf32.c
+ pcre32_version.c
+ pcre32_xclass.c
+
(8) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcreposix.h file and then compile
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
(on its own) as the pcreposix library.
-(9) The pcretest program can be linked with either or both of the 8-bit and
- 16-bit libraries (depending on what you selected in config.h). Compile
+ (9) The pcretest program can be linked with any of the 8-bit, 16-bit and 32-bit
+ libraries (depending on what you selected in config.h). Compile
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
link them together with the appropriate library/ies. If you compiled an
8-bit library, pcretest also needs the pcreposix wrapper library unless
@@ -182,11 +208,12 @@ can skip ahead to the CMake section.
(10) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE"
- in the README file. If you compiled both an 8-bit and a 16-bit library,
- you need to run pcretest with the -16 option to do 16-bit tests.
+ in the README file. If you compiled more than one of the 8-bit, 16-bit and
+ 32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
+ tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
- For example, test 4 is for UTF-8 or UTF-16 support, and will not run if
+ For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
you have built PCRE without it. See the comments at the start of each
testinput file. If you have a suitable Unix-like shell, the RunTest script
will run the appropriate tests for you.
diff --git a/PrepareRelease b/PrepareRelease
index 340449b..31d8380 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -58,8 +58,8 @@ pcretest commands.
End
echo "Making pcre.txt"
-for file in pcre pcre16 pcrebuild pcrematching pcreapi pcrecallout pcrecompat \
- pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
+for file in pcre pcre16 pcre32 pcrebuild pcrematching pcreapi pcrecallout \
+ pcrecompat pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
pcreprecompile pcreperform pcreposix pcrecpp pcresample \
pcrelimits pcrestack ; do
echo " Processing $file.3"
@@ -160,7 +160,7 @@ if [ "$1" = "doc" ] ; then exit; fi
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing. We don't bother with
-# those pcre16_xx files that just define COMPILE_PCRE16 and then #include the
+# those pcre[16|32]_xx files that just define COMPILE_PCRE[16|32] and then #include the
# common file, because they aren't going to change.
files="\
@@ -185,6 +185,7 @@ files="\
pcre-config.in \
libpcre.pc.in \
libpcre16.pc.in \
+ libpcre32.pc.in \
libpcreposix.pc.in \
libpcrecpp.pc.in \
config.h.in \
@@ -211,6 +212,7 @@ files="\
pcre_newline.c \
pcre_ord2utf8.c \
pcre16_ord2utf16.c \
+ pcre32_ord2utf32.c \
pcre_printint.c \
pcre_refcount.c \
pcre_string_utils.c \
@@ -221,7 +223,9 @@ files="\
pcre_version.c \
pcre_xclass.c \
pcre16_utf16_utils.c \
+ pcre32_utf32_utils.c \
pcre16_valid_utf16.c \
+ pcre32_valid_utf32.c \
pcre_scanner.cc \
pcre_scanner.h \
pcre_scanner_unittest.cc \
diff --git a/README b/README
index 0d3cffc..a65cf9e 100644
--- a/README
+++ b/README
@@ -35,9 +35,10 @@ The contents of this README file are:
The PCRE APIs
-------------
-PCRE is written in C, and it has its own API. There are two sets of functions,
-one for the 8-bit library, which processes strings of bytes, and one for the
-16-bit library, which processes strings of 16-bit values. The distribution also
+PCRE is written in C, and it has its own API. There are three sets of functions,
+one for the 8-bit library, which processes strings of bytes, one for the
+16-bit library, which processes strings of 16-bit values, and one for the 32-bit
+library, which processes strings of 32-bit values. The distribution also
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
C++.
@@ -183,8 +184,10 @@ library. They are also documented in the pcrebuild man page.
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
- the "configure" command, the 16-bit library is also built. If you want only
- the 16-bit library, use "./configure --enable-pcre16 --disable-pcre8".
+ the "configure" command, the 16-bit library is also built. If you add
+ --enable-pcre32 to the "configure" command, the 32-bit library is also built.
+ If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
+ building the 8-bit library.
. If you are building the 8-bit library and want to suppress the building of
the C++ wrapper library, you can add --disable-cpp to the "configure"
@@ -203,23 +206,24 @@ library. They are also documented in the pcrebuild man page.
. If you want to make use of the support for UTF-8 Unicode character strings in
the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
- you must add --enable-utf to the "configure" command. Without it, the code
- for handling UTF-8 and UTF-16 is not included in the relevant library. Even
+ or UTF-32 Unicode character strings in the 32-bit library, you must add
+ --enable-utf to the "configure" command. Without it, the code for handling
+ UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
when --enable-utf is included, the use of a UTF encoding still has to be
enabled by an option at run time. When PCRE is compiled with this option, its
- input can only either be ASCII or UTF-8/16, even when running on EBCDIC
+ input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
the same time.
-. There are no separate options for enabling UTF-8 and UTF-16 independently
- because that would allow ridiculous settings such as requesting UTF-16
- support while building only the 8-bit library. However, the option
+. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
+ independently because that would allow ridiculous settings such as requesting
+ UTF-16 support while building only the 8-bit library. However, the option
--enable-utf8 is retained for backwards compatibility with earlier releases
- that did not support 16-bit character strings. It is synonymous with
+ that did not support 16-bit or 32-bit character strings. It is synonymous with
--enable-utf. It is not possible to configure one library with UTF support
and the other without in the same configuration.
-. If, in addition to support for UTF-8/16 character strings, you want to
+. If, in addition to support for UTF-8/16/32 character strings, you want to
include support for the \P, \p, and \X sequences that recognize Unicode
character properties, you must add --enable-unicode-properties to the
"configure" command. This adds about 30K to the size of the library (in the
@@ -281,7 +285,8 @@ library. They are also documented in the pcrebuild man page.
library, PCRE then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
the same as --with-link-size=4, which (in both libraries) uses four-byte
- offsets. Increasing the internal link size reduces performance.
+ offsets. Increasing the internal link size reduces performance. In the 32-bit
+ library, the only supported link size is 4.
. You can build PCRE so that its internal match() function that is called from
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
@@ -316,7 +321,7 @@ library. They are also documented in the pcrebuild man page.
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE is built this way, it always operates in EBCDIC. It cannot support
- both EBCDIC and UTF-8/16. There is a second option, --enable-ebcdic-nl25,
+ both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
@@ -368,6 +373,7 @@ The "configure" script builds the following files for the basic C library:
that were set for "configure"
. libpcre.pc ) data for the pkg-config command
. libpcre16.pc )
+. libpcre32.pc )
. libpcreposix.pc )
. libtool script that builds shared and/or static libraries
@@ -387,8 +393,8 @@ The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
-Once "configure" has run, you can run "make". This builds either or both of the
-libraries libpcre and libpcre16, and a test program called pcretest. If you
+Once "configure" has run, you can run "make". This builds the libraries
+libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
built as well.
@@ -412,12 +418,14 @@ system. The following are installed (file names are all relative to the
Libraries (lib):
libpcre16 (if 16-bit support is enabled)
+ libpcre32 (if 32-bit support is enabled)
libpcre (if 8-bit support is enabled)
libpcreposix (if 8-bit support is enabled)
libpcrecpp (if 8-bit and C++ support is enabled)
Configuration information (lib/pkgconfig):
libpcre16.pc
+ libpcre32.pc
libpcre.pc
libpcreposix.pc
libpcrecpp.pc (if C++ support is enabled)
@@ -598,7 +606,7 @@ The RunTest script runs the pcretest test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. Some tests are relevant only when certain build-time options
-were selected. For example, the tests for UTF-8/16 support are run only if
+were selected. For example, the tests for UTF-8/16/32 support are run only if
--enable-utf was used. RunTest outputs a comment when it skips a test.
Many of the tests that are not skipped are run up to three times. The second
@@ -607,9 +615,9 @@ tests that are marked "never study" (see the pcretest program for how this is
done). If JIT support is available, the non-DFA tests are run a third time,
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
-When both 8-bit and 16-bit support is enabled, the entire set of tests is run
-twice, once for each library. If you want to run just one set of tests, call
-RunTest with either the -8 or -16 option.
+The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
+libraries that are enabled. If you want to run just one set of tests, call
+RunTest with either the -8, -16 or -32 option.
RunTest uses a file called testtry to hold the main output from pcretest.
Other files whose names begin with "test" are used as working files in some
@@ -660,13 +668,13 @@ RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
Windows versions of test 2. More info on using RunTest.bat is included in the
document entitled NON-UNIX-USE.]
-The fourth and fifth tests check the UTF-8/16 support and error handling and
+The fourth and fifth tests check the UTF-8/16/32 support and error handling and
internal UTF features of PCRE that are not relevant to Perl, respectively. The
sixth and seventh tests do the same for Unicode character properties support.
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
-matching function, in non-UTF-8/16 mode, UTF-8/16 mode, and UTF-8/16 mode with
-Unicode property support, respectively.
+matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
+mode with Unicode property support, respectively.
The eleventh test checks some internal offsets and code size features; it is
run only when the default "link size" of 2 is set (in other cases the sizes
@@ -677,16 +685,21 @@ test is run only when JIT support is not available. They test some JIT-specific
features such as information output from pcretest about JIT compilation.
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
-the seventeenth, eighteenth, and nineteenth tests are run only in 16-bit mode.
+the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
These are tests that generate different output in the two modes. They are for
-general cases, UTF-8/16 support, and Unicode property support, respectively.
+general cases, UTF-8/16/32 support, and Unicode property support, respectively.
-The twentieth test is run only in 16-bit mode. It tests some specific 16-bit
-features of the DFA matching engine.
+The twentieth test is run only in 16/32-bit mode. It tests some specific
+16/32-bit features of the DFA matching engine.
-The twenty-first and twenty-second tests are run only in 16-bit mode, when the
-link size is set to 2. They test reloading pre-compiled patterns.
+The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
+link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
+The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
+general cases, and UTF-16 support, respectively.
+
+The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
+general cases, and UTF-32 support, respectively.
Character tables
----------------
@@ -746,8 +759,8 @@ File manifest
-------------
The distribution should contain the files listed below. Where a file name is
-given as pcre[16]_xxx it means that there are two files, one with the name
-pcre_xxx and the other with the name pcre16_xxx.
+given as pcre[16|32]_xxx it means that there are three files, one with the name
+pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
(A) Source files of the PCRE library functions and their headers:
@@ -758,33 +771,35 @@ pcre_xxx and the other with the name pcre16_xxx.
coding; used, unless --enable-rebuild-chartables is
specified, by copying to pcre[16]_chartables.c
- pcreposix.c )
- pcre[16]_byte_order.c )
- pcre[16]_compile.c )
- pcre[16]_config.c )
- pcre[16]_dfa_exec.c )
- pcre[16]_exec.c )
- pcre[16]_fullinfo.c )
- pcre[16]_get.c ) sources for the functions in the library,
- pcre[16]_globals.c ) and some internal functions that they use
- pcre[16]_jit_compile.c )
- pcre[16]_maketables.c )
- pcre[16]_newline.c )
- pcre[16]_refcount.c )
- pcre[16]_string_utils.c )
- pcre[16]_study.c )
- pcre[16]_tables.c )
- pcre[16]_ucd.c )
- pcre[16]_version.c )
- pcre[16]_xclass.c )
- pcre_ord2utf8.c )
- pcre_valid_utf8.c )
- pcre16_ord2utf16.c )
- pcre16_utf16_utils.c )
- pcre16_valid_utf16.c )
-
- pcre[16]_printint.c ) debugging function that is used by pcretest,
- ) and can also be #included in pcre_compile()
+ pcreposix.c )
+ pcre[16|32]_byte_order.c )
+ pcre[16|32]_compile.c )
+ pcre[16|32]_config.c )
+ pcre[16|32]_dfa_exec.c )
+ pcre[16|32]_exec.c )
+ pcre[16|32]_fullinfo.c )
+ pcre[16|32]_get.c ) sources for the functions in the library,
+ pcre[16|32]_globals.c ) and some internal functions that they use
+ pcre[16|32]_jit_compile.c )
+ pcre[16|32]_maketables.c )
+ pcre[16|32]_newline.c )
+ pcre[16|32]_refcount.c )
+ pcre[16|32]_string_utils.c )
+ pcre[16|32]_study.c )
+ pcre[16|32]_tables.c )
+ pcre[16|32]_ucd.c )
+ pcre[16|32]_version.c )
+ pcre[16|32]_xclass.c )
+ pcre_ord2utf8.c )
+ pcre_valid_utf8.c )
+ pcre16_ord2utf16.c )
+ pcre16_utf16_utils.c )
+ pcre16_valid_utf16.c )
+ pcre32_utf32_utils.c )
+ pcre32_valid_utf32.c )
+
+ pcre[16|32]_printint.c ) debugging function that is used by pcretest,
+ ) and can also be #included in pcre_compile()
pcre.h.in template for pcre.h when built by "configure"
pcreposix.h header for the external POSIX wrapper API
@@ -849,6 +864,7 @@ pcre_xxx and the other with the name pcre16_xxx.
doc/perltest.txt plain text documentation of Perl test program
install-sh a shell script for installing files
libpcre16.pc.in template for libpcre16.pc for pkg-config
+ libpcre32.pc.in template for libpcre32.pc for pkg-config
libpcre.pc.in template for libpcre.pc for pkg-config
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
diff --git a/RunTest b/RunTest
index 61da889..115ce67 100755
--- a/RunTest
+++ b/RunTest
@@ -18,8 +18,8 @@
# two tests for JIT-specific features, one to be run when JIT support is
# available, and one when it is not.
#
-# Whichever of the 8-bit and 16-bit libraries exist are tested. It is also
-# possible to select which to test by the arguments -8 or -16.
+# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
+# possible to select which to test by the arguments -8, -16 or -32.
#
# Other arguments for this script can be individual test numbers, or the word
# "valgrind", or "sim" followed by an argument to run cross-compiled
@@ -58,12 +58,16 @@ title13="Test 13: JIT-specific features (JIT not available)"
title14="Test 14: Specials for the basic 8-bit library"
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
-title17="Test 17: Specials for the basic 16-bit library"
-title18="Test 18: Specials for the 16-bit library with UTF-16 support"
-title19="Test 19: Specials for the 16-bit library with Unicode propery support"
-title20="Test 20: DFA specials for the basic 16-bit library"
-title21="Test 21: Reloads for the basic 16-bit library"
-title22="Test 22: Reloads for the 16-bit library with UTF-16 support"
+title17="Test 17: Specials for the basic 16/32-bit library"
+title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
+title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
+title20="Test 20: DFA specials for the basic 16/32-bit library"
+title21="Test 21: Reloads for the basic 16/32-bit library"
+title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
+title23="Test 23: Specials for the 16-bit library"
+title24="Test 24: Specials for the 16-bit library with UTF-16 support"
+title25="Test 25: Specials for the 32-bit library"
+title26="Test 26: Specials for the 32-bit library with UTF-32 support"
if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title1
@@ -88,6 +92,10 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title20
echo $title21
echo $title22
+ echo $title23
+ echo $title24
+ echo $title25
+ echo $title26
exit 0
fi
@@ -147,6 +155,7 @@ valgrind=
sim=
arg8=
arg16=
+arg32=
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
@@ -176,6 +185,10 @@ do19=no
do20=no
do21=no
do22=no
+do23=no
+do24=no
+do25=no
+do26=no
while [ $# -gt 0 ] ; do
case $1 in
@@ -201,9 +214,14 @@ while [ $# -gt 0 ] ; do
20) do20=yes;;
21) do21=yes;;
22) do22=yes;;
+ 23) do23=yes;;
+ 24) do24=yes;;
+ 25) do25=yes;;
+ 26) do26=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
- valgrind) valgrind="valgrind -q --smc-check=all";;
+ -32) arg32=yes;;
+ valgrind) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
sim) shift; sim=$1;;
*) echo "Unknown test number '$1'"; exit 1;;
esac
@@ -230,14 +248,24 @@ $sim ./pcretest -C pcre8 >/dev/null
support8=$?
$sim ./pcretest -C pcre16 >/dev/null
support16=$?
-if [ `expr $support8 + $support16` -eq 2 ] ; then
+$sim ./pcretest -C pcre32 >/dev/null
+support32=$?
+
+if [ `expr $support8 + $support16 + $support32` -gt 1 ] ; then
test8=
test16=-16
- if [ "$arg8" = yes -a "$arg16" != yes ] ; then
+ test32=-32
+ if [ "$arg8" = yes -a "$arg16$arg32" != nono ] ; then
test16=skip
+ test32=skip
+ fi
+ if [ "$arg16" = yes -a "$arg8$arg32" != nono ] ; then
+ test8=skip
+ test32=skip
fi
- if [ "$arg16" = yes -a "$arg8" != yes ] ; then
+ if [ "$arg32" = yes -a "$arg8$arg16" != nono ] ; then
test8=skip
+ test16=skip
fi
else
if [ $support8 -ne 0 ] ; then
@@ -245,15 +273,37 @@ else
echo "Cannot run 16-bit library tests: 16-bit library not compiled"
exit 1
fi
+ if [ "$arg32" = yes ] ; then
+ echo "Cannot run 32-bit library tests: 32-bit library not compiled"
+ exit 1
+ fi
test8=
test16=skip
- else
+ test32=skip
+ elif [ $support16 -ne 0 ] ; then
if [ "$arg8" = yes ] ; then
echo "Cannot run 8-bit library tests: 8-bit library not compiled"
exit 1
fi
+ if [ "$arg32" = yes ] ; then
+ echo "Cannot run 32-bit library tests: 32-bit library not compiled"
+ exit 1
+ fi
test8=skip
test16=-16
+ test32=skip
+ else # $support32 -ne 0
+ if [ "$arg8" = yes ] ; then
+ echo "Cannot run 8-bit library tests: 8-bit library not compiled"
+ exit 1
+ fi
+ if [ "$arg16" = yes ] ; then
+ echo "Cannot run 16-bit library tests: 16-bit library not compiled"
+ exit 1
+ fi
+ test8=skip
+ test16=skip
+ test32=-32
fi
fi
@@ -348,7 +398,8 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
$do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
$do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
$do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
- $do21 = no -a $do22 = no ] ; then
+ $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
+ $do25 = no -a $do26 = no ] ; then
do1=yes
do2=yes
do3=yes
@@ -371,6 +422,10 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
do20=yes
do21=yes
do22=yes
+ do23=yes
+ do24=yes
+ do25=yes
+ do26=yes
fi
# Show which release and which test data
@@ -379,11 +434,13 @@ echo ""
echo PCRE C library tests using test data from $testdata
$sim ./pcretest /dev/null
-for bmode in "$test8" "$test16"; do
+for bmode in "$test8" "$test16" "$test32"; do
case "$bmode" in
skip) continue;;
- -16) if [ "$test8" != "skip" ] ; then echo ""; fi
+ -16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
+ -32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
+ bits=32; echo "---- Testing 32-bit library ----"; echo "";;
*) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
esac
@@ -687,10 +744,11 @@ fi
if [ "$do14" = yes ] ; then
echo $title14
- if [ "$bits" = "16" ] ; then
- echo " Skipped when running 16-bit tests"
+ if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+ echo " Skipped when running 16/32-bit tests"
else
cp -f $testdata/saved16 testsaved16
+ cp -f $testdata/saved32 testsaved32
for opt in "" "-s" $jitopt; do
$sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput14 testtry
if [ $? = 0 ] ; then
@@ -710,8 +768,8 @@ fi
if [ "$do15" = yes ] ; then
echo $title15
- if [ "$bits" = "16" ] ; then
- echo " Skipped when running 16-bit tests"
+ if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+ echo " Skipped when running 16/32-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
@@ -734,8 +792,8 @@ fi
if [ $do16 = yes ] ; then
echo $title16
- if [ "$bits" = "16" ] ; then
- echo " Skipped when running 16-bit tests"
+ if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+ echo " Skipped when running 16/32-bit tests"
elif [ $ucp -eq 0 ] ; then
echo " Skipped because Unicode property support is not available"
else
@@ -754,7 +812,7 @@ if [ $do16 = yes ] ; then
fi
fi
-# Tests for 16-bit-specific features
+# Tests for 16/32-bit-specific features
if [ $do17 = yes ] ; then
echo $title17
@@ -776,7 +834,7 @@ if [ $do17 = yes ] ; then
fi
fi
-# Tests for 16-bit-specific features (UTF-16 support)
+# Tests for 16/32-bit-specific features (UTF-16/32 support)
if [ $do18 = yes ] ; then
echo $title18
@@ -788,7 +846,7 @@ if [ $do18 = yes ] ; then
for opt in "" "-s" $jitopt; do
$sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput18 testtry
if [ $? = 0 ] ; then
- $cf $testdata/testoutput18 testtry
+ $cf $testdata/testoutput18-$bits testtry
if [ $? != 0 ] ; then exit 1; fi
else exit 1
fi
@@ -800,7 +858,7 @@ if [ $do18 = yes ] ; then
fi
fi
-# Tests for 16-bit-specific features (Unicode property support)
+# Tests for 16/32-bit-specific features (Unicode property support)
if [ $do19 = yes ] ; then
echo $title19
@@ -824,7 +882,7 @@ if [ $do19 = yes ] ; then
fi
fi
-# Tests for 16-bit-specific features in DFA non-UTF-16 mode
+# Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
if [ $do20 = yes ] ; then
echo $title20
@@ -845,7 +903,7 @@ if [ $do20 = yes ] ; then
fi
fi
-# Tests for reloads with 16-bit library
+# Tests for reloads with 16/32-bit library
if [ $do21 = yes ] ; then
echo $title21
@@ -857,9 +915,11 @@ if [ $do21 = yes ] ; then
cp -f $testdata/saved8 testsaved8
cp -f $testdata/saved16LE-1 testsaved16LE-1
cp -f $testdata/saved16BE-1 testsaved16BE-1
+ cp -f $testdata/saved32LE-1 testsaved32LE-1
+ cp -f $testdata/saved32BE-1 testsaved32BE-1
$sim $valgrind ./pcretest -q $bmode $testdata/testinput21 testtry
if [ $? = 0 ] ; then
- $cf $testdata/testoutput21 testtry
+ $cf $testdata/testoutput21-$bits testtry
if [ $? != 0 ] ; then exit 1; fi
else exit 1
fi
@@ -867,7 +927,7 @@ if [ $do21 = yes ] ; then
fi
fi
-# Tests for reloads with 16-bit library (UTF-16 support)
+# Tests for reloads with 16/32-bit library (UTF-16/32 support)
if [ $do22 = yes ] ; then
echo $title22
@@ -880,9 +940,75 @@ if [ $do22 = yes ] ; then
else
cp -f $testdata/saved16LE-2 testsaved16LE-2
cp -f $testdata/saved16BE-2 testsaved16BE-2
+ cp -f $testdata/saved32LE-2 testsaved32LE-2
+ cp -f $testdata/saved32BE-2 testsaved32BE-2
$sim $valgrind ./pcretest -q $bmode $testdata/testinput22 testtry
if [ $? = 0 ] ; then
- $cf $testdata/testoutput22 testtry
+ $cf $testdata/testoutput22-$bits testtry
+ if [ $? != 0 ] ; then exit 1; fi
+ else exit 1
+ fi
+ echo " OK"
+ fi
+fi
+
+if [ $do23 = yes ] ; then
+ echo $title23
+ if [ "$bits" = "8" -o "$bits" = "32" ] ; then
+ echo " Skipped when running 8/32-bit tests"
+ else
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput23 testtry
+ if [ $? = 0 ] ; then
+ $cf $testdata/testoutput23 testtry
+ if [ $? != 0 ] ; then exit 1; fi
+ else exit 1
+ fi
+ echo " OK"
+ fi
+fi
+
+if [ $do24 = yes ] ; then
+ echo $title24
+ if [ "$bits" = "8" -o "$bits" = "32" ] ; then
+ echo " Skipped when running 8/32-bit tests"
+ elif [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF-$bits support is not available"
+ else
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput24 testtry
+ if [ $? = 0 ] ; then
+ $cf $testdata/testoutput24 testtry
+ if [ $? != 0 ] ; then exit 1; fi
+ else exit 1
+ fi
+ echo " OK"
+ fi
+fi
+
+if [ $do25 = yes ] ; then
+ echo $title25
+ if [ "$bits" = "8" -o "$bits" = "16" ] ; then
+ echo " Skipped when running 8/16-bit tests"
+ else
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput25 testtry
+ if [ $? = 0 ] ; then
+ $cf $testdata/testoutput25 testtry
+ if [ $? != 0 ] ; then exit 1; fi
+ else exit 1
+ fi
+ echo " OK"
+ fi
+fi
+
+if [ $do26 = yes ] ; then
+ echo $title26
+ if [ "$bits" = "8" -o "$bits" = "16" ] ; then
+ echo " Skipped when running 8/16-bit tests"
+ elif [ $utf -eq 0 ] ; then
+ echo " Skipped because UTF-$bits support is not available"
+ else
+ $sim $valgrind ./pcretest -q $bmode $testdata/testinput26 testtry
+ if [ $? = 0 ] ; then
+ $cf $testdata/testoutput26 testtry
if [ $? != 0 ] ; then exit 1; fi
else exit 1
fi
@@ -890,7 +1016,7 @@ if [ $do22 = yes ] ; then
fi
fi
-# End of loop for 8-bit/16-bit tests
+# End of loop for 8/16/32-bit tests
done
# Clean up local working files
diff --git a/configure.ac b/configure.ac
index a2f8daa..b09ef6a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,6 +19,7 @@ m4_define(pcre_date, [2012-08-08])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [1:1:0])
m4_define(libpcre16_version, [0:1:0])
+m4_define(libpcre32_version, [0:1:0])
m4_define(libpcreposix_version, [0:1:0])
m4_define(libpcrecpp_version, [0:0:0])
@@ -123,6 +124,13 @@ AC_ARG_ENABLE(pcre16,
, enable_pcre16=unset)
AC_SUBST(enable_pcre16)
+# Handle --enable-pcre32 (disabled by default)
+AC_ARG_ENABLE(pcre32,
+ AS_HELP_STRING([--enable-pcre32],
+ [enable 32 bit character support]),
+ , enable_pcre32=unset)
+AC_SUBST(enable_pcre32)
+
# Handle --disable-cpp. The substitution of enable_cpp is needed for use in
# pcre-config.
AC_ARG_ENABLE(cpp,
@@ -158,7 +166,7 @@ AC_ARG_ENABLE(utf8,
# Handle --enable-utf (disabled by default)
AC_ARG_ENABLE(utf,
AS_HELP_STRING([--enable-utf],
- [enable UTF-8/16 support (incompatible with --enable-ebcdic)]),
+ [enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
, enable_utf=unset)
# Handle --enable-unicode-properties
@@ -298,10 +306,16 @@ then
enable_pcre16=no
fi
+# Set the default value for pcre32
+if test "x$enable_pcre32" = "xunset"
+then
+ enable_pcre32=no
+fi
+
# Make sure enable_pcre8 or enable_pcre16 was set
-if test "x$enable_pcre8$enable_pcre16" = "xnono"
+if test "x$enable_pcre8$enable_pcre16$enable_pcre32" = "xnonono"
then
- AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled])
+ AC_MSG_ERROR([At least one of 8, 16 or 32 bit pcre library must be enabled])
fi
# Make sure that if enable_unicode_properties was set, that UTF support is enabled.
@@ -309,7 +323,7 @@ if test "x$enable_unicode_properties" = "xyes"
then
if test "x$enable_utf" = "xno"
then
- AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support])
+ AC_MSG_ERROR([support for Unicode properties requires UTF-8/16/32 support])
fi
enable_utf=yes
fi
@@ -365,7 +379,7 @@ if test "x$enable_ebcdic" = "xyes"; then
enable_rebuild_chartables=yes
if test "x$enable_utf" = "xyes"; then
- AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time])
+ AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
fi
if test "x$enable_ebcdic_nl25" = "xno"; then
@@ -506,6 +520,7 @@ AC_SUBST(pcre_have_bits_type_traits)
# Conditional compilation
AM_CONDITIONAL(WITH_PCRE8, test "x$enable_pcre8" = "xyes")
AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
+AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
@@ -624,6 +639,11 @@ if test "$enable_pcre16" = "yes"; then
Define to any value to enable the 16 bit PCRE library.])
fi
+if test "$enable_pcre32" = "yes"; then
+ AC_DEFINE([SUPPORT_PCRE32], [], [
+ Define to any value to enable the 32 bit PCRE library.])
+fi
+
if test "$enable_jit" = "yes"; then
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
CC="$PTHREAD_CC"
@@ -642,10 +662,10 @@ fi
if test "$enable_utf" = "yes"; then
AC_DEFINE([SUPPORT_UTF], [], [
- Define to any value to enable support for the UTF-8/16 Unicode encoding.
+ Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
This will work even in an EBCDIC environment, but it is incompatible
with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
- code *or* ASCII/UTF-8/16, but not both at once.])
+ code *or* ASCII/UTF-8/16/32, but not both at once.])
fi
if test "$enable_unicode_properties" = "yes"; then
@@ -778,8 +798,8 @@ if test "$enable_ebcdic" = "yes"; then
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
strings are in EBCDIC. If you do not define this macro, PCRE will assume
- input strings are ASCII or UTF-8/16 Unicode. It is not possible to build a
- version of PCRE that supports both EBCDIC and UTF-8/16.])
+ input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
+ a version of PCRE that supports both EBCDIC and UTF-8/16/32.])
fi
if test "$enable_ebcdic_nl25" = "yes"; then
@@ -812,6 +832,9 @@ EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \
EXTRA_LIBPCRE16_LDFLAGS="$EXTRA_LIBPCRE16_LDFLAGS \
$NO_UNDEFINED -version-info libpcre16_version"
+EXTRA_LIBPCRE32_LDFLAGS="$EXTRA_LIBPCRE32_LDFLAGS \
+ $NO_UNDEFINED -version-info libpcre32_version"
+
EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \
$NO_UNDEFINED -version-info libpcreposix_version"
@@ -821,12 +844,13 @@ EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS \
AC_SUBST(EXTRA_LIBPCRE_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE16_LDFLAGS)
+AC_SUBST(EXTRA_LIBPCRE32_LDFLAGS)
AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS)
# When we run 'make distcheck', use these arguments. Turning off compiler
# optimization makes it run faster.
-DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-jit --enable-cpp --enable-unicode-properties"
+DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-pcre32 --enable-jit --enable-cpp --enable-unicode-properties"
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
@@ -897,6 +921,7 @@ AC_CONFIG_FILES(
Makefile
libpcre.pc
libpcre16.pc
+ libpcre32.pc
libpcreposix.pc
libpcrecpp.pc
pcre-config
@@ -942,9 +967,10 @@ $PACKAGE-$VERSION configuration summary:
Build 8 bit pcre library ........ : ${enable_pcre8}
Build 16 bit pcre library ....... : ${enable_pcre16}
+ Build 32 bit pcre library ....... : ${enable_pcre32}
Build C++ library ............... : ${enable_cpp}
Enable JIT compiling support .... : ${enable_jit}
- Enable UTF-8/16 support ......... : ${enable_utf}
+ Enable UTF-8/16/32 support ...... : ${enable_utf}
Unicode properties .............. : ${enable_unicode_properties}
Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
diff --git a/doc/index.html.src b/doc/index.html.src
index b9bb91f..c7bc196 100644
--- a/doc/index.html.src
+++ b/doc/index.html.src
@@ -21,6 +21,9 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcre16.html">pcre16</a></td>
<td>&nbsp;&nbsp;Discussion of the 16-bit PCRE library</td></tr>
+<tr><td><a href="pcre32.html">pcre32</a></td>
+ <td>&nbsp;&nbsp;Discussion of the 32-bit PCRE library</td></tr>
+
<tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
@@ -82,12 +85,13 @@ The HTML documentation for PCRE comprises the following pages:
<td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
- <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16 support</td></tr>
+ <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
</table>
<p>
-There are also individual pages that summarize the interface for each function
-in the library. There is a single page for each pair of 8-bit/16-bit functions.
+There are also individual pages that summarize the interface for each function
+in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
+functions.
</p>
<table>
@@ -166,6 +170,9 @@ in the library. There is a single page for each pair of 8-bit/16-bit functions.
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
+<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
+ <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
+
<tr><td><a href="pcre_version.html">pcre_version</a></td>
<td>&nbsp;&nbsp;Return PCRE version and release date</td></tr>
</table>
diff --git a/doc/pcre-config.1 b/doc/pcre-config.1
index 666378c..0430904 100644
--- a/doc/pcre-config.1
+++ b/doc/pcre-config.1
@@ -6,9 +6,9 @@ pcre-config - program to return PCRE configuration
.sp
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
.ti +5n
-.B [--libs16] [--libs-cpp] [--libs-posix] [--cflags]
+.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
.ti +5n
-.B [--cflags-posix]
+.B [--cflags] [--cflags-posix]
.
.
.SH DESCRIPTION
@@ -16,7 +16,8 @@ pcre-config - program to return PCRE configuration
.sp
\fBpcre-config\fP returns the configuration of the installed PCRE
libraries and the options required to compile a program to use them. Some of
-the options apply only to the 8-bit or 16-bit libraries, respectively, and are
+the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
+respectively, and are
not available if only one of those libraries has been built. If an unavailable
option is encountered, the "usage" information is output.
.
@@ -45,6 +46,10 @@ with the 8-bit PCRE library (\fB-lpcre\fP on many systems).
Writes to the standard output the command line options required to link
with the 16-bit PCRE library (\fB-lpcre16\fP on many systems).
.TP 10
+\fB--libs32\fP
+Writes to the standard output the command line options required to link
+with the 32-bit PCRE library (\fB-lpcre32\fP on many systems).
+.TP 10
\fB--libs-cpp\fP
Writes to the standard output the command line options required to link with
PCRE's C++ wrapper library (\fB-lpcrecpp\fP \fB-lpcre\fP on many
@@ -83,5 +88,5 @@ system. It has been subsequently revised as a generic PCRE man page.
.rs
.sp
.nf
-Last updated: 01 January 2012
+Last updated: 24 June 2012
.fi
diff --git a/doc/pcre.3 b/doc/pcre.3
index 4c5111c..0bf7d9d 100644
--- a/doc/pcre.3
+++ b/doc/pcre.3
@@ -18,17 +18,27 @@ UTF-8 strings), and a second library that supports 16-bit character strings
built. The majority of the work to make this possible was done by Zoltan
Herczeg.
.P
-The two libraries contain identical sets of functions, except that the names in
-the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP. To avoid
+Starting with release 8.32, it is possible to compile a third separate PCRE
+library, which supports 32-bit character strings (including
+UTF-32 strings). The build process allows any combination of the 8-, 16- and
+32-bit libraries to be built.
+.P
+The three libraries contain identical sets of functions, except that the names in
+the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the names
+in the 32-bit library start with \fBpcre32_\fP instead of \fBpcre_\fP. To avoid
over-complication and reduce the documentation maintenance load, most of the
documentation describes the 8-bit library, with the differences for the 16-bit
-library described separately in the
+and 32-bit libraries described separately in the
.\" HREF
\fBpcre16\fP
+or
+.\" HREF
+\fBpcre32\fP
.\"
-page. References to functions or structures of the form \fIpcre[16]_xxx\fP
+page. References to functions or structures of the form \fIpcre[16|32]_xxx\fP
should be read as meaning "\fIpcre_xxx\fP when using the 8-bit library and
-\fIpcre16_xxx\fP when using the 16-bit library".
+\fIpcre16_xxx\fP when using the 16-bit library and
+\fIpcre32_xxx\fP when using the 32-bit library".
.P
The current implementation of PCRE corresponds approximately with Perl 5.12,
including support for UTF-8/16 encoded strings and Unicode general category
@@ -90,9 +100,9 @@ distribution.
The libraries contains a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with
-"_pcre_" or "_pcre16_", which hopefully will not provoke any name clashes. In
-some environments, it is possible to control which external symbols are
-exported when a shared library is built, and in these cases the undocumented
+"_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
+clashes. In some environments, it is possible to control which external symbols
+are exported when a shared library is built, and in these cases the undocumented
symbols are not exported.
.
.
@@ -107,6 +117,7 @@ of searching. The sections are as follows:
.sp
pcre this document
pcre16 details of the 16-bit library
+ pcre32 details of the 32-bit library
pcre-config show PCRE installation configuration information
pcreapi details of PCRE's native C API
pcrebuild options for building PCRE
@@ -129,7 +140,7 @@ of searching. The sections are as follows:
pcrestack discussion of stack usage
pcresyntax quick syntax reference
pcretest description of the \fBpcretest\fP testing command
- pcreunicode discussion of Unicode and UTF-8/16 support
+ pcreunicode discussion of Unicode and UTF-8/16/32 support
.sp
In addition, in the "man" and HTML formats, there is a short page for each
8-bit C library function, listing its arguments and results.
diff --git a/doc/pcre16.3 b/doc/pcre16.3
index 7b97099..adcbaee 100644
--- a/doc/pcre16.3
+++ b/doc/pcre16.3
@@ -278,8 +278,9 @@ page.
.P
For the \fBpcre16_config()\fP function there is an option PCRE_CONFIG_UTF16
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
-given to \fBpcre_config()\fP, or if the PCRE_CONFIG_UTF8 option is given to
-\fBpcre16_config()\fP, the result is the PCRE_ERROR_BADOPTION error.
+given to \fBpcre_config()\fP or \fBpcre32_config()\fP, or if the
+PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to \fBpcre16_config()\fP,
+the result is the PCRE_ERROR_BADOPTION error.
.
.
.SH "CHARACTER CODES"
@@ -354,12 +355,12 @@ files, but it can be used for testing the 16-bit library. If it is run with the
command line option \fB-16\fP, patterns and subject strings are converted from
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
-8-bit for output. If the 8-bit library was not compiled, \fBpcretest\fP
-defaults to 16-bit and the \fB-16\fP option is ignored.
+8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
+\fBpcretest\fP defaults to 16-bit and the \fB-16\fP option is ignored.
.P
When PCRE is being built, the \fBRunTest\fP script that is called by "make
-check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit
-and 16-bit libraries has been built, and runs the tests appropriately.
+check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
+16-bit and 32-bit libraries has been built, and runs the tests appropriately.
.
.
.SH "NOT SUPPORTED IN 16-BIT MODE"
diff --git a/doc/pcre32.3 b/doc/pcre32.3
new file mode 100644
index 0000000..e893306
--- /dev/null
+++ b/doc/pcre32.3
@@ -0,0 +1,388 @@
+.TH PCRE 3 "24 June 2012" "PCRE 8.31"
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.sp
+.B #include <pcre.h>
+.
+.
+.SH "PCRE 32-BIT API BASIC FUNCTIONS"
+.rs
+.sp
+.SM
+.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
+.ti +5n
+.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
+.ti +5n
+.B const unsigned char *\fItableptr\fP);
+.PP
+.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
+.ti +5n
+.B int *\fIerrorcodeptr\fP,
+.ti +5n
+.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
+.ti +5n
+.B const unsigned char *\fItableptr\fP);
+.PP
+.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
+.ti +5n
+.B const char **\fIerrptr\fP);
+.PP
+.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
+.PP
+.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
+.ti +5n
+.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
+.PP
+.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
+.ti +5n
+.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
+.ti +5n
+.B int *\fIworkspace\fP, int \fIwscount\fP);
+.
+.
+.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
+.rs
+.sp
+.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
+.ti +5n
+.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
+.PP
+.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
+.ti +5n
+.B int \fIbuffersize\fP);
+.PP
+.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
+.ti +5n
+.B PCRE_SPTR32 *\fIstringptr\fP);
+.PP
+.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIname\fP);
+.PP
+.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
+.PP
+.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, int \fIstringnumber\fP,
+.ti +5n
+.B PCRE_SPTR32 *\fIstringptr\fP);
+.PP
+.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
+.ti +5n
+.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
+.PP
+.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
+.PP
+.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
+.
+.
+.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
+.rs
+.sp
+.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
+.PP
+.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
+.PP
+.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
+.ti +5n
+.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
+.PP
+.B const unsigned char *pcre32_maketables(void);
+.PP
+.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B int \fIwhat\fP, void *\fIwhere\fP);
+.PP
+.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
+.PP
+.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
+.PP
+.B const char *pcre32_version(void);
+.PP
+.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
+.ti +5n
+.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
+.
+.
+.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
+.rs
+.sp
+.B void *(*pcre32_malloc)(size_t);
+.PP
+.B void (*pcre32_free)(void *);
+.PP
+.B void *(*pcre32_stack_malloc)(size_t);
+.PP
+.B void (*pcre32_stack_free)(void *);
+.PP
+.B int (*pcre32_callout)(pcre32_callout_block *);
+.
+.
+.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
+.rs
+.sp
+.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
+.ti +5n
+.B int \fIkeep_boms\fP);
+.
+.
+.SH "THE PCRE 32-BIT LIBRARY"
+.rs
+.sp
+Starting with release 8.32, it is possible to compile a PCRE library that
+supports 32-bit character strings, including UTF-32 strings, as well as or
+instead of the original 8-bit library. The majority of the work to make this
+possible was done by Zoltan Herczeg for the 16-bit library. All three libraries
+contain identical sets of functions, used in exactly the same way. Only the
+names of the functions and the data types of their arguments and results are
+different. To avoid over-complication and reduce the documentation maintenance
+load, most of the PCRE documentation describes the 8-bit library, with only
+occasional references to the 16-bit and 32-bit libraries. This page describes
+what is different when you use the 32-bit library.
+.P
+WARNING: A single application can be linked with all or any of the three
+libraries, but you must take care when processing any particular pattern
+to use functions from just one library. For example, if you want to study
+a pattern that was compiled with \fBpcre32_compile()\fP, you must do so
+with \fBpcre32_study()\fP, not \fBpcre_study()\fP, and you must free the
+study data with \fBpcre32_free_study()\fP.
+.
+.
+.SH "THE HEADER FILE"
+.rs
+.sp
+There is only one header file, \fBpcre.h\fP. It contains prototypes for all the
+functions in all three libraries, as well as definitions of flags, structures, error
+codes, etc.
+.
+.
+.SH "THE LIBRARY NAME"
+.rs
+.sp
+In Unix-like systems, the 32-bit library is called \fBlibpcre32\fP, and can
+normally be accessed by adding \fB-lpcre32\fP to the command for linking an
+application that uses PCRE.
+.
+.
+.SH "STRING TYPES"
+.rs
+.sp
+In the 8-bit library, strings are passed to PCRE library functions as vectors
+of bytes with the C type "char *". In the 32-bit library, strings are passed as
+vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
+appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
+very many environments, "unsigned int" is a 32-bit data type. When PCRE is built,
+it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is a 32-bit
+data type. If it is not, the build fails with an error message telling the
+maintainer to modify the definition appropriately.
+.
+.
+.SH "STRUCTURE TYPES"
+.rs
+.sp
+The types of the opaque structures that are used for compiled 32-bit patterns
+and JIT stacks are \fBpcre32\fP and \fBpcre32_jit_stack\fP respectively. The
+type of the user-accessible structure that is returned by \fBpcre32_study()\fP
+is \fBpcre32_extra\fP, and the type of the structure that is used for passing
+data to a callout function is \fBpcre32_callout_block\fP. These structures
+contain the same fields, with the same names, as their 8-bit counterparts. The
+only difference is that pointers to character strings are 32-bit instead of
+8-bit types.
+.
+.
+.SH "32-BIT FUNCTIONS"
+.rs
+.sp
+For every function in the 8-bit library there is a corresponding function in
+the 32-bit library with a name that starts with \fBpcre32_\fP instead of
+\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra
+function, \fBpcre32_utf32_to_host_byte_order()\fP. This is a utility function
+that converts a UTF-32 character string to host byte order if necessary. The
+other 32-bit functions expect the strings they are passed to be in host byte
+order.
+.P
+The \fIinput\fP and \fIoutput\fP arguments of
+\fBpcre32_utf32_to_host_byte_order()\fP may point to the same address, that is,
+conversion in place is supported. The output buffer must be at least as long as
+the input.
+.P
+The \fIlength\fP argument specifies the number of 32-bit data units in the
+input string; a negative value specifies a zero-terminated string.
+.P
+If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host
+byte order. This may be changed by byte-order marks (BOMs) anywhere in the
+string (commonly as the first character).
+.P
+If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it
+points means that the input starts off in host byte order, otherwise the
+opposite order is assumed. Again, BOMs in the string can change this. The final
+byte order is passed back at the end of processing.
+.P
+If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied
+into the output string. Otherwise they are discarded.
+.P
+The result of the function is the number of 32-bit units placed into the output
+buffer, including the zero terminator if the string was zero-terminated.
+.
+.
+.SH "SUBJECT STRING OFFSETS"
+.rs
+.sp
+The offsets within subject strings that are returned by the matching functions
+are in 32-bit units rather than bytes.
+.
+.
+.SH "NAMED SUBPATTERNS"
+.rs
+.sp
+The name-to-number translation table that is maintained for named subpatterns
+uses 32-bit characters. The \fBpcre32_get_stringtable_entries()\fP function
+returns the length of each entry in the table as the number of 32-bit data
+units.
+.
+.
+.SH "OPTION NAMES"
+.rs
+.sp
+There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
+which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
+fact, these new options define the same bits in the options word. There is a
+discussion about the
+.\" HTML <a href="pcreunicode.html#utf32strings">
+.\" </a>
+validity of UTF-32 strings
+.\"
+in the
+.\" HREF
+\fBpcreunicode\fP
+.\"
+page.
+.P
+For the \fBpcre32_config()\fP function there is an option PCRE_CONFIG_UTF32
+that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
+given to \fBpcre_config()\fP or \fBpcre16_config()\fP, or if the
+PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to \fBpcre32_config()\fP,
+the result is the PCRE_ERROR_BADOPTION error.
+.
+.
+.SH "CHARACTER CODES"
+.rs
+.sp
+In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
+same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
+from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
+than 0xff can therefore be influenced by the locale in the same way as before.
+Characters greater than 0xff have only one case, and no "type" (such as letter
+or digit).
+.P
+In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
+the exception of values in the range 0xd800 to 0xdfff because those are
+"surrogate" values that are ill-formed in UTF-32.
+.P
+A UTF-32 string can indicate its endianness by a special code known as a
+byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
+to be in host byte order. A utility function called
+\fBpcre32_utf32_to_host_byte_order()\fP is provided to help with this (see
+above).
+.
+.
+.SH "ERROR NAMES"
+.rs
+.sp
+The error PCRE_ERROR_BADUTF32_OFFSET corresponds to its 8-bit counterpart.
+The error PCRE_ERROR_BADMODE is given when a compiled
+pattern is passed to a function that processes patterns in another
+mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to
+\fBpcre32_exec()\fP.
+.P
+There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
+UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
+are described in the section entitled
+.\" HTML <a href="pcreapi.html#badutf8reasons">
+.\" </a>
+"Reason codes for invalid UTF-8 strings"
+.\"
+in the main
+.\" HREF
+\fBpcreapi\fP
+.\"
+page. The UTF-32 errors are:
+.sp
+ PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
+ PCRE_UTF32_ERR2 Invalid character 0xfffe
+.
+.
+.SH "ERROR TEXTS"
+.rs
+.sp
+If there is an error while compiling a pattern, the error text that is passed
+back by \fBpcre32_compile()\fP or \fBpcre32_compile2()\fP is still an 8-bit
+character string, zero-terminated.
+.
+.
+.SH "CALLOUTS"
+.rs
+.sp
+The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to
+a callout function point to 32-bit vectors.
+.
+.
+.SH "TESTING"
+.rs
+.sp
+The \fBpcretest\fP program continues to operate with 8-bit input and output
+files, but it can be used for testing the 32-bit library. If it is run with the
+command line option \fB-32\fP, patterns and subject strings are converted from
+8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
+are used instead of the 8-bit ones. Returned 32-bit strings are converted to
+8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
+\fBpcretest\fP defaults to 32-bit and the \fB-32\fP option is ignored.
+.P
+When PCRE is being built, the \fBRunTest\fP script that is called by "make
+check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
+16-bit and 32-bit libraries has been built, and runs the tests appropriately.
+.
+.
+.SH "NOT SUPPORTED IN 32-BIT MODE"
+.rs
+.sp
+Not all the features of the 8-bit library are available with the 32-bit
+library. The C++ and POSIX wrapper functions support only the 8-bit library,
+and the \fBpcregrep\fP program is at present 8-bit only.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 24 June 2012
+Copyright (c) 1997-2012 University of Cambridge.
+.fi
diff --git a/doc/pcre_assign_jit_stack.3 b/doc/pcre_assign_jit_stack.3
index fc32dda..e1563b6 100644
--- a/doc/pcre_assign_jit_stack.3
+++ b/doc/pcre_assign_jit_stack.3
@@ -1,4 +1,4 @@
-.TH PCRE_ASSIGN_JIT_STACK 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_ASSIGN_JIT_STACK 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,15 +14,19 @@ PCRE - Perl-compatible regular expressions
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
.ti +5n
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
+.PP
+.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
+.ti +5n
+.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
.
.SH DESCRIPTION
.rs
.sp
This function provides control over the memory used as a stack at run-time by a
-call to \fBpcre[16]_exec()\fP with a pattern that has been successfully
+call to \fBpcre[16|32]_exec()\fP with a pattern that has been successfully
compiled with JIT optimization. The arguments are:
.sp
- extra the data pointer returned by \fBpcre[16]_study()\fP
+ extra the data pointer returned by \fBpcre[16|32]_study()\fP
callback a callback function
data a JIT stack or a value to be passed to the callback
function
@@ -31,12 +35,12 @@ If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on
the machine stack is used.
.P
If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must
-be a valid JIT stack, the result of calling \fBpcre[16]_jit_stack_alloc()\fP.
+be a valid JIT stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
.P
If \fIcallback\fP not NULL, it is called with \fIdata\fP as an argument at
the start of matching, in order to set up a JIT stack. If the result is NULL,
the internal 32K stack is used; otherwise the return value must be a valid JIT
-stack, the result of calling \fBpcre[16]_jit_stack_alloc()\fP.
+stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
.P
You may safely assign the same JIT stack to multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
diff --git a/doc/pcre_compile.3 b/doc/pcre_compile.3
index c38c251..d09768d 100644
--- a/doc/pcre_compile.3
+++ b/doc/pcre_compile.3
@@ -1,4 +1,4 @@
-.TH PCRE_COMPILE 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -18,12 +18,18 @@ PCRE - Perl-compatible regular expressions
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
+.PP
+.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
+.ti +5n
+.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
+.ti +5n
+.B const unsigned char *\fItableptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function compiles a regular expression into an internal form. It is the
-same as \fBpcre[16]_compile2()\fP, except for the absence of the
+same as \fBpcre[16|32]_compile2()\fP, except for the absence of the
\fIerrorcodeptr\fP argument. Its arguments are:
.sp
\fIpattern\fP A zero-terminated string containing the
@@ -61,16 +67,20 @@ The option bits are:
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)
+ PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
+ validity (only relevant if
+ PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if
PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run in \fBpcre16_compile()\fP UTF-16 mode
+ PCRE_UTF32 Run in \fBpcre32_compile()\fP UTF-32 mode
PCRE_UTF8 Run in \fBpcre_compile()\fP UTF-8 mode
.sp
-PCRE must be built with UTF support in order to use PCRE_UTF8/16 and
-PCRE_NO_UTF8/16_CHECK, and with UCP support if PCRE_UCP is used.
+PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
+PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
.P
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
diff --git a/doc/pcre_compile2.3 b/doc/pcre_compile2.3
index 58b8a14..1fcae43 100644
--- a/doc/pcre_compile2.3
+++ b/doc/pcre_compile2.3
@@ -1,4 +1,4 @@
-.TH PCRE_COMPILE2 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -22,12 +22,20 @@ PCRE - Perl-compatible regular expressions
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
+.PP
+.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
+.ti +5n
+.B int *\fIerrorcodeptr\fP,
+.ti +5n
+.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
+.ti +5n
+.B const unsigned char *\fItableptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function compiles a regular expression into an internal form. It is the
-same as \fBpcre[16]_compile()\fP, except for the addition of the
+same as \fBpcre[16|32]_compile()\fP, except for the addition of the
\fIerrorcodeptr\fP argument. The arguments are:
.
.sp
@@ -67,16 +75,20 @@ The option bits are:
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)
+ PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
+ validity (only relevant if
+ PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if
PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode
+ PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode
PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode
.sp
-PCRE must be built with UTF support in order to use PCRE_UTF8/16 and
-PCRE_NO_UTF8/16_CHECK, and with UCP support if PCRE_UCP is used.
+PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
+PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
.P
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
diff --git a/doc/pcre_config.3 b/doc/pcre_config.3
index 45013a4..5a6e6be 100644
--- a/doc/pcre_config.3
+++ b/doc/pcre_config.3
@@ -1,4 +1,4 @@
-.TH PCRE_CONFIG 3 "21 January 2012" "PCRE 8.30"
+.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,6 +10,8 @@ PCRE - Perl-compatible regular expressions
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
+.PP
+.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
.
.SH DESCRIPTION
.rs
@@ -49,6 +51,8 @@ point to an unsigned long integer. The available codes are:
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes
0=no); option for \fBpcre16_config()\fP
+ PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes
+ 0=no); option for \fBpcre32_config()\fP
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no);
option for \fBpcre_config()\fP
PCRE_CONFIG_UNICODE_PROPERTIES
@@ -56,8 +60,10 @@ point to an unsigned long integer. The available codes are:
(1=yes 0=no)
.sp
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
-is also given if PCRE_CONFIG_UTF16 is passed to \fBpcre_config()\fP or if
-PCRE_CONFIG_UTF8 is passed to \fBpcre16_config()\fP.
+is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
+\fBpcre_config()\fP, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
+\fBpcre16_config()\fP, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
+\fBpcre32_config()\fP.
.P
There is a complete description of the PCRE native API in the
.\" HREF
diff --git a/doc/pcre_copy_named_substring.3 b/doc/pcre_copy_named_substring.3
index 9838816..e3281d8 100644
--- a/doc/pcre_copy_named_substring.3
+++ b/doc/pcre_copy_named_substring.3
@@ -1,4 +1,4 @@
-.TH PCRE_COPY_NAMED_SUBSTRING 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_COPY_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -22,6 +22,14 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
+.PP
+.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
+.ti +5n
+.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
.
.SH DESCRIPTION
.rs
@@ -31,8 +39,8 @@ by name, into a given buffer. The arguments are:
.sp
\fIcode\fP Pattern that was successfully matched
\fIsubject\fP Subject that has been successfully matched
- \fIovector\fP Offset vector that \fBpcre[16]_exec()\fP used
- \fIstringcount\fP Value returned by \fBpcre[16]_exec()\fP
+ \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
+ \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringname\fP Name of the required substring
\fIbuffer\fP Buffer to receive the string
\fIbuffersize\fP Size of buffer
diff --git a/doc/pcre_copy_substring.3 b/doc/pcre_copy_substring.3
index 6bb09f8..96bff3a 100644
--- a/doc/pcre_copy_substring.3
+++ b/doc/pcre_copy_substring.3
@@ -1,4 +1,4 @@
-.TH PCRE_COPY_SUBSTRING 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_COPY_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -18,6 +18,12 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
+.PP
+.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
+.ti +5n
+.B int \fIbuffersize\fP);
.
.SH DESCRIPTION
.rs
@@ -26,8 +32,8 @@ This is a convenience function for extracting a captured substring into a given
buffer. The arguments are:
.sp
\fIsubject\fP Subject that has been successfully matched
- \fIovector\fP Offset vector that \fBpcre[16]_exec()\fP used
- \fIstringcount\fP Value returned by \fBpcre[16]_exec()\fP
+ \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
+ \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringnumber\fP Number of the required substring
\fIbuffer\fP Buffer to receive the string
\fIbuffersize\fP Size of buffer
diff --git a/doc/pcre_dfa_exec.3 b/doc/pcre_dfa_exec.3
index 2df5d89..d1901a5 100644
--- a/doc/pcre_dfa_exec.3
+++ b/doc/pcre_dfa_exec.3
@@ -1,4 +1,4 @@
-.TH PCRE_DFA_EXEC 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -22,6 +22,14 @@ PCRE - Perl-compatible regular expressions
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
+.PP
+.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
+.ti +5n
+.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
+.ti +5n
+.B int *\fIworkspace\fP, int \fIwscount\fP);
.
.SH DESCRIPTION
.rs
@@ -29,11 +37,11 @@ PCRE - Perl-compatible regular expressions
This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
-matching function is \fBpcre[16]_exec()\fP. The arguments for this function
+matching function is \fBpcre[16|32]_exec()\fP. The arguments for this function
are:
.sp
\fIcode\fP Points to the compiled pattern
- \fIextra\fP Points to an associated \fBpcre[16]_extra\fP structure,
+ \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL
\fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes
@@ -64,6 +72,9 @@ The options are:
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
validity (only relevant if PCRE_UTF16
was set at compile time)
+ PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
+ validity (only relevant if PCRE_UTF32
+ was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
@@ -85,10 +96,10 @@ documentation. For details of partial matching, see the
.\"
page.
.P
-A \fBpcre[16]_extra\fP structure contains the following fields:
+A \fBpcre[16|32]_extra\fP structure contains the following fields:
.sp
\fIflags\fP Bits indicating which fields are set
- \fIstudy_data\fP Opaque data from \fBpcre[16]_study()\fP
+ \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
\fImatch_limit\fP Limit on internal resource use
\fImatch_limit_recursion\fP Limit on internal recursion depth
\fIcallout_data\fP Opaque data passed back to callouts
diff --git a/doc/pcre_exec.3 b/doc/pcre_exec.3
index 0ff0f6f..78012ed 100644
--- a/doc/pcre_exec.3
+++ b/doc/pcre_exec.3
@@ -1,4 +1,4 @@
-.TH PCRE_EXEC 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -18,6 +18,12 @@ PCRE - Perl-compatible regular expressions
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
+.PP
+.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
+.ti +5n
+.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.
.SH DESCRIPTION
.rs
@@ -27,7 +33,7 @@ string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are:
.sp
\fIcode\fP Points to the compiled pattern
- \fIextra\fP Points to an associated \fBpcre[16]_extra\fP structure,
+ \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL
\fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes
@@ -56,6 +62,9 @@ The options are:
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
validity (only relevant if PCRE_UTF16
was set at compile time)
+ PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
+ validity (only relevant if PCRE_UTF32
+ was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
@@ -71,7 +80,7 @@ For details of partial matching, see the
page. A \fBpcre_extra\fP structure contains the following fields:
.sp
\fIflags\fP Bits indicating which fields are set
- \fIstudy_data\fP Opaque data from \fBpcre[16]_study()\fP
+ \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
\fImatch_limit\fP Limit on internal resource use
\fImatch_limit_recursion\fP Limit on internal recursion depth
\fIcallout_data\fP Opaque data passed back to callouts
diff --git a/doc/pcre_free_study.3 b/doc/pcre_free_study.3
index 9fd5d80..8826b73 100644
--- a/doc/pcre_free_study.3
+++ b/doc/pcre_free_study.3
@@ -1,4 +1,4 @@
-.TH PCRE_FREE_STUDY 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_FREE_STUDY 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,12 +10,14 @@ PCRE - Perl-compatible regular expressions
.B void pcre_free_study(pcre_extra *\fIextra\fP);
.PP
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
+.PP
+.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is used to free the memory used for the data generated by a call
-to \fBpcre[16]_study()\fP when it is no longer needed. The argument must be the
+to \fBpcre[16|32]_study()\fP when it is no longer needed. The argument must be the
result of such a call.
.P
There is a complete description of the PCRE native API in the
diff --git a/doc/pcre_free_substring.3 b/doc/pcre_free_substring.3
index dff5bb0..88c0401 100644
--- a/doc/pcre_free_substring.3
+++ b/doc/pcre_free_substring.3
@@ -1,4 +1,4 @@
-.TH PCRE_FREE_SUBSTRING 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_FREE_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,12 +10,14 @@ PCRE - Perl-compatible regular expressions
.B void pcre_free_substring(const char *\fIstringptr\fP);
.PP
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
+.PP
+.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for freeing the store obtained by a previous
-call to \fBpcre[16]_get_substring()\fP or \fBpcre[16]_get_named_substring()\fP.
+call to \fBpcre[16|32]_get_substring()\fP or \fBpcre[16|32]_get_named_substring()\fP.
Its only argument is a pointer to the string.
.P
There is a complete description of the PCRE native API in the
diff --git a/doc/pcre_free_substring_list.3 b/doc/pcre_free_substring_list.3
index a587759..248b4bd 100644
--- a/doc/pcre_free_substring_list.3
+++ b/doc/pcre_free_substring_list.3
@@ -1,4 +1,4 @@
-.TH PCRE_FREE_SUBSTRING_LIST 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_FREE_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,12 +10,14 @@ PCRE - Perl-compatible regular expressions
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
.PP
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
+.PP
+.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for freeing the store obtained by a previous
-call to \fBpcre[16]_get_substring_list()\fP. Its only argument is a pointer to
+call to \fBpcre[16|32]_get_substring_list()\fP. Its only argument is a pointer to
the list of string pointers.
.P
There is a complete description of the PCRE native API in the
diff --git a/doc/pcre_fullinfo.3 b/doc/pcre_fullinfo.3
index 1c2a58f..d722bc7 100644
--- a/doc/pcre_fullinfo.3
+++ b/doc/pcre_fullinfo.3
@@ -1,4 +1,4 @@
-.TH PCRE_FULLINFO 3 "21 January 2012" "PCRE 8.30"
+.TH PCRE_FULLINFO 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,6 +14,10 @@ PCRE - Perl-compatible regular expressions
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
+.PP
+.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
+.ti +5n
+.B int \fIwhat\fP, void *\fIwhere\fP);
.
.SH DESCRIPTION
.rs
@@ -21,7 +25,7 @@ PCRE - Perl-compatible regular expressions
This function returns information about a compiled pattern. Its arguments are:
.sp
\fIcode\fP Compiled regular expression
- \fIextra\fP Result of \fBpcre[16]_study()\fP or NULL
+ \fIextra\fP Result of \fBpcre[16|32]_study()\fP or NULL
\fIwhat\fP What information is required
\fIwhere\fP Where to put the information
.sp
@@ -56,6 +60,7 @@ following \fIwhat\fP values:
PCRE_INFO_DEFAULT_TABLES const unsigned char *
PCRE_INFO_FIRSTTABLE const unsigned char *
PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library)
+ PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library)
PCRE_INFO_NAMETABLE const unsigned char * (8-bit library)
PCRE_INFO_OPTIONS unsigned long int
PCRE_INFO_SIZE size_t
diff --git a/doc/pcre_get_named_substring.3 b/doc/pcre_get_named_substring.3
index 88dd2da..f81a243 100644
--- a/doc/pcre_get_named_substring.3
+++ b/doc/pcre_get_named_substring.3
@@ -1,4 +1,4 @@
-.TH PCRE_GET_NAMED_SUBSTRING 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_GET_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -22,6 +22,14 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
+.PP
+.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
+.ti +5n
+.B PCRE_SPTR32 *\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
@@ -31,14 +39,14 @@ arguments are:
.sp
\fIcode\fP Compiled pattern
\fIsubject\fP Subject that has been successfully matched
- \fIovector\fP Offset vector that \fBpcre[16]_exec()\fP used
- \fIstringcount\fP Value returned by \fBpcre[16]_exec()\fP
+ \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
+ \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringname\fP Name of the required substring
\fIstringptr\fP Where to put the string pointer
.sp
The memory in which the substring is placed is obtained by calling
-\fBpcre[16]_malloc()\fP. The convenience function
-\fBpcre[16]_free_substring()\fP can be used to free it when it is no longer
+\fBpcre[16|32]_malloc()\fP. The convenience function
+\fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer
needed. The yield of the function is the length of the extracted substring,
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
diff --git a/doc/pcre_get_stringnumber.3 b/doc/pcre_get_stringnumber.3
index 79c52dc..7def00b 100644
--- a/doc/pcre_get_stringnumber.3
+++ b/doc/pcre_get_stringnumber.3
@@ -1,4 +1,4 @@
-.TH PCRE_GET_STRINGNUMBER 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_GET_STRINGNUMBER 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,6 +14,10 @@ PCRE - Perl-compatible regular expressions
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP);
+.PP
+.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIname\fP);
.
.SH DESCRIPTION
.rs
@@ -27,8 +31,8 @@ parenthesis in a compiled pattern. Its arguments are:
The yield of the function is the number of the parenthesis if the name is
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
-\fBpcre[16]_get_stringnumber()\fP. You can obtain the complete list by calling
-\fBpcre[16]_get_stringtable_entries()\fP.
+\fBpcre[16|32]_get_stringnumber()\fP. You can obtain the complete list by calling
+\fBpcre[16|32]_get_stringtable_entries()\fP.
.P
There is a complete description of the PCRE native API in the
.\" HREF
diff --git a/doc/pcre_get_stringtable_entries.3 b/doc/pcre_get_stringtable_entries.3
index a192e83..3917816 100644
--- a/doc/pcre_get_stringtable_entries.3
+++ b/doc/pcre_get_stringtable_entries.3
@@ -1,4 +1,4 @@
-.TH PCRE_GET_STRINGTABLE_ENTRIES 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_GET_STRINGTABLE_ENTRIES 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,6 +14,10 @@ PCRE - Perl-compatible regular expressions
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
+.PP
+.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
.
.SH DESCRIPTION
.rs
@@ -21,7 +25,7 @@ PCRE - Perl-compatible regular expressions
This convenience function finds, for a compiled pattern, the first and last
entries for a given name in the table that translates capturing parenthesis
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
-\fInot\fP set), it is usually easier to use \fBpcre[16]_get_stringnumber()\fP
+\fInot\fP set), it is usually easier to use \fBpcre[16|32]_get_stringnumber()\fP
instead.
.sp
\fIcode\fP Compiled regular expression
diff --git a/doc/pcre_get_substring.3 b/doc/pcre_get_substring.3
index 3af1948..d5bc60c 100644
--- a/doc/pcre_get_substring.3
+++ b/doc/pcre_get_substring.3
@@ -1,4 +1,4 @@
-.TH PCRE_GET_SUBSTRING 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_GET_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -18,6 +18,12 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
+.PP
+.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
+.ti +5n
+.B int \fIstringcount\fP, int \fIstringnumber\fP,
+.ti +5n
+.B PCRE_SPTR32 *\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
@@ -26,14 +32,14 @@ This is a convenience function for extracting a captured substring. The
arguments are:
.sp
\fIsubject\fP Subject that has been successfully matched
- \fIovector\fP Offset vector that \fBpcre[16]_exec()\fP used
- \fIstringcount\fP Value returned by \fBpcre[16]_exec()\fP
+ \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
+ \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringnumber\fP Number of the required substring
\fIstringptr\fP Where to put the string pointer
.sp
The memory in which the substring is placed is obtained by calling
-\fBpcre[16]_malloc()\fP. The convenience function
-\fBpcre[16]_free_substring()\fP can be used to free it when it is no longer
+\fBpcre[16|32]_malloc()\fP. The convenience function
+\fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer
needed. The yield of the function is the length of the substring,
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
PCRE_ERROR_NOSUBSTRING if the string number is invalid.
diff --git a/doc/pcre_get_substring_list.3 b/doc/pcre_get_substring_list.3
index 33c3a51..a1a5749 100644
--- a/doc/pcre_get_substring_list.3
+++ b/doc/pcre_get_substring_list.3
@@ -1,4 +1,4 @@
-.TH PCRE_GET_SUBSTRING_LIST 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_GET_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,6 +14,10 @@ PCRE - Perl-compatible regular expressions
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
+.PP
+.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
+.ti +5n
+.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
.
.SH DESCRIPTION
.rs
@@ -22,13 +26,13 @@ This is a convenience function for extracting a list of all the captured
substrings. The arguments are:
.sp
\fIsubject\fP Subject that has been successfully matched
- \fIovector\fP Offset vector that \fBpcre[16]_exec\fP used
- \fIstringcount\fP Value returned by \fBpcre[16]_exec\fP
+ \fIovector\fP Offset vector that \fBpcre[16|32]_exec\fP used
+ \fIstringcount\fP Value returned by \fBpcre[16|32]_exec\fP
\fIlistptr\fP Where to put a pointer to the list
.sp
The memory in which the substrings and the list are placed is obtained by
-calling \fBpcre[16]_malloc()\fP. The convenience function
-\fBpcre[16]_free_substring_list()\fP can be used to free it when it is no
+calling \fBpcre[16|32]_malloc()\fP. The convenience function
+\fBpcre[16|32]_free_substring_list()\fP can be used to free it when it is no
longer needed. A pointer to a list of pointers is put in the variable whose
address is in \fIlistptr\fP. The list is terminated by a NULL pointer. The
yield of the function is zero on success or PCRE_ERROR_NOMEMORY if sufficient
diff --git a/doc/pcre_jit_stack_alloc.3 b/doc/pcre_jit_stack_alloc.3
index b488d85..5d2a117 100644
--- a/doc/pcre_jit_stack_alloc.3
+++ b/doc/pcre_jit_stack_alloc.3
@@ -1,4 +1,4 @@
-.TH PCRE_JIT_STACK_ALLOC 3 "21 January 2012" "PCRE 8.30"
+.TH PCRE_JIT_STACK_ALLOC 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,14 +14,18 @@ PCRE - Perl-compatible regular expressions
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
.ti +5n
.B int \fImaxsize\fP);
+.PP
+.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
+.ti +5n
+.B int \fImaxsize\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is used to create a stack for use by the code compiled by the JIT
-optimization of \fBpcre[16]_study()\fP. The arguments are a starting size for
+optimization of \fBpcre[16|32]_study()\fP. The arguments are a starting size for
the stack, and a maximum size to which it is allowed to grow. The result can be
-passed to the JIT run-time code by \fBpcre[16]_assign_jit_stack()\fP, or that
+passed to the JIT run-time code by \fBpcre[16|32]_assign_jit_stack()\fP, or that
function can set up a callback for obtaining a stack. A maximum stack size of
512K to 1M should be more than enough for any pattern. For more details, see
the
diff --git a/doc/pcre_jit_stack_free.3 b/doc/pcre_jit_stack_free.3
index 9f6528b..494724e 100644
--- a/doc/pcre_jit_stack_free.3
+++ b/doc/pcre_jit_stack_free.3
@@ -1,4 +1,4 @@
-.TH PCRE_JIT_STACK_FREE 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_JIT_STACK_FREE 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,12 +10,14 @@ PCRE - Perl-compatible regular expressions
.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
.PP
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
+.PP
+.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is used to free a JIT stack that was created by
-\fBpcre[16]_jit_stack_alloc()\fP when it is no longer needed. For more details,
+\fBpcre[16|32]_jit_stack_alloc()\fP when it is no longer needed. For more details,
see the
.\" HREF
\fBpcrejit\fP
diff --git a/doc/pcre_maketables.3 b/doc/pcre_maketables.3
index 73b188b..b2c3d23 100644
--- a/doc/pcre_maketables.3
+++ b/doc/pcre_maketables.3
@@ -1,4 +1,4 @@
-.TH PCRE_MAKETABLES 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_MAKETABLES 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,13 +10,15 @@ PCRE - Perl-compatible regular expressions
.B const unsigned char *pcre_maketables(void);
.PP
.B const unsigned char *pcre16_maketables(void);
+.PP
+.B const unsigned char *pcre32_maketables(void);
.
.SH DESCRIPTION
.rs
.sp
This function builds a set of character tables for character values less than
-256. These can be passed to \fBpcre[16]_compile()\fP to override PCRE's
-internal, built-in tables (which were made by \fBpcre[16]_maketables()\fP when
+256. These can be passed to \fBpcre[16|32]_compile()\fP to override PCRE's
+internal, built-in tables (which were made by \fBpcre[16|32]_maketables()\fP when
PCRE was compiled). You might want to do this if you are using a non-standard
locale. The function yields a pointer to the tables.
.P
diff --git a/doc/pcre_pattern_to_host_byte_order.3 b/doc/pcre_pattern_to_host_byte_order.3
index 8c34473..4c306ee 100644
--- a/doc/pcre_pattern_to_host_byte_order.3
+++ b/doc/pcre_pattern_to_host_byte_order.3
@@ -1,4 +1,4 @@
-.TH PCRE_PATTERN_TO_HOST_BYTE_ORDER 3 "21 January 2012" "PCRE 8.30"
+.TH PCRE_PATTERN_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,7 +14,10 @@ PCRE - Perl-compatible regular expressions
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
.ti +5n
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
-.
+.PP
+.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
+.ti +5n
+.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.
.SH DESCRIPTION
.rs
@@ -25,7 +28,7 @@ pattern that has been compiled on one host is transferred to another that might
have different endianness. The arguments are:
.sp
\fIcode\fP A compiled regular expression
- \fIextra\fP Points to an associated \fBpcre[16]_extra\fP structure,
+ \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL
\fItables\fP Pointer to character tables, or NULL to
set the built-in default
diff --git a/doc/pcre_refcount.3 b/doc/pcre_refcount.3
index a30eecf..45a41fe 100644
--- a/doc/pcre_refcount.3
+++ b/doc/pcre_refcount.3
@@ -1,4 +1,4 @@
-.TH PCRE_REFCOUNT 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_REFCOUNT 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,6 +10,8 @@ PCRE - Perl-compatible regular expressions
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
.PP
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
+.PP
+.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
.
.SH DESCRIPTION
.rs
diff --git a/doc/pcre_study.3 b/doc/pcre_study.3
index 13ea6c4..1f2b465 100644
--- a/doc/pcre_study.3
+++ b/doc/pcre_study.3
@@ -1,4 +1,4 @@
-.TH PCRE_STUDY 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_STUDY 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -14,6 +14,10 @@ PCRE - Perl-compatible regular expressions
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
+.PP
+.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
+.ti +5n
+.B const char **\fIerrptr\fP);
.
.SH DESCRIPTION
.rs
@@ -22,11 +26,11 @@ This function studies a compiled pattern, to see if additional information can
be extracted that might speed up matching. Its arguments are:
.sp
\fIcode\fP A compiled regular expression
- \fIoptions\fP Options for \fBpcre[16]_study()\fP
+ \fIoptions\fP Options for \fBpcre[16|32]_study()\fP
\fIerrptr\fP Where to put an error message
.sp
If the function succeeds, it returns a value that can be passed to
-\fBpcre[16]_exec()\fP or \fBpcre[16]_dfa_exec()\fP via their \fIextra\fP
+\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP via their \fIextra\fP
arguments.
.P
If the function returns NULL, either it could not find any additional
diff --git a/doc/pcre_utf32_to_host_byte_order.3 b/doc/pcre_utf32_to_host_byte_order.3
new file mode 100644
index 0000000..fa4c179
--- /dev/null
+++ b/doc/pcre_utf32_to_host_byte_order.3
@@ -0,0 +1,46 @@
+.TH PCRE_UTF32_TO_HOST_BYTE_ORDER 3 "24 June 2012" "PCRE 8.30"
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre.h>
+.PP
+.SM
+.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
+.ti +5n
+.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
+.ti +5n
+.B int \fIkeep_boms\fP);
+.
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function, which exists only in the 32-bit library, converts a UTF-32
+string to the correct order for the current host, taking account of any byte
+order marks (BOMs) within the string. Its arguments are:
+.sp
+ \fIoutput\fP pointer to output buffer, may be the same as \fIinput\fP
+ \fIinput\fP pointer to input buffer
+ \fIlength\fP number of 32-bit units in the input, or negative for
+ a zero-terminated string
+ \fIhost_byte_order\fP a NULL value or a non-zero value pointed to means
+ start in host byte order
+ \fIkeep_boms\fP if non-zero, BOMs are copied to the output string
+.sp
+The result of the function is the number of 32-bit units placed into the output
+buffer, including the zero terminator if the string was zero-terminated.
+.P
+If \fIhost_byte_order\fP is not NULL, it is set to indicate the byte order that
+is current at the end of the string.
+.P
+There is a complete description of the PCRE native API in the
+.\" HREF
+\fBpcreapi\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcreposix\fP
+.\"
+page.
diff --git a/doc/pcre_version.3 b/doc/pcre_version.3
index bcbd4f2..0f4973f 100644
--- a/doc/pcre_version.3
+++ b/doc/pcre_version.3
@@ -1,4 +1,4 @@
-.TH PCRE_VERSION 3 "13 January 2012" "PCRE 8.30"
+.TH PCRE_VERSION 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -10,13 +10,15 @@ PCRE - Perl-compatible regular expressions
.B const char *pcre_version(void);
.PP
.B const char *pcre16_version(void);
+.PP
+.B const char *pcre32_version(void);
.
.SH DESCRIPTION
.rs
.sp
-This function (even in the 16-bit library) returns a zero-terminated, 8-bit
-character string that gives the version number of the PCRE library and the date
-of its release.
+This function (even in the 16-bit and 32-bit libraries) returns a
+zero-terminated, 8-bit character string that gives the version number of the
+PCRE library and the date of its release.
.P
There is a complete description of the PCRE native API in the
.\" HREF
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index cfab3a8..de49b0f 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -134,14 +134,15 @@ PCRE - Perl-compatible regular expressions
.B int (*pcre_callout)(pcre_callout_block *);
.
.
-.SH "PCRE 8-BIT AND 16-BIT LIBRARIES"
+.SH "PCRE 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
.rs
.sp
From release 8.30, PCRE can be compiled as a library for handling 16-bit
character strings as well as, or instead of, the original library that handles
-8-bit character strings. To avoid too much complication, this document
-describes the 8-bit versions of the functions, with only occasional references
-to the 16-bit library.
+8-bit character strings. From release 8.32, PCRE can also be compiled as a
+library for handling 32-bit character strings. To avoid too much complication,
+this document describes the 8-bit versions of the functions, with only
+occasional references to the 16-bit and 32-bit libraries.
.P
The 16-bit functions operate in the same way as their 8-bit counterparts; they
just use different data types for their arguments and results, and their names
@@ -150,6 +151,13 @@ in its name (for example, PCRE_UTF8), there is a corresponding 16-bit name with
UTF8 replaced by UTF16. This facility is in fact just cosmetic; the 16-bit
option names define the same bit values.
.P
+The 32-bit functions operate in the same way as their 8-bit counterparts; they
+just use different data types for their arguments and results, and their names
+start with \fBpcre32_\fP instead of \fBpcre_\fP. For every option that has UTF8
+in its name (for example, PCRE_UTF8), there is a corresponding 32-bit name with
+UTF8 replaced by UTF32. This facility is in fact just cosmetic; the 32-bit
+option names define the same bit values.
+.P
References to bytes and UTF-8 in this document should be read as references to
16-bit data quantities and UTF-16 when using the 16-bit library, unless
specified otherwise. More details of the specific differences for the 16-bit
@@ -159,6 +167,16 @@ library are given in the
.\"
page.
.
+.P
+References to bytes and UTF-8 in this document should be read as references to
+32-bit data quantities and UTF-32 when using the 32-bit library, unless
+specified otherwise. More details of the specific differences for the 32-bit
+library are given in the
+.\" HREF
+\fBpcre32\fP
+.\"
+page.
+.
.
.SH "PCRE API OVERVIEW"
.rs
@@ -392,15 +410,23 @@ not recognized. The following information is available:
PCRE_CONFIG_UTF8
.sp
The output is an integer that is set to one if UTF-8 support is available;
-otherwise it is set to zero. If this option is given to the 16-bit version of
-this function, \fBpcre16_config()\fP, the result is PCRE_ERROR_BADOPTION.
+otherwise it is set to zero. This value should normally be given to the 8-bit
+version of this function, \fBpcre_config()\fP. If it is given to the 16-bit
+or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
.sp
PCRE_CONFIG_UTF16
.sp
The output is an integer that is set to one if UTF-16 support is available;
otherwise it is set to zero. This value should normally be given to the 16-bit
version of this function, \fBpcre16_config()\fP. If it is given to the 8-bit
-version of this function, the result is PCRE_ERROR_BADOPTION.
+or 32-bit version of this function, the result is PCRE_ERROR_BADOPTION.
+.sp
+ PCRE_CONFIG_UTF32
+.sp
+The output is an integer that is set to one if UTF-32 support is available;
+otherwise it is set to zero. This value should normally be given to the 32-bit
+version of this function, \fBpcre32_config()\fP. If it is given to the 8-bit
+or 16-bit version of this function, the result is PCRE_ERROR_BADOPTION.
.sp
PCRE_CONFIG_UNICODE_PROPERTIES
.sp
@@ -442,10 +468,11 @@ or CRLF. The default can be overridden when a pattern is compiled or matched.
The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. For the 8-bit library, the value can
be 2, 3, or 4. For the 16-bit library, the value is either 2 or 4 and is still
-a number of bytes. The default value of 2 is sufficient for all but the most
-massive patterns, since it allows the compiled pattern to be up to 64K in size.
-Larger values allow larger regular expressions to be compiled, at the expense
-of slower matching.
+a number of bytes. For the 32-bit library, the value is either 2 or 4 and is
+still a number of bytes. The default value of 2 is sufficient for all but the
+most massive patterns, since it allows the compiled pattern to be up to 64K in
+size. Larger values allow larger regular expressions to be compiled, at the
+expense of slower matching.
.sp
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
.sp
@@ -858,8 +885,8 @@ validity checking of subject strings.
The following table lists the error codes than may be returned by
\fBpcre_compile2()\fP, along with the error messages that may be returned by
both compiling functions. Note that error messages are always 8-bit ASCII
-strings, even in 16-bit mode. As PCRE has developed, some error codes have
-fallen out of use. To avoid confusion, they have not been re-used.
+strings, even in 16-bit or 32-bit mode. As PCRE has developed, some error codes
+have fallen out of use. To avoid confusion, they have not been re-used.
.sp
0 no error
1 \e at end of pattern
@@ -942,6 +969,7 @@ fallen out of use. To avoid confusion, they have not been re-used.
74 invalid UTF-16 string (specifically UTF-16)
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
76 character value in \eu.... sequence is too large
+ 77 invalid UTF-32 string (specifically UTF-32)
.sp
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
@@ -1047,7 +1075,8 @@ find out the value in a calling program via the \fBpcre_fullinfo()\fP function.
Studying a pattern is also useful for non-anchored patterns that do not have a
single fixed starting character. A bitmap of possible starting bytes is
created. This speeds up finding a position in the subject at which to start
-matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256.)
+matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256.
+In 32-bit mode, the bitmap is used for 32-bit values less than 256.)
.P
These two optimizations apply to both \fBpcre_exec()\fP and
\fBpcre_dfa_exec()\fP, and the information is also used by the JIT compiler.
@@ -1192,8 +1221,8 @@ variable.
.P
If there is a fixed first value, for example, the letter "c" from a pattern
such as (cat|cow|coyote), its value is returned. In the 8-bit library, the
-value is always less than 256; in the 16-bit library the value can be up to
-0xffff.
+value is always less than 256. In the 16-bit library the value can be up to
+0xffff. In the 32-bit library the value can be up to 0x10ffff.
.P
If there is no fixed first value, and if either
.sp
@@ -1292,7 +1321,9 @@ length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
entry of the table. This is a pointer to \fBchar\fP in the 8-bit library, where
the first two bytes of each entry are the number of the capturing parenthesis,
most significant byte first. In the 16-bit library, the pointer points to
-16-bit data units, the first of which contains the parenthesis number. The rest
+16-bit data units, the first of which contains the parenthesis number.
+In the 32-bit library, the pointer points to 32-bit data units, the first of
+which contains the parenthesis number. The rest
of the entry is the corresponding name, zero terminated.
.P
The names are in alphabetical order. Duplicate names may appear if (?| is used
@@ -1490,6 +1521,9 @@ fields (not necessarily in this order):
.sp
In the 16-bit version of this structure, the \fImark\fP field has type
"PCRE_UCHAR16 **".
+.sp
+In the 32-bit version of this structure, the \fImark\fP field has type
+"PCRE_UCHAR32 **".
.P
The \fIflags\fP field is used to specify which of the other fields are set. The
flag bits are:
@@ -2126,7 +2160,7 @@ documentation for more details.
PCRE_ERROR_BADMODE (-28)
.sp
This error is given if a pattern that was compiled by the 8-bit library is
-passed to a 16-bit library function, or vice versa.
+passed to a 16-bit or 32-bit library function, or vice versa.
.sp
PCRE_ERROR_BADENDIANNESS (-29)
.sp
@@ -2147,6 +2181,10 @@ for the 16-bit library is given in the
.\" HREF
\fBpcre16\fP
.\"
+page. The corresponding information for the 32-bit library is given in the
+.\" HREF
+\fBpcre32\fP
+.\"
page.
.P
When \fBpcre_exec()\fP returns either PCRE_ERROR_BADUTF8 or
@@ -2665,9 +2703,10 @@ fail, this error is given.
.SH "SEE ALSO"
.rs
.sp
-\fBpcre16\fP(3), \fBpcrebuild\fP(3), \fBpcrecallout\fP(3), \fBpcrecpp(3)\fP(3),
-\fBpcrematching\fP(3), \fBpcrepartial\fP(3), \fBpcreposix\fP(3),
-\fBpcreprecompile\fP(3), \fBpcresample\fP(3), \fBpcrestack\fP(3).
+\fBpcre16\fP(3), \fBpcre32\fP(3), \fBpcrebuild\fP(3), \fBpcrecallout\fP(3),
+\fBpcrecpp\fP(3), \fBpcrematching\fP(3), \fBpcrepartial\fP(3),
+\fBpcreposix\fP(3), \fBpcreprecompile\fP(3), \fBpcresample\fP(3),
+\fBpcrestack\fP(3).
.
.
.SH AUTHOR
diff --git a/doc/pcrebuild.3 b/doc/pcrebuild.3
index 5268f24..8c0694b 100644
--- a/doc/pcrebuild.3
+++ b/doc/pcrebuild.3
@@ -33,7 +33,7 @@ The following sections include descriptions of options whose names begin with
exists as well, but as it specifies the default, it is not described.
.
.
-.SH "BUILDING 8-BIT and 16-BIT LIBRARIES"
+.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
.rs
.sp
By default, a library called \fBlibpcre\fP is built, containing functions that
@@ -45,13 +45,21 @@ strings, by adding
.sp
--enable-pcre16
.sp
+to the \fBconfigure\fP command. You can also build a separate
+library, called \fBlibpcre32\fP, in which strings are contained in vectors of
+32-bit data units and interpreted either as single-unit characters or UTF-32
+strings, by adding
+.sp
+ --enable-pcre32
+.sp
to the \fBconfigure\fP command. If you do not want the 8-bit library, add
.sp
--disable-pcre8
.sp
as well. At least one of the two libraries must be built. Note that the C++ and
POSIX wrappers are for the 8-bit library only, and that \fBpcregrep\fP is an
-8-bit program. None of these are built if you select only the 16-bit library.
+8-bit program. None of these are built if you select only the 16-bit or 32-bit
+libraries.
.
.
.SH "BUILDING SHARED AND STATIC LIBRARIES"
@@ -79,7 +87,7 @@ strings). You can disable this by adding
to the \fBconfigure\fP command.
.
.
-.SH "UTF-8 and UTF-16 SUPPORT"
+.SH "UTF-8, UTF-16 AND UTF-32 SUPPORT"
.rs
.sp
To build PCRE with support for UTF Unicode character strings, add
@@ -87,18 +95,19 @@ To build PCRE with support for UTF Unicode character strings, add
--enable-utf
.sp
to the \fBconfigure\fP command. This setting applies to both libraries, adding
-support for UTF-8 to the 8-bit library and support for UTF-16 to the 16-bit
-library. There are no separate options for enabling UTF-8 and UTF-16
+support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
+library, and support for UTF-32 to the 32-bit library.
+There are no separate options for enabling UTF-8, UTF-16 and UTF-32
independently because that would allow ridiculous settings such as requesting
UTF-16 support while building only the 8-bit library. It is not possible to
build one library with UTF support and the other without in the same
configuration. (For backwards compatibility, --enable-utf8 is a synonym of
--enable-utf.)
.P
-Of itself, this setting does not make PCRE treat strings as UTF-8 or UTF-16. As
-well as compiling PCRE with this option, you also have have to set the
-PCRE_UTF8 or PCRE_UTF16 option when you call one of the pattern compiling
-functions.
+Of itself, this setting does not make PCRE treat strings as UTF-8, UTF-16 or
+UTF-32. As well as compiling PCRE with this option, you also have to set
+the PCRE_UTF8, PCRE_UTF16 or PCRE_UTF32 option when you call one of the
+pattern compiling functions.
.P
If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects
its input to be either ASCII or UTF-8 (depending on the run-time option). It is
@@ -231,9 +240,9 @@ three-byte or four-byte offsets by adding a setting such as
--with-link-size=3
.sp
to the \fBconfigure\fP command. The value given must be 2, 3, or 4. For the
-16-bit library, a value of 3 is rounded up to 4. Using longer offsets slows
-down the operation of PCRE because it has to load additional data when handling
-them.
+16-bit and 32-bit libraries, a value of 3 is rounded up to 4. Using longer offsets
+slows down the operation of PCRE because it has to load additional data when
+handling them.
.
.
.SH "AVOIDING EXCESSIVE STACK USAGE"
@@ -419,7 +428,7 @@ immediately before the \fBconfigure\fP command.
.SH "SEE ALSO"
.rs
.sp
-\fBpcreapi\fP(3), \fBpcre16\fP, \fBpcre_config\fP(3).
+\fBpcreapi\fP(3), \fBpcre16\fP, \fBpcre32\fP, \fBpcre_config\fP(3).
.
.
.SH AUTHOR
diff --git a/doc/pcrecallout.3 b/doc/pcrecallout.3
index 6d30111..5681335 100644
--- a/doc/pcrecallout.3
+++ b/doc/pcrecallout.3
@@ -1,19 +1,27 @@
-.TH PCRECALLOUT 3 "08 January 2012" "PCRE 8.30"
+.TH PCRECALLOUT 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
-.SH "PCRE CALLOUTS"
+.SH SYNOPSIS
.rs
.sp
+.B #include <pcre.h>
+.PP
+.SM
.B int (*pcre_callout)(pcre_callout_block *);
.PP
.B int (*pcre16_callout)(pcre16_callout_block *);
.PP
+.B int (*pcre32_callout)(pcre32_callout_block *);
+.
+.SH DESCRIPTION
+.rs
+.sp
PCRE provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE in the middle of pattern matching. The
caller of PCRE provides an external function by putting its entry point in the
global variable \fIpcre_callout\fP (\fIpcre16_callout\fP for the 16-bit
-library). By default, this variable contains NULL, which disables all calling
-out.
+library, \fIpcre32_callout\fP for the 32-bit library). By default, this
+variable contains NULL, which disables all calling out.
.P
Within a regular expression, (?C) indicates the points at which the external
function is to be called. Different callout points can be identified by putting
@@ -76,9 +84,10 @@ callouts such as the example above are obeyed.
.rs
.sp
During matching, when PCRE reaches a callout point, the external function
-defined by \fIpcre_callout\fP or \fIpcre16_callout\fP is called (if it is set).
-This applies to both normal and DFA matching. The only argument to the callout
-function is a pointer to a \fBpcre_callout\fP or \fBpcre16_callout\fP block.
+defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
+(if it is set). This applies to both normal and DFA matching. The only
+argument to the callout function is a pointer to a \fBpcre_callout\fP
+or \fBpcre[16|32]_callout\fP block.
These structures contain the following fields:
.sp
int \fIversion\fP;
@@ -86,6 +95,7 @@ These structures contains the following fields:
int *\fIoffset_vector\fP;
const char *\fIsubject\fP; (8-bit version)
PCRE_SPTR16 \fIsubject\fP; (16-bit version)
+ PCRE_SPTR32 \fIsubject\fP; (32-bit version)
int \fIsubject_length\fP;
int \fIstart_match\fP;
int \fIcurrent_position\fP;
@@ -96,6 +106,7 @@ These structures contains the following fields:
int \fInext_item_length\fP;
const unsigned char *\fImark\fP; (8-bit version)
const PCRE_UCHAR16 *\fImark\fP; (16-bit version)
+ const PCRE_UCHAR32 *\fImark\fP; (32-bit version)
.sp
The \fIversion\fP field is an integer containing the version number of the
block format. The initial version was 0; the current version is 2. The version
@@ -108,7 +119,7 @@ automatically generated callouts).
.P
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
passed by the caller to the matching function. When \fBpcre_exec()\fP or
-\fBpcre16_exec()\fP is used, the contents can be inspected, in order to extract
+\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For the DFA matching functions, this
field is not useful.
@@ -126,7 +137,7 @@ in the subject.
The \fIcurrent_position\fP field contains the offset within the subject of the
current match pointer.
.P
-When the \fBpcre_exec()\fP or \fBpcre16_exec()\fP is used, the
+When \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP is used, the
\fIcapture_top\fP field contains one more than the number of the highest
numbered captured substring so far. If no substrings have been captured, the
value of \fIcapture_top\fP is one. This is always the case when the DFA
@@ -138,7 +149,7 @@ the case for the DFA matching functions.
.P
The \fIcallout_data\fP field contains a value that is passed to a matching
function specifically so that it can be passed back in callouts. It is passed
-in the \fIcallout_data\fP field of a \fBpcre_extra\fP or \fBpcre16_extra\fP
+in the \fIcallout_data\fP field of a \fBpcre_extra\fP or \fBpcre[16|32]_extra\fP
data structure. If no such data was passed, the value of \fIcallout_data\fP in
a callout block is NULL. There is a description of the \fBpcre_extra\fP
structure in the
@@ -162,7 +173,7 @@ help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts.
.P
The \fImark\fP field is present from version 2 of the callout structure. In
-callouts from \fBpcre_exec()\fP or \fBpcre16_exec()\fP it contains a pointer to
+callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
(*THEN) item in the match, or NULL if no such items have been passed. Instances
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
@@ -198,6 +209,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 08 Janurary 2012
+Last updated: 24 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrecompat.3 b/doc/pcrecompat.3
index 416c927..f24823f 100644
--- a/doc/pcrecompat.3
+++ b/doc/pcrecompat.3
@@ -1,4 +1,4 @@
-.TH PCRECOMPAT 3 "08 January 2012" "PCRE 8.30"
+.TH PCRECOMPAT 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
@@ -158,8 +158,9 @@ by the PCRE_BSR_ANYCRLF option.
different hosts that have the other endianness. However, this does not apply to
optimized data created by the just-in-time compiler.
.sp
-(k) The alternative matching functions (\fBpcre_dfa_exec()\fP and
-\fBpcre16_dfa_exec()\fP) match in a different way and are not Perl-compatible.
+(k) The alternative matching functions (\fBpcre_dfa_exec()\fP,
+\fBpcre16_dfa_exec()\fP and \fBpcre32_dfa_exec()\fP) match in a different way
+and are not Perl-compatible.
.sp
(l) PCRE recognizes some special sequences such as (*CR) at the start of
a pattern that set overall options that cannot be changed within the pattern.
diff --git a/doc/pcrecpp.3 b/doc/pcrecpp.3
index fb1c00a..fbddd86 100644
--- a/doc/pcrecpp.3
+++ b/doc/pcrecpp.3
@@ -13,7 +13,7 @@ The C++ wrapper for PCRE was provided by Google Inc. Some additional
functionality was added by Giuseppe Maxia. This brief man page was constructed
from the notes in the \fIpcrecpp.h\fP file, which should be consulted for
further details. Note that the C++ wrapper supports only the original 8-bit
-PCRE library. There is no 16-bit support at present.
+PCRE library. There is no 16-bit or 32-bit support at present.
.
.
.SH "MATCHING INTERFACE"
diff --git a/doc/pcrejit.3 b/doc/pcrejit.3
index de935a4..7716ed6 100644
--- a/doc/pcrejit.3
+++ b/doc/pcrejit.3
@@ -18,14 +18,16 @@ It does not apply when the DFA matching function is being used. The code for
this support was written by Zoltan Herczeg.
.
.
-.SH "8-BIT and 16-BIT SUPPORT"
+.SH "8-BIT, 16-BIT AND 32-BIT SUPPORT"
.rs
.sp
-JIT support is available for both the 8-bit and 16-bit PCRE libraries. To keep
-this documentation simple, only the 8-bit interface is described in what
+JIT support is available for all of the 8-bit, 16-bit and 32-bit PCRE libraries.
+To keep this documentation simple, only the 8-bit interface is described in what
follows. If you are using the 16-bit library, substitute the 16-bit functions
and 16-bit structures (for example, \fIpcre16_jit_stack\fP instead of
-\fIpcre_jit_stack\fP).
+\fIpcre_jit_stack\fP). If you are using the 32-bit library, substitute the 32-bit functions
+and 32-bit structures (for example, \fIpcre32_jit_stack\fP instead of
+\fIpcre_jit_stack\fP).
.
.
.SH "AVAILABILITY OF JIT SUPPORT"
@@ -142,8 +144,9 @@ times as you like for matching different subject strings.
.rs
.sp
The only \fBpcre_exec()\fP options that are supported for JIT execution are
-PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NOTBOL, PCRE_NOTEOL,
-PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT.
+PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NO_UTF32_CHECK, PCRE_NOTBOL,
+PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
+PCRE_PARTIAL_SOFT.
.P
The unsupported pattern items are:
.sp
diff --git a/doc/pcrelimits.3 b/doc/pcrelimits.3
index 0e25f82..14ffbc4 100644
--- a/doc/pcrelimits.3
+++ b/doc/pcrelimits.3
@@ -1,4 +1,4 @@
-.TH PCRELIMITS 3 "04 May 2012" "PCRE 8.30"
+.TH PCRELIMITS 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "SIZE AND OTHER LIMITATIONS"
@@ -8,11 +8,12 @@ There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
.P
The maximum length of a compiled pattern is approximately 64K data units (bytes
-for the 8-bit library, 16-bit units for the 16-bit library) if PCRE is compiled
-with the default internal linkage size of 2 bytes. If you want to process
-regular expressions that are truly enormous, you can compile PCRE with an
-internal linkage size of 3 or 4 (when building the 16-bit library, 3 is rounded
-up to 4). See the \fBREADME\fP file in the source distribution and the
+for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
+the 32-bit library) if PCRE is compiled with the default internal linkage size
+of 2 bytes. If you want to process regular expressions that are truly enormous,
+you can compile PCRE with an internal linkage size of 3 or 4 (when building the
+16-bit or 32-bit library, 3 is rounded up to 4). See the \fBREADME\fP file in
+the source distribution and the
.\" HREF
\fBpcrebuild\fP
.\"
@@ -33,7 +34,7 @@ The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
.P
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
-is 255 for the 8-bit library and 65535 for the 16-bit library.
+is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
.P
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
diff --git a/doc/pcrematching.3 b/doc/pcrematching.3
index 1a510e0..a9977d5 100644
--- a/doc/pcrematching.3
+++ b/doc/pcrematching.3
@@ -6,19 +6,20 @@ PCRE - Perl-compatible regular expressions
.sp
This document describes the two different algorithms that are available in PCRE
for matching a compiled regular expression against a given subject string. The
-"standard" algorithm is the one provided by the \fBpcre_exec()\fP and
-\fBpcre16_exec()\fP functions. These work in the same was as Perl's matching
-function, and provide a Perl-compatible matching operation. The just-in-time
-(JIT) optimization that is described in the
+"standard" algorithm is the one provided by the \fBpcre_exec()\fP,
+\fBpcre16_exec()\fP and \fBpcre32_exec()\fP functions. These work in the same
+way as Perl's matching function, and provide a Perl-compatible matching operation.
+The just-in-time (JIT) optimization that is described in the
.\" HREF
\fBpcrejit\fP
.\"
documentation is compatible with these functions.
.P
-An alternative algorithm is provided by the \fBpcre_dfa_exec()\fP and
-\fBpcre16_dfa_exec()\fP functions; they operate in a different way, and are not
-Perl-compatible. This alternative has advantages and disadvantages compared
-with the standard algorithm, and these are described below.
+An alternative algorithm is provided by the \fBpcre_dfa_exec()\fP,
+\fBpcre16_dfa_exec()\fP and \fBpcre32_dfa_exec()\fP functions; they operate in
+a different way, and are not Perl-compatible. This alternative has advantages
+and disadvantages compared with the standard algorithm, and these are described
+below.
.P
When there is only one possible way in which a given subject string can match a
pattern, the two algorithms give the same answer. A difference arises, however,
@@ -140,9 +141,9 @@ and not on others), is not supported. It causes an error if encountered.
always 1, and the value of the \fIcapture_last\fP field is always -1.
.P
7. The \eC escape sequence, which (in the standard algorithm) always matches a
-single data unit, even in UTF-8 or UTF-16 modes, is not supported in these
-modes, because the alternative algorithm moves through the subject string one
-character (not data unit) at a time, for all active paths through the tree.
+single data unit, even in UTF-8, UTF-16 or UTF-32 modes, is not supported in
+these modes, because the alternative algorithm moves through the subject string
+one character (not data unit) at a time, for all active paths through the tree.
.P
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
diff --git a/doc/pcrepartial.3 b/doc/pcrepartial.3
index c93e3d1..d5cd74e 100644
--- a/doc/pcrepartial.3
+++ b/doc/pcrepartial.3
@@ -1,4 +1,4 @@
-.TH PCREPARTIAL 3 "24 February 2012" "PCRE 8.31"
+.TH PCREPARTIAL 3 "24 June 2012" "PCRE 8.31"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PARTIAL MATCHING IN PCRE"
@@ -33,8 +33,8 @@ the details differ between the two types of matching function. If both options
are set, PCRE_PARTIAL_HARD takes precedence.
.P
If you want to use partial matching with just-in-time optimized code, you must
-call \fBpcre_study()\fP or \fBpcre16_study()\fP with one or both of these
-options:
+call \fBpcre_study()\fP, \fBpcre16_study()\fP or \fBpcre32_study()\fP with one
+or both of these options:
.sp
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
@@ -52,11 +52,11 @@ matching string, and does not bother to run the matching function on shorter
strings. This optimization is also disabled for partial matching.
.
.
-.SH "PARTIAL MATCHING USING pcre_exec() OR pcre16_exec()"
+.SH "PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()"
.rs
.sp
A partial match occurs during a call to \fBpcre_exec()\fP or
-\fBpcre16_exec()\fP when the end of the subject string is reached successfully,
+\fBpcre[16|32]_exec()\fP when the end of the subject string is reached successfully,
but matching cannot continue because more characters are needed. However, at
least one character in the subject must have been inspected. This character
need not form part of the final matched string; lookbehind assertions and the
@@ -86,10 +86,10 @@ What happens when a partial match is identified depends on which of the two
partial matching options are set.
.
.
-.SS "PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre16_exec()"
+.SS "PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre[16|32]_exec()"
.rs
.sp
-If PCRE_PARTIAL_SOFT is set when \fBpcre_exec()\fP or \fBpcre16_exec()\fP
+If PCRE_PARTIAL_SOFT is set when \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP
identifies a partial match, the partial match is remembered, but matching
continues as normal, and other alternatives in the pattern are tried. If no
complete match can be found, PCRE_ERROR_PARTIAL is returned instead of
@@ -114,10 +114,10 @@ example, there are two partial matches, because "dog" on its own partially
matches the second alternative.)
.
.
-.SS "PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre16_exec()"
+.SS "PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre[16|32]_exec()"
.rs
.sp
-If PCRE_PARTIAL_HARD is set for \fBpcre_exec()\fP or \fBpcre16_exec()\fP,
+If PCRE_PARTIAL_HARD is set for \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP,
PCRE_ERROR_PARTIAL is returned as soon as a partial match is found, without
continuing to search for possible complete matches. This option is "hard"
because it prefers an earlier partial match over a later complete match. For
@@ -162,7 +162,7 @@ The second pattern will never match "dogsbody", because it will always find the
shorter match first.
.
.
-.SH "PARTIAL MATCHING USING pcre_dfa_exec() OR pcre16_dfa_exec()"
+.SH "PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()"
.rs
.sp
The DFA functions move along the subject string character by character, without
@@ -254,7 +254,7 @@ If the escape sequence \eP is present more than once in a \fBpcretest\fP data
line, the PCRE_PARTIAL_HARD option is set for the match.
.
.
-.SH "MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre16_dfa_exec()"
+.SH "MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()"
.rs
.sp
When a partial match has been found using a DFA matching function, it is
@@ -283,7 +283,7 @@ facility can be used to pass very long subject strings to the DFA matching
functions.
.
.
-.SH "MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre16_exec()"
+.SH "MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()"
.rs
.sp
From release 8.00, the standard matching functions can also be used to do
@@ -330,7 +330,7 @@ includes the effect of PCRE_NOTEOL.
offsets that are returned for a partial match. However a lookbehind assertion
later in the pattern could require even earlier characters to be inspected. You
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
-\fBpcre_fullinfo()\fP or \fBpcre16_fullinfo()\fP functions to obtain the length
+\fBpcre_fullinfo()\fP or \fBpcre[16|32]_fullinfo()\fP functions to obtain the length
of the largest lookbehind in the pattern. This length is given in characters,
not bytes. If you always retain at least that many characters before the
partially matched string, all should be well. (Of course, near the start of the
@@ -440,6 +440,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 24 February 2012
+Last updated: 24 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 1e2c078..c8091b7 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -21,15 +21,17 @@ published by O'Reilly, covers regular expressions in great detail. This
description of PCRE's regular expressions is intended as reference material.
.P
The original operation of PCRE was on strings of one-byte characters. However,
-there is now also support for UTF-8 strings in the original library, and a
-second library that supports 16-bit and UTF-16 character strings. To use these
+there is now also support for UTF-8 strings in the original library, an
+extra library that supports 16-bit and UTF-16 character strings, and an
+extra library that supports 32-bit and UTF-32 character strings. To use these
features, PCRE must be built to include appropriate support. When using UTF
-strings you must either call the compiling function with the PCRE_UTF8 or
-PCRE_UTF16 option, or the pattern must start with one of these special
-sequences:
+strings you must either call the compiling function with the PCRE_UTF8,
+PCRE_UTF16 or PCRE_UTF32 option, or the pattern must start with one of
+these special sequences:
.sp
(*UTF8)
(*UTF16)
+ (*UTF32)
.sp
Starting a pattern with such a sequence is equivalent to setting the relevant
option. This feature is not Perl-compatible. How setting a UTF mode affects
@@ -41,7 +43,7 @@ of features in the
page.
.P
Another special sequence that may appear at the start of a pattern or in
-combination with (*UTF8) or (*UTF16) is:
+combination with (*UTF8), (*UTF16) or (*UTF32) is:
.sp
(*UCP)
.sp
@@ -57,12 +59,12 @@ of newlines; they are described below.
.P
The remainder of this document discusses the patterns that are supported by
PCRE when one of its main matching functions, \fBpcre_exec()\fP (8-bit) or
-\fBpcre16_exec()\fP (16-bit), is used. PCRE also has alternative matching
-functions, \fBpcre_dfa_exec()\fP and \fBpcre16_dfa_exec()\fP, which match using
-a different algorithm that is not Perl-compatible. Some of the features
-discussed below are not available when DFA matching is used. The advantages and
-disadvantages of the alternative functions, and how they differ from the normal
-functions, are discussed in the
+\fBpcre[16|32]_exec()\fP (16- or 32-bit), is used. PCRE also has alternative
+matching functions, \fBpcre_dfa_exec()\fP and \fBpcre[16|32]_dfa_exec()\fP,
+which match using a different algorithm that is not Perl-compatible. Some of
+the features discussed below are not available when DFA matching is used. The
+advantages and disadvantages of the alternative functions, and how they differ
+from the normal functions, are discussed in the
.\" HREF
\fBpcrematching\fP
.\"
@@ -280,9 +282,11 @@ between \ex{ and }, but the character code is constrained as follows:
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
16-bit non-UTF mode less than 0x10000
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
+ 32-bit non-UTF mode less than 0x80000000
+ 32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
.sp
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
-"surrogate" codepoints).
+"surrogate" codepoints), and 0xffef.
.P
If characters other than hexadecimal digits appear between \ex{ and }, or if
there is no terminating }, this form of escape is not recognized. Instead, the
@@ -568,7 +572,7 @@ change of newline convention; for example, a pattern can start with:
.sp
(*ANY)(*BSR_ANYCRLF)
.sp
-They can also be combined with the (*UTF8), (*UTF16), or (*UCP) special
+They can also be combined with the (*UTF8), (*UTF16), (*UTF32) or (*UCP) special
sequences. Inside a character class, \eR is treated as an unrecognized escape
sequence, and so matches the letter "R" by default, but causes an error if
PCRE_EXTRA is set.
@@ -779,7 +783,8 @@ a modifier or "other".
The Cs (Surrogate) property applies only to characters in the range U+D800 to
U+DFFF. Such characters are not valid in Unicode strings and so
cannot be tested by PCRE, unless UTF validity checking has been turned off
-(see the discussion of PCRE_NO_UTF8_CHECK and PCRE_NO_UTF16_CHECK in the
+(see the discussion of PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK and
+PCRE_NO_UTF32_CHECK in the
.\" HREF
\fBpcreapi\fP
.\"
@@ -1056,15 +1061,16 @@ name; PCRE does not support this.
.sp
Outside a character class, the escape sequence \eC matches any one data unit,
whether or not a UTF mode is set. In the 8-bit library, one data unit is one
-byte; in the 16-bit library it is a 16-bit unit. Unlike a dot, \eC always
+byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is
+a 32-bit unit. Unlike a dot, \eC always
matches line-ending characters. The feature is provided in Perl in order to
match individual bytes in UTF-8 mode, but it is unclear how it can usefully be
used. Because \eC breaks up characters into individual data units, matching one
unit with \eC in a UTF mode means that the rest of the string may start with a
malformed UTF character. This has undefined results, because PCRE assumes that
it is dealing with valid UTF strings (and by default it checks this at the
-start of processing unless the PCRE_NO_UTF8_CHECK or PCRE_NO_UTF16_CHECK option
-is used).
+start of processing unless the PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or
+PCRE_NO_UTF32_CHECK option is used).
.P
PCRE does not allow \eC to appear in lookbehind assertions
.\" HTML <a href="#lookbehind">
@@ -1123,9 +1129,9 @@ circumflex is not an assertion; it still consumes a character from the subject
string, and therefore it fails if the current pointer is at the end of the
string.
.P
-In UTF-8 (UTF-16) mode, characters with values greater than 255 (0xffff) can be
-included in a class as a literal string of data units, or by using the \ex{
-escaping mechanism.
+In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255 (0xffff)
+can be included in a class as a literal string of data units, or by using the
+\ex{ escaping mechanism.
.P
When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches
@@ -1338,9 +1344,10 @@ the section entitled
.\" </a>
"Newline sequences"
.\"
-above. There are also the (*UTF8), (*UTF16), and (*UCP) leading sequences that
-can be used to set UTF and Unicode property modes; they are equivalent to
-setting the PCRE_UTF8, PCRE_UTF16, and the PCRE_UCP options, respectively.
+above. There are also the (*UTF8), (*UTF16), (*UTF32) and (*UCP) leading
+sequences that can be used to set UTF and Unicode property modes; they are
+equivalent to setting the PCRE_UTF8, PCRE_UTF16, PCRE_UTF32 and the PCRE_UCP
+options, respectively.
.
.
.\" HTML <a name="subpattern"></a>
@@ -2602,8 +2609,8 @@ same pair of parentheses when there is a repetition.
PCRE provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE provides an external
function by putting its entry point in the global variable \fIpcre_callout\fP
-(8-bit library) or \fIpcre16_callout\fP (16-bit library). By default, this
-variable contains NULL, which disables all calling out.
+(8-bit library) or \fIpcre[16|32]_callout\fP (16-bit or 32-bit library).
+By default, this variable contains NULL, which disables all calling out.
.P
Within a regular expression, (?C) indicates the points at which the external
function is to be called. If you want to identify different callout points, you
@@ -2658,10 +2665,10 @@ parenthesis followed by an asterisk. They are generally of the form
(*VERB) or (*VERB:NAME). Some may take either form, with differing behaviour,
depending on whether or not an argument is present. A name is any sequence of
characters that does not include a closing parenthesis. The maximum length of
-name is 255 in the 8-bit library and 65535 in the 16-bit library. If the name
-is empty, that is, if the closing parenthesis immediately follows the colon,
-the effect is as if the colon were not there. Any number of these verbs may
-occur in a pattern.
+name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit libraries.
+If the name is empty, that is, if the closing parenthesis immediately follows
+the colon, the effect is as if the colon were not there. Any number of these
+verbs may occur in a pattern.
.
.
.\" HTML <a name="nooptimize"></a>
@@ -2946,7 +2953,7 @@ overrides.
.rs
.sp
\fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3),
-\fBpcresyntax\fP(3), \fBpcre\fP(3), \fBpcre16(3)\fP.
+\fBpcresyntax\fP(3), \fBpcre\fP(3), \fBpcre16\fP(3), \fBpcre32\fP(3).
.
.
.SH AUTHOR
diff --git a/doc/pcreperform.3 b/doc/pcreperform.3
index d56cf7e..fb2aa95 100644
--- a/doc/pcreperform.3
+++ b/doc/pcreperform.3
@@ -68,7 +68,7 @@ that PCRE cannot otherwise handle.
.SH "STACK USAGE AT RUN TIME"
.rs
.sp
-When \fBpcre_exec()\fP or \fBpcre16_exec()\fP is used for matching, certain
+When \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP is used for matching, certain
kinds of pattern can cause it to use large amounts of the process stack. In
some environments the default process stack is quite small, and if it runs out
the result is often SIGSEGV. This issue is probably the most frequently raised
diff --git a/doc/pcreposix.3 b/doc/pcreposix.3
index 411e548..b25a891 100644
--- a/doc/pcreposix.3
+++ b/doc/pcreposix.3
@@ -31,7 +31,7 @@ expression 8-bit library. See the
.\"
documentation for a description of PCRE's native API, which contains much
additional functionality. There is no POSIX-style wrapper for PCRE's 16-bit
-library.
+and 32-bit libraries.
.P
The functions described here are just wrapper functions that ultimately call
the PCRE native API. Their prototypes are defined in the \fBpcreposix.h\fP
diff --git a/doc/pcreprecompile.3 b/doc/pcreprecompile.3
index 13ee212..39eb82b 100644
--- a/doc/pcreprecompile.3
+++ b/doc/pcreprecompile.3
@@ -1,4 +1,4 @@
-.TH PCREPRECOMPILE 3 "10 January 2012" "PCRE 8.30"
+.TH PCREPRECOMPILE 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS"
@@ -18,7 +18,7 @@ JIT data.
.P
If you save compiled patterns to a file, you can copy them to a different host
and run them there. If the two hosts have different endianness (byte order),
-you should run the \fBpcre[16]_pattern_to_host_byte_order()\fP function on the
+you should run the \fBpcre[16|32]_pattern_to_host_byte_order()\fP function on the
new host before trying to match the pattern. The matching functions return
PCRE_ERROR_BADENDIANNESS if they detect a pattern with the wrong endianness.
.P
@@ -30,9 +30,9 @@ restoring a compiled pattern loses any JIT optimization data.
.SH "SAVING A COMPILED PATTERN"
.rs
.sp
-The value returned by \fBpcre[16]_compile()\fP points to a single block of
+The value returned by \fBpcre[16|32]_compile()\fP points to a single block of
memory that holds the compiled pattern and associated data. You can find the
-length of this block in bytes by calling \fBpcre[16]_fullinfo()\fP with an
+length of this block in bytes by calling \fBpcre[16|32]_fullinfo()\fP with an
argument of PCRE_INFO_SIZE. You can then save the data in any appropriate
manner. Here is sample code for the 8-bit library that compiles a pattern and
writes it to a file. It assumes that the variable \fIfd\fP refers to a file
@@ -68,8 +68,8 @@ If the pattern has been studied, it is also possible to save the normal study
data in a similar way to the compiled pattern itself. However, if the
PCRE_STUDY_JIT_COMPILE option was used, the just-in-time data that is created cannot
be saved because it is too dependent on the current environment. When studying
-generates additional information, \fBpcre[16]_study()\fP returns a pointer to a
-\fBpcre[16]_extra\fP data block. Its format is defined in the
+generates additional information, \fBpcre[16|32]_study()\fP returns a pointer to a
+\fBpcre[16|32]_extra\fP data block. Its format is defined in the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
section on matching a pattern
@@ -79,10 +79,10 @@ in the
\fBpcreapi\fP
.\"
documentation. The \fIstudy_data\fP field points to the binary study data, and
-this is what you must save (not the \fBpcre[16]_extra\fP block itself). The
-length of the study data can be obtained by calling \fBpcre[16]_fullinfo()\fP
+this is what you must save (not the \fBpcre[16|32]_extra\fP block itself). The
+length of the study data can be obtained by calling \fBpcre[16|32]_fullinfo()\fP
with an argument of PCRE_INFO_STUDYSIZE. Remember to check that
-\fBpcre[16]_study()\fP did return a non-NULL value before trying to save the
+\fBpcre[16|32]_study()\fP did return a non-NULL value before trying to save the
study data.
.
.
@@ -90,15 +90,15 @@ study data.
.rs
.sp
Re-using a precompiled pattern is straightforward. Having reloaded it into main
-memory, called \fBpcre[16]_pattern_to_host_byte_order()\fP if necessary,
-you pass its pointer to \fBpcre[16]_exec()\fP or \fBpcre[16]_dfa_exec()\fP in
+memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary,
+you pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in
the usual way.
.P
However, if you passed a pointer to custom character tables when the pattern
-was compiled (the \fItableptr\fP argument of \fBpcre[16]_compile()\fP), you
-must now pass a similar pointer to \fBpcre[16]_exec()\fP or
-\fBpcre[16]_dfa_exec()\fP, because the value saved with the compiled pattern
-will obviously be nonsense. A field in a \fBpcre[16]_extra()\fP block is used
+was compiled (the \fItableptr\fP argument of \fBpcre[16|32]_compile()\fP), you
+must now pass a similar pointer to \fBpcre[16|32]_exec()\fP or
+\fBpcre[16|32]_dfa_exec()\fP, because the value saved with the compiled pattern
+will obviously be nonsense. A field in a \fBpcre[16|32]_extra\fP block is used
to pass this data, as described in the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
@@ -116,10 +116,10 @@ functions to use PCRE's internal tables. Thus, you do not need to take any
special action at run time in this case.
.P
If you saved study data with the compiled pattern, you need to create your own
-\fBpcre[16]_extra\fP data block and set the \fIstudy_data\fP field to point to the
+\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point to the
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
\fIflags\fP field to indicate that study data is present. Then pass the
-\fBpcre[16]_extra\fP block to the matching function in the usual way. If the
+\fBpcre[16|32]_extra\fP block to the matching function in the usual way. If the
pattern was studied for just-in-time optimization, that data cannot be saved,
and so is lost by a save/restore cycle.
.
@@ -146,6 +146,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 10 January 2012
+Last updated: 24 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrestack.3 b/doc/pcrestack.3
index fdd7fd9..798f0bc 100644
--- a/doc/pcrestack.3
+++ b/doc/pcrestack.3
@@ -1,10 +1,10 @@
-.TH PCRESTACK 3 "21 January 2012" "PCRE 8.30"
+.TH PCRESTACK 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE DISCUSSION OF STACK USAGE"
.rs
.sp
-When you call \fBpcre[16]_exec()\fP, it makes use of an internal function
+When you call \fBpcre[16|32]_exec()\fP, it makes use of an internal function
called \fBmatch()\fP. This calls itself recursively at branch points in the
pattern, in order to remember the state of the match so that it can back up and
try a different alternative if the first one fails. As matching proceeds deeper
@@ -19,10 +19,10 @@ different numbers of a's. Furthermore, in a number of cases where the result of
the recursive call would immediately be passed back as the result of the
current call (a "tail recursion"), the function is just restarted instead.
.P
-The above comments apply when \fBpcre[16]_exec()\fP is run in its normal
+The above comments apply when \fBpcre[16|32]_exec()\fP is run in its normal
interpretive manner. If the pattern was studied with the
PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling was successful, and
-the options passed to \fBpcre[16]_exec()\fP were not incompatible, the matching
+the options passed to \fBpcre[16|32]_exec()\fP were not incompatible, the matching
process uses the JIT-compiled code instead of the \fBmatch()\fP function. In
this case, the memory requirements are handled entirely differently. See the
.\" HREF
@@ -30,21 +30,21 @@ this case, the memory requirements are handled entirely differently. See the
.\"
documentation for details.
.P
-The \fBpcre[16]_dfa_exec()\fP function operates in an entirely different way,
+The \fBpcre[16|32]_dfa_exec()\fP function operates in an entirely different way,
and uses recursion only when there is a regular expression recursion or
subroutine call in the pattern. This includes the processing of assertion and
"once-only" subpatterns, which are handled like subroutine calls. Normally,
these are never very deep, and the limit on the complexity of
-\fBpcre[16]_dfa_exec()\fP is controlled by the amount of workspace it is given.
+\fBpcre[16|32]_dfa_exec()\fP is controlled by the amount of workspace it is given.
However, it is possible to write patterns with runaway infinite recursions;
-such patterns will cause \fBpcre[16]_dfa_exec()\fP to run out of stack. At
+such patterns will cause \fBpcre[16|32]_dfa_exec()\fP to run out of stack. At
present, there is no protection against this.
.P
-The comments that follow do NOT apply to \fBpcre[16]_dfa_exec()\fP; they are
-relevant only for \fBpcre[16]_exec()\fP without the JIT optimization.
+The comments that follow do NOT apply to \fBpcre[16|32]_dfa_exec()\fP; they are
+relevant only for \fBpcre[16|32]_exec()\fP without the JIT optimization.
.
.
-.SS "Reducing \fBpcre[16]_exec()\fP's stack usage"
+.SS "Reducing \fBpcre[16|32]_exec()\fP's stack usage"
.rs
.sp
Each time that \fBmatch()\fP is actually called recursively, it uses memory
@@ -79,19 +79,19 @@ subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
.
.
-.SS "Compiling PCRE to use heap instead of stack for \fBpcre[16]_exec()\fP"
+.SS "Compiling PCRE to use heap instead of stack for \fBpcre[16|32]_exec()\fP"
.rs
.sp
In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points when
-\fBpcre[16]_exec()\fP is running. This makes it run a lot more slowly, however.
+\fBpcre[16|32]_exec()\fP is running. This makes it run a lot more slowly, however.
Details of how to do this are given in the
.\" HREF
\fBpcrebuild\fP
.\"
documentation. When built in this way, instead of using the stack, PCRE obtains
and frees memory by calling the functions that are pointed to by the
-\fBpcre[16]_stack_malloc\fP and \fBpcre[16]_stack_free\fP variables. By
+\fBpcre[16|32]_stack_malloc\fP and \fBpcre[16|32]_stack_free\fP variables. By
default, these point to \fBmalloc()\fP and \fBfree()\fP, but you can replace
the pointers to cause PCRE to use your own functions. Since the block sizes are
always the same, and are always freed in reverse order, it may be possible to
@@ -99,22 +99,22 @@ implement customized memory handlers that are more efficient than the standard
functions.
.
.
-.SS "Limiting \fBpcre[16]_exec()\fP's stack usage"
+.SS "Limiting \fBpcre[16|32]_exec()\fP's stack usage"
.rs
.sp
You can set limits on the number of times that \fBmatch()\fP is called, both in
-total and recursively. If a limit is exceeded, \fBpcre[16]_exec()\fP returns an
+total and recursively. If a limit is exceeded, \fBpcre[16|32]_exec()\fP returns an
error code. Setting suitable limits should prevent it from running out of
stack. The default values of the limits are very large, and unlikely ever to
operate. They can be changed when PCRE is built, and they can also be set when
-\fBpcre[16]_exec()\fP is called. For details of these interfaces, see the
+\fBpcre[16|32]_exec()\fP is called. For details of these interfaces, see the
.\" HREF
\fBpcrebuild\fP
.\"
documentation and the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
-section on extra data for \fBpcre[16]_exec()\fP
+section on extra data for \fBpcre[16|32]_exec()\fP
.\"
in the
.\" HREF
@@ -131,7 +131,7 @@ In Unix-like environments, the \fBpcretest\fP test program has a command line
option (\fB-S\fP) that can be used to increase the size of its stack. As long
as the stack is large enough, another option (\fB-M\fP) can be used to find the
smallest limits that allow a particular pattern to match a given subject
-string. This is done by calling \fBpcre[16]_exec()\fP repeatedly with different
+string. This is done by calling \fBpcre[16|32]_exec()\fP repeatedly with different
limits.
.
.
@@ -181,7 +181,7 @@ limit on stack size by code such as this:
.sp
This reads the current limits (soft and hard) using \fBgetrlimit()\fP, then
attempts to increase the soft limit to 100Mb using \fBsetrlimit()\fP. You must
-do this before calling \fBpcre[16]_exec()\fP.
+do this before calling \fBpcre[16|32]_exec()\fP.
.
.
.SS "Changing stack size in Mac OS X"
@@ -210,6 +210,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 21 January 2012
+Last updated: 24 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcresyntax.3 b/doc/pcresyntax.3
index 01ae778..f634d4b 100644
--- a/doc/pcresyntax.3
+++ b/doc/pcresyntax.3
@@ -1,4 +1,4 @@
-.TH PCRESYNTAX 3 "10 January 2012" "PCRE 8.30"
+.TH PCRESYNTAX 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -348,6 +348,7 @@ newline-setting options with similar syntax:
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
+ (*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
(*UCP) set PCRE_UCP (use Unicode properties for \ed etc)
.
.
@@ -442,7 +443,7 @@ pattern is not anchored.
.rs
.sp
These are recognized only at the very start of the pattern or after a
-(*BSR_...), (*UTF8), (*UTF16) or (*UCP) option.
+(*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
.sp
(*CR) carriage return only
(*LF) linefeed only
diff --git a/doc/pcretest.1 b/doc/pcretest.1
index 163ac63..c0441ad 100644
--- a/doc/pcretest.1
+++ b/doc/pcretest.1
@@ -18,9 +18,12 @@ options, see the
.\" HREF
\fBpcreapi\fP
.\"
-and
+,
.\" HREF
\fBpcre16\fP
+and
+.\" HREF
+\fBpcre32\fP
.\"
documentation.
.P
@@ -37,19 +40,21 @@ PCRE, and are unlikely to be of use otherwise. They are all documented here,
but without much justification.
.
.
-.SH "PCRE's 8-BIT and 16-BIT LIBRARIES"
+.SH "PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
.rs
.sp
From release 8.30, two separate PCRE libraries can be built. The original one
supports 8-bit character strings, whereas the newer 16-bit library supports
-character strings encoded in 16-bit units. The \fBpcretest\fP program can be
-used to test both libraries. However, it is itself still an 8-bit program,
-reading 8-bit input and writing 8-bit output. When testing the 16-bit library,
-the patterns and data strings are converted to 16-bit format before being
-passed to the PCRE library functions. Results are converted to 8-bit for
-output.
+character strings encoded in 16-bit units. From release 8.32, a third
+library can be built, supporting character strings encoded in 32-bit units.
+The \fBpcretest\fP program can be
+used to test all three libraries. However, it is itself still an 8-bit program,
+reading 8-bit input and writing 8-bit output. When testing the 16-bit or 32-bit
+library, the patterns and data strings are converted to 16- or 32-bit format
+before being passed to the PCRE library functions. Results are converted to
+8-bit for output.
.P
-References to functions and structures of the form \fBpcre[16]_xx\fP below
+References to functions and structures of the form \fBpcre[16|32]_xx\fP below
mean "\fBpcre_xx\fP when using the 8-bit library, \fBpcre16_xx\fP when using
the 16-bit library, or \fBpcre32_xx\fP when using the 32-bit library".
.
@@ -58,10 +63,16 @@ the 16-bit library".
.rs
.TP 10
\fB-16\fP
-If both the 8-bit and the 16-bit libraries have been built, this option causes
-the 16-bit library to be used. If only the 16-bit library has been built, this
-is the default (so has no effect). If only the 8-bit library has been built,
-this option causes an error.
+If the 16-bit library has been built in addition to the 8-bit or the 32-bit
+library, this option causes the 16-bit library to be used. If only the 16-bit
+library has been built, this is the default (so has no effect). If the 16-bit
+library has not been built, this option causes an error.
+.TP 10
+\fB-32\fP
+If the 32-bit library has been built in addition to the 8-bit or the 16-bit
+library, this option causes the 32-bit library to be used. If only the 32-bit
+library has been built, this is the default (so has no effect). If the 32-bit
+library has not been built, this option causes an error.
.TP 10
\fB-b\fP
Behave as if each pattern has the \fB/B\fP (show byte code) modifier; the
@@ -89,9 +100,10 @@ The following options output 1 for true or zero for false:
ebcdic compiled for an EBCDIC environment
jit just-in-time support is available
pcre16 the 16-bit library was built
+ pcre32 the 32-bit library was built
pcre8 the 8-bit library was built
ucp Unicode property support is available
- utf UTF-8 and/or UTF-16 support is available
+ utf UTF-8 and/or UTF-16 and/or UTF-32 support is available
.TP 10
\fB-d\fP
Behave as if each pattern has the \fB/D\fP (debug) modifier; the internal
@@ -100,8 +112,8 @@ form and information about the compiled pattern is output after compilation;
.TP 10
\fB-dfa\fP
Behave as if each data line contains the \eD escape sequence; this causes the
-alternative matching function, \fBpcre[16]_dfa_exec()\fP, to be used instead of
-the standard \fBpcre[16]_exec()\fP function (more detail is given below).
+alternative matching function, \fBpcre[16|32]_dfa_exec()\fP, to be used instead
+of the standard \fBpcre[16|32]_exec()\fP function (more detail is given below).
.TP 10
\fB-help\fP
Output a brief summary of these options and then exit.
@@ -113,7 +125,7 @@ compiled pattern is given after compilation.
\fB-M\fP
Behave as if each data line contains the \eM escape sequence; this causes
PCRE to discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings by
-calling \fBpcre[16]_exec()\fP repeatedly with different limits.
+calling \fBpcre[16|32]_exec()\fP repeatedly with different limits.
.TP 10
\fB-m\fP
Output the size of each compiled pattern after it has been compiled. This is
@@ -122,9 +134,10 @@ bytes for both libraries.
.TP 10
\fB-o\fP \fIosize\fP
Set the number of elements in the output vector that is used when calling
-\fBpcre[16]_exec()\fP or \fBpcre[16]_dfa_exec()\fP to be \fIosize\fP. The
+\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP to be \fIosize\fP. The
default value is 45, which is enough for 14 capturing subexpressions for
-\fBpcre[16]_exec()\fP or 22 different matches for \fBpcre[16]_dfa_exec()\fP.
+\fBpcre[16|32]_exec()\fP or 22 different matches for
+\fBpcre[16|32]_dfa_exec()\fP.
The vector size can be changed for individual matching calls by including \eO
in the data line (see below).
.TP 10
@@ -143,7 +156,7 @@ megabytes.
\fB-s\fP or \fB-s+\fP
Behave as if each pattern has the \fB/S\fP modifier; in other words, force each
pattern to be studied. If \fB-s+\fP is used, all the JIT compile options are
-passed to \fBpcre[16]_study()\fP, causing just-in-time optimization to be set
+passed to \fBpcre[16|32]_study()\fP, causing just-in-time optimization to be set
up if it is available, for both full and partial matching. Specific JIT compile
options can be selected by following \fB-s+\fP with a digit in the range 1 to
7, which selects the JIT compile modes as follows:
@@ -310,7 +323,7 @@ sections.
.sp
The \fB/i\fP, \fB/m\fP, \fB/s\fP, and \fB/x\fP modifiers set the PCRE_CASELESS,
PCRE_MULTILINE, PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
-\fBpcre[16]_compile()\fP is called. These four modifier letters have the same
+\fBpcre[16|32]_compile()\fP is called. These four modifier letters have the same
effect as they do in Perl. For example:
.sp
/caseless/i
@@ -329,6 +342,9 @@ options that do not correspond to anything in Perl:
\fB/8\fP PCRE_UTF16 ) when using the 16-bit
\fB/?\fP PCRE_NO_UTF16_CHECK ) library
.sp
+ \fB/8\fP PCRE_UTF32 ) when using the 32-bit
+ \fB/?\fP PCRE_NO_UTF32_CHECK ) library
+.sp
\fB/A\fP PCRE_ANCHORED
\fB/C\fP PCRE_AUTO_CALLOUT
\fB/E\fP PCRE_DOLLAR_ENDONLY
@@ -354,7 +370,7 @@ This example sets multiline matching with CRLF as the line ending sequence:
.sp
/^abc/m<CRLF>
.sp
-As well as turning on the PCRE_UTF8/16 option, the \fB/8\fP modifier causes
+As well as turning on the PCRE_UTF8/16/32 option, the \fB/8\fP modifier causes
all non-printing characters in output strings to be printed using the
\ex{hh...} notation. Otherwise, those less than 0x100 are output in hex without
the curly brackets.
@@ -373,12 +389,12 @@ Searching for all possible matches within each subject string can be requested
by the \fB/g\fP or \fB/G\fP modifier. After finding a match, PCRE is called
again to search the remainder of the subject string. The difference between
\fB/g\fP and \fB/G\fP is that the former uses the \fIstartoffset\fP argument to
-\fBpcre[16]_exec()\fP to start searching at a new point within the entire
+\fBpcre[16|32]_exec()\fP to start searching at a new point within the entire
string (which is in effect what Perl does), whereas the latter passes over a
shortened substring. This makes a difference to the matching process if the
pattern begins with a lookbehind assertion (including \eb or \eB).
.P
-If any call to \fBpcre[16]_exec()\fP in a \fB/g\fP or \fB/G\fP sequence matches
+If any call to \fBpcre[16|32]_exec()\fP in a \fB/g\fP or \fB/G\fP sequence matches
an empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
PCRE_ANCHORED flags set in order to search for another, non-empty, match at the
same point. If this second match fails, the start offset is advanced, and the
@@ -407,7 +423,7 @@ modifier because /S+ and /S++ have other meanings.
The \fB/=\fP modifier requests that the values of all potential captured
parentheses be output after a match. By default, only those up to the highest
one actually used in the match are output (corresponding to the return code
-from \fBpcre[16]_exec()\fP). Values in the offsets vector corresponding to
+from \fBpcre[16|32]_exec()\fP). Values in the offsets vector corresponding to
higher numbers should be set to -1, and these are output as "<unset>". This
modifier gives a way of checking that this is happening.
.P
@@ -431,15 +447,15 @@ below.
.P
The \fB/I\fP modifier requests that \fBpcretest\fP output information about the
compiled pattern (whether it is anchored, has a fixed first character, and
-so on). It does this by calling \fBpcre[16]_fullinfo()\fP after compiling a
+so on). It does this by calling \fBpcre[16|32]_fullinfo()\fP after compiling a
pattern. If the pattern is studied, the results of that are also output.
.P
The \fB/K\fP modifier requests \fBpcretest\fP to show names from backtracking
-control verbs that are returned from calls to \fBpcre[16]_exec()\fP. It causes
-\fBpcretest\fP to create a \fBpcre[16]_extra\fP block if one has not already
-been created by a call to \fBpcre[16]_study()\fP, and to set the
+control verbs that are returned from calls to \fBpcre[16|32]_exec()\fP. It causes
+\fBpcretest\fP to create a \fBpcre[16|32]_extra\fP block if one has not already
+been created by a call to \fBpcre[16|32]_study()\fP, and to set the
PCRE_EXTRA_MARK flag and the \fBmark\fP field within it, every time that
-\fBpcre[16]_exec()\fP is called. If the variable that the \fBmark\fP field
+\fBpcre[16|32]_exec()\fP is called. If the variable that the \fBmark\fP field
points to is non-NULL for a match, non-match, or partial match, \fBpcretest\fP
prints the string to which it points. For a match, this is shown on a line by
itself, tagged with "MK:". For a non-match it is added to the message.
@@ -450,24 +466,24 @@ example,
/pattern/Lfr_FR
.sp
For this reason, it must be the last modifier. The given locale is set,
-\fBpcre[16]_maketables()\fP is called to build a set of character tables for
-the locale, and this is then passed to \fBpcre[16]_compile()\fP when compiling
+\fBpcre[16|32]_maketables()\fP is called to build a set of character tables for
+the locale, and this is then passed to \fBpcre[16|32]_compile()\fP when compiling
the regular expression. Without an \fB/L\fP (or \fB/T\fP) modifier, NULL is
passed as the tables pointer; that is, \fB/L\fP applies only to the expression
on which it appears.
.P
The \fB/M\fP modifier causes the size in bytes of the memory block used to hold
the compiled pattern to be output. This does not include the size of the
-\fBpcre[16]\fP block; it is just the actual compiled data. If the pattern is
+\fBpcre[16|32]\fP block; it is just the actual compiled data. If the pattern is
successfully studied with the PCRE_STUDY_JIT_COMPILE option, the size of the
JIT compiled code is also output.
.P
-The \fB/S\fP modifier causes \fBpcre[16]_study()\fP to be called after the
+The \fB/S\fP modifier causes \fBpcre[16|32]_study()\fP to be called after the
expression has been compiled, and the results used when the expression is
matched. There are a number of qualifying characters that may follow \fB/S\fP.
They may appear in any order.
.P
-If \fBS\fP is followed by an exclamation mark, \fBpcre[16]_study()\fP is called
+If \fBS\fP is followed by an exclamation mark, \fBpcre[16|32]_study()\fP is called
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
\fBpcre[16|32]_extra\fP block, even when studying discovers no useful information.
.P
@@ -478,7 +494,7 @@ never studied, independently of \fB-s\fP. This feature is used in the test
files in a few cases where the output is different when the pattern is studied.
.P
If the \fB/S\fP modifier is followed by a + character, the call to
-\fBpcre[16]_study()\fP is made with all the JIT study options, requesting
+\fBpcre[16|32]_study()\fP is made with all the JIT study options, requesting
just-in-time optimization support if it is available, for both normal and
partial matching. If you want to restrict the JIT compiling modes, you can
follow \fB/S+\fP with a digit in the range 1 to 7:
@@ -498,7 +514,7 @@ Note that there is also an independent \fB/+\fP modifier; it must not be given
immediately after \fB/S\fP or \fB/S+\fP because this will be misinterpreted.
.P
If JIT studying is successful, the compiled JIT code will automatically be used
-when \fBpcre[16]_exec()\fP is run, except when incompatible run-time options
+when \fBpcre[16|32]_exec()\fP is run, except when incompatible run-time options
are specified. For more details, see the
.\" HREF
\fBpcrejit\fP
@@ -512,7 +528,7 @@ option. This makes it possible to specify that JIT is never to be used for
certain patterns.
.P
The \fB/T\fP modifier must be followed by a single digit. It causes a specific
-set of built-in character tables to be passed to \fBpcre[16]_compile()\fP. It
+set of built-in character tables to be passed to \fBpcre[16|32]_compile()\fP. It
is used in the standard PCRE tests to check behaviour with different character
tables. The digit specifies the tables as follows:
.sp
@@ -547,7 +563,7 @@ ignored.
.SH "DATA LINES"
.rs
.sp
-Before each data line is passed to \fBpcre[16]_exec()\fP, leading and trailing
+Before each data line is passed to \fBpcre[16|32]_exec()\fP, leading and trailing
white space is removed, and it is then scanned for \e escapes. Some of these
are pretty esoteric features, intended for checking out some of the more
complicated features of PCRE. If you are just testing "ordinary" regular
@@ -566,20 +582,20 @@ recognized:
\et tab (\ex09)
\ev vertical tab (\ex0b)
\ennn octal character (up to 3 octal digits); always
- a byte unless > 255 in UTF-8 or 16-bit mode
+ a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
\exhh hexadecimal byte (up to 2 hex digits)
\ex{hh...} hexadecimal character (any number of hex digits)
.\" JOIN
- \eA pass the PCRE_ANCHORED option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \eA pass the PCRE_ANCHORED option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \eB pass the PCRE_NOTBOL option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \eB pass the PCRE_NOTBOL option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \eCdd call pcre[16]_copy_substring() for substring dd
+ \eCdd call pcre[16|32]_copy_substring() for substring dd
after a successful match (number less than 32)
.\" JOIN
- \eCname call pcre[16]_copy_named_substring() for substring
+ \eCname call pcre[16|32]_copy_named_substring() for substring
"name" after a successful match (name termin-
ated by next non alphanumeric character)
.\" JOIN
@@ -595,68 +611,68 @@ recognized:
.\" JOIN
\eC*n pass the number n (may be negative) as callout
data; this is used as the callout return value
- \eD use the \fBpcre[16]_dfa_exec()\fP match function
- \eF only shortest match for \fBpcre[16]_dfa_exec()\fP
+ \eD use the \fBpcre[16|32]_dfa_exec()\fP match function
+ \eF only shortest match for \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \eGdd call pcre[16]_get_substring() for substring dd
+ \eGdd call pcre[16|32]_get_substring() for substring dd
after a successful match (number less than 32)
.\" JOIN
- \eGname call pcre[16]_get_named_substring() for substring
+ \eGname call pcre[16|32]_get_named_substring() for substring
"name" after a successful match (name termin-
ated by next non-alphanumeric character)
.\" JOIN
\eJdd set up a JIT stack of dd kilobytes maximum (any
number of digits)
.\" JOIN
+ \eL call pcre[16|32]_get_substring_list() after a
+ \eL call pcre[16|32]_get_substringlist() after a
successful match
.\" JOIN
\eM discover the minimum MATCH_LIMIT and
MATCH_LIMIT_RECURSION settings
.\" JOIN
- \eN pass the PCRE_NOTEMPTY option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP; if used twice, pass the
+ \eN pass the PCRE_NOTEMPTY option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP; if used twice, pass the
PCRE_NOTEMPTY_ATSTART option
.\" JOIN
\eOdd set the size of the output vector passed to
- \fBpcre[16]_exec()\fP to dd (any number of digits)
+ \fBpcre[16|32]_exec()\fP to dd (any number of digits)
.\" JOIN
- \eP pass the PCRE_PARTIAL_SOFT option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP; if used twice, pass the
+ \eP pass the PCRE_PARTIAL_SOFT option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP; if used twice, pass the
PCRE_PARTIAL_HARD option
.\" JOIN
\eQdd set the PCRE_MATCH_LIMIT_RECURSION limit to dd
(any number of digits)
- \eR pass the PCRE_DFA_RESTART option to \fBpcre[16]_dfa_exec()\fP
+ \eR pass the PCRE_DFA_RESTART option to \fBpcre[16|32]_dfa_exec()\fP
\eS output details of memory get/free calls during matching
.\" JOIN
- \eY pass the PCRE_NO_START_OPTIMIZE option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \eY pass the PCRE_NO_START_OPTIMIZE option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \eZ pass the PCRE_NOTEOL option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \eZ pass the PCRE_NOTEOL option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e? pass the PCRE_NO_UTF[8|16]_CHECK option to
- \fBpcre[16]_exec()\fP or \fBpcre[16]_dfa_exec()\fP
+ \e? pass the PCRE_NO_UTF[8|16|32]_CHECK option to
+ \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
\e>dd start the match at offset dd (optional "-"; then
any number of digits); this sets the \fIstartoffset\fP
- argument for \fBpcre[16]_exec()\fP or \fBpcre[16]_dfa_exec()\fP
+ argument for \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e<cr> pass the PCRE_NEWLINE_CR option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \e<cr> pass the PCRE_NEWLINE_CR option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e<lf> pass the PCRE_NEWLINE_LF option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \e<lf> pass the PCRE_NEWLINE_LF option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e<crlf> pass the PCRE_NEWLINE_CRLF option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \e<crlf> pass the PCRE_NEWLINE_CRLF option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e<anycrlf> pass the PCRE_NEWLINE_ANYCRLF option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \e<anycrlf> pass the PCRE_NEWLINE_ANYCRLF option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.\" JOIN
- \e<any> pass the PCRE_NEWLINE_ANY option to \fBpcre[16]_exec()\fP
- or \fBpcre[16]_dfa_exec()\fP
+ \e<any> pass the PCRE_NEWLINE_ANY option to \fBpcre[16|32]_exec()\fP
+ or \fBpcre[16|32]_dfa_exec()\fP
.sp
The use of \ex{hh...} is not dependent on the use of the \fB/8\fP modifier on
the pattern. It is recognized always. There may be any number of hexadecimal
@@ -672,6 +688,9 @@ for values less than 256, and causes an error for greater values.
In UTF-16 mode, all 4-digit \ex{hhhh} values are accepted. This makes it
possible to construct invalid UTF-16 sequences for testing purposes.
.P
+In UTF-32 mode, all 4- to 8-digit \ex{...} values are accepted. This makes it
+possible to construct invalid UTF-32 sequences for testing purposes.
+.P
The escapes that specify line ending sequences are literal strings, exactly as
shown. No more than one newline setting should be present in any data line.
.P
@@ -685,12 +704,12 @@ used by the just-in-time optimization code. It is ignored if JIT optimization
is not being used. Providing a stack that is larger than the default 32K is
necessary only for very complicated patterns.
.P
-If \eM is present, \fBpcretest\fP calls \fBpcre[16]_exec()\fP several times,
+If \eM is present, \fBpcretest\fP calls \fBpcre[16|32]_exec()\fP several times,
with different values in the \fImatch_limit\fP and \fImatch_limit_recursion\fP
-fields of the \fBpcre[16]_extra\fP data structure, until it finds the minimum
-numbers for each parameter that allow \fBpcre[16]_exec()\fP to complete without
+fields of the \fBpcre[16|32]_extra\fP data structure, until it finds the minimum
+numbers for each parameter that allow \fBpcre[16|32]_exec()\fP to complete without
error. Because this is testing a specific feature of the normal interpretive
-\fBpcre[16]_exec()\fP execution, the use of any JIT optimization that might
+\fBpcre[16|32]_exec()\fP execution, the use of any JIT optimization that might
have been set up by the \fB/S+\fP qualifier or \fB-s+\fP option is disabled.
.P
The \fImatch_limit\fP number is a measure of the amount of backtracking
@@ -703,7 +722,7 @@ needed to complete the match attempt.
.P
When \eO is used, the value specified may be higher or lower than the size set
by the \fB-o\fP command line option (or defaulted to 45); \eO applies only to
-the call of \fBpcre[16]_exec()\fP for the line in which it appears.
+the call of \fBpcre[16|32]_exec()\fP for the line in which it appears.
.P
If the \fB/P\fP modifier was present on the pattern, causing the POSIX wrapper
API to be used, the only option-setting sequences that have any effect are \eB,
@@ -715,8 +734,8 @@ to be passed to \fBregexec()\fP.
.rs
.sp
By default, \fBpcretest\fP uses the standard PCRE matching function,
-\fBpcre[16]_exec()\fP to match each data line. PCRE also supports an
+alternative matching function, \fBpcre[16|32]_dfa_exec()\fP, which operates in a
+\fBpcre[16|32]_exec()\fP to match each data line. PCRE also supports an
+alternative matching function, \fBpcre[16|32]_dfa_test()\fP, which operates in a
different way, and has some restrictions. The differences between the two
functions are described in the
.\" HREF
@@ -735,13 +754,13 @@ found. This is always the shortest possible match.
.rs
.sp
This section describes the output when the normal matching function,
-\fBpcre[16]_exec()\fP, is being used.
+\fBpcre[16|32]_exec()\fP, is being used.
.P
When a match succeeds, \fBpcretest\fP outputs the list of captured substrings
-that \fBpcre[16]_exec()\fP returns, starting with number 0 for the string that
+that \fBpcre[16|32]_exec()\fP returns, starting with number 0 for the string that
matched the whole pattern. Otherwise, it outputs "No match" when the return is
PCRE_ERROR_NOMATCH, and "Partial match:" followed by the partially matching
-substring when \fBpcre[16]_exec()\fP returns PCRE_ERROR_PARTIAL. (Note that
+substring when \fBpcre[16|32]_exec()\fP returns PCRE_ERROR_PARTIAL. (Note that
this is the entire substring that was inspected during the partial match; it
may include characters before the actual match start if a lookbehind assertion,
\eK, \eb, or \eB was involved.) For any other return, \fBpcretest\fP outputs
@@ -761,7 +780,7 @@ at least two. Here is an example of an interactive \fBpcretest\fP run.
No match
.sp
Unset capturing substrings that are not followed by one that is set are not
-returned by \fBpcre[16]_exec()\fP, and are not shown by \fBpcretest\fP. In the
+returned by \fBpcre[16|32]_exec()\fP, and are not shown by \fBpcretest\fP. In the
following example, there are two capturing substrings, but when the first data
line is matched, the second, unset substring is not shown. An "internal" unset
substring is shown as "<unset>", as for the second data line.
@@ -824,7 +843,7 @@ the newline sequence setting).
.SH "OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION"
.rs
.sp
-When the alternative matching function, \fBpcre[16]_dfa_exec()\fP, is used (by
+When the alternative matching function, \fBpcre[16|32]_dfa_exec()\fP, is used (by
means of the \eD escape sequence or the \fB-dfa\fP command line option), the
output consists of a list of all the matches that start at the first point in
the subject where there is at least one match. For example:
@@ -1030,7 +1049,8 @@ result is undefined.
.SH "SEE ALSO"
.rs
.sp
-\fBpcre\fP(3), \fBpcre16\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3),
+\fBpcre\fP(3), \fBpcre16\fP(3), \fBpcre32\fP(3), \fBpcreapi\fP(3),
+\fBpcrecallout\fP(3),
\fBpcrejit\fP(3), \fBpcrematching\fP(3), \fBpcrepartial\fP(3),
\fBpcrepattern\fP(3), \fBpcreprecompile\fP(3).
.
diff --git a/doc/pcreunicode.3 b/doc/pcreunicode.3
index 7777f2e..38309e5 100644
--- a/doc/pcreunicode.3
+++ b/doc/pcreunicode.3
@@ -7,7 +7,10 @@ PCRE - Perl-compatible regular expressions
From Release 8.30, in addition to its previous UTF-8 support, PCRE also
supports UTF-16 by means of a separate 16-bit library. This can be built as
well as, or instead of, the 8-bit library.
-.
+.P
+From Release 8.32, in addition to its previous UTF-8 and UTF-16 support,
+PCRE also supports UTF-32 by means of a separate 32-bit library. This can be
+built as well as, or instead of, the 8-bit and 16-bit libraries.
.
.SH "UTF-8 SUPPORT"
.rs
@@ -38,12 +41,27 @@ strings that are matched against it are treated as UTF-16 strings instead of
strings of 16-bit characters.
.
.
+.SH "UTF-32 SUPPORT"
+.rs
+.sp
+In order to process UTF-32 strings, you must build PCRE's 32-bit library with UTF
+support, and, in addition, you must call
+.\" HTML <a href="pcre_compile.html">
+.\" </a>
+\fBpcre32_compile()\fP
+.\"
+with the PCRE_UTF32 option flag, or the pattern must start with the sequence
+(*UTF32). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF-32 strings instead of
+strings of 32-bit characters.
+.
+.
.SH "UTF SUPPORT OVERHEAD"
.rs
.sp
If you compile PCRE with UTF support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
-to testing the PCRE_UTF8/16 flag occasionally, so should not be very big.
+to testing the PCRE_UTF[8|16|32] flag occasionally, so should not be very big.
.
.
.SH "UNICODE PROPERTY SUPPORT"
@@ -138,6 +156,28 @@ the pattern or subject it is given (respectively) contains only valid UTF-16
sequences. In this case, it does not diagnose an invalid UTF-16 string.
.
.
+.\" HTML <a name="utf32strings"></a>
+.SS "Validity of UTF-32 strings"
+.rs
+.sp
+When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
+passed as patterns and subjects are (by default) checked for validity on entry
+to the relevant functions. This check allows only values in the range U+0
+to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and U+FFFE.
+.P
+If an invalid UTF-32 string is passed to PCRE, an error return is given. At
+compile time, the only additional information is the offset to the first data
+unit of the failing character. The run-time functions \fBpcre32_exec()\fP and
+\fBpcre32_dfa_exec()\fP also pass back this information, as well as a more
+detailed reason code if the caller has provided memory in which to do this.
+.P
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance. If you set
+the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that
+the pattern or subject it is given (respectively) contains only valid UTF-32
+sequences. In this case, it does not diagnose an invalid UTF-32 string.
+.
+.
.SS "General comments about UTF modes"
.rs
.sp
@@ -155,15 +195,15 @@ data units, for example: \ex{100}{3}.
unit.
.P
5. The escape sequence \eC can be used to match a single byte in UTF-8 mode, or
-a single 16-bit data unit in UTF-16 mode, but its use can lead to some strange
-effects because it breaks up multi-unit characters (see the description of \eC
-in the
+a single 16-bit data unit in UTF-16 mode, or a single 32-bit data unit in
+UTF-32 mode, but its use can lead to some strange effects because it breaks up
+multi-unit characters (see the description of \eC in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation). The use of \eC is not supported in the alternative matching
-function \fBpcre[16]_dfa_exec()\fP, nor is it supported in UTF mode by the JIT
-optimization of \fBpcre[16]_exec()\fP. If JIT optimization is requested for a
+function \fBpcre[16|32]_dfa_exec()\fP, nor is it supported in UTF mode by the JIT
+optimization of \fBpcre[16|32]_exec()\fP. If JIT optimization is requested for a
UTF pattern that contains \eC, it will not succeed, and so the matching will
be carried out by the normal interpretive function.
.P
diff --git a/libpcre32.pc.in b/libpcre32.pc.in
new file mode 100644
index 0000000..6582105
--- /dev/null
+++ b/libpcre32.pc.in
@@ -0,0 +1,12 @@
+# Package Information for pkg-config
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libpcre32
+Description: PCRE - Perl compatible regular expressions C library with 32 bit character support
+Version: @PACKAGE_VERSION@
+Libs: -L${libdir} -lpcre32
+Cflags: -I${includedir} @PCRE_STATIC_CFLAG@
diff --git a/maint/ManyConfigTests b/maint/ManyConfigTests
index 0497ff3..7eed2a2 100755
--- a/maint/ManyConfigTests
+++ b/maint/ManyConfigTests
@@ -169,6 +169,14 @@ for opts in \
"--enable-pcre16 --enable-unicode-properties --disable-stack-for-recursion --disable-shared" \
"--enable-pcre16 --enable-jit --enable-unicode-properties --with-link-size=3 --disable-shared" \
"--enable-pcre16 --enable-jit --enable-unicode-properties --with-link-size=4 --disable-shared"
+ "--enable-pcre32" \
+ "--enable-pcre32 --enable-jit --enable-utf --disable-shared" \
+ "--enable-pcre32 --enable-jit --enable-unicode-properties --disable-shared" \
+ "--enable-pcre32 --enable-jit --disable-pcre8 --disable-shared" \
+ "--enable-pcre32 --enable-jit --disable-pcre8 --enable-utf --disable-shared" \
+ "--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
+ "--enable-pcre32 --enable-unicode-properties --disable-stack-for-recursion --disable-shared" \
+ "--enable-pcre32 --enable-jit --enable-unicode-properties --with-link-size=4 --disable-shared"
do
runtest
done
@@ -184,7 +192,8 @@ for opts in \
"--enable-unicode-properties --disable-stack-for-recursion --disable-shared" \
"--enable-unicode-properties --with-link-size=3 --disable-shared" \
"--enable-jit --enable-unicode-properties --disable-shared" \
- "--enable-pcre16 --enable-jit --enable-unicode-properties --disable-shared"
+ "--enable-pcre16 --enable-pcre32 --enable-jit --enable-unicode-properties " \
+ "--disable-shared"
do
runtest
done
diff --git a/maint/README b/maint/README
index edf29cd..45050f7 100644
--- a/maint/README
+++ b/maint/README
@@ -279,7 +279,8 @@ others are relatively new.
support --outputfile=name.
. Consider making UTF-8 and UCP the default for PCRE n.0 for some n > 8.
- (And now presumably UTF-16 and UCP for the 16-bit library.)
+ (And now presumably UTF-16 and UCP for the 16-bit library, and UTF-32 and UCP
+ for the 32-bit library.)
. Add a user pointer to pcre_malloc/free functions -- some option would be
needed to retain backward compatibility.
diff --git a/pcre-config.in b/pcre-config.in
index 595e5d1..ac06a33 100644
--- a/pcre-config.in
+++ b/pcre-config.in
@@ -16,6 +16,10 @@ if test @enable_pcre16@ = yes ; then
libs="[--libs16] $libs"
fi
+if test @enable_pcre32@ = yes ; then
+ libs="[--libs32] $libs"
+fi
+
if test @enable_pcre8@ = yes ; then
libs="[--libs] [--libs-posix] $libs"
cflags="$cflags [--cflags-posix]"
@@ -106,6 +110,13 @@ while test $# -gt 0; do
echo "${usage}" 1>&2
fi
;;
+ --libs32)
+ if test @enable_pcre32@ = yes ; then
+ echo $libS$libR -lpcre32
+ else
+ echo "${usage}" 1>&2
+ fi
+ ;;
--libs-cpp)
if test @enable_cpp@ = yes ; then
echo $libS$libR -lpcrecpp -lpcre
diff --git a/pcre.h.in b/pcre.h.in
index 8c0bdda..959bc38 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -117,12 +117,14 @@ compiling). */
#define PCRE_UNGREEDY 0x00000200 /* Compile */
#define PCRE_NOTEMPTY 0x00000400 /* Exec, DFA exec */
/* The next two are also used in exec and DFA exec */
-#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16) */
-#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8) */
+#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16 and PCRE_UTF32) */
+#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8 and PCRE_UTF32) */
+#define PCRE_UTF32 0x00000800 /* Compile (same as PCRE_UTF8 and PCRE_UTF16) */
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* Compile */
/* The next two are also used in exec and DFA exec */
-#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK) */
-#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK) */
+#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK and PCRE_NO_UTF32_CHECK) */
+#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK and PCRE_NO_UTF32_CHECK) */
+#define PCRE_NO_UTF32_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK and PCRE_NO_UTF16_CHECK) */
#define PCRE_AUTO_CALLOUT 0x00004000 /* Compile */
#define PCRE_PARTIAL_SOFT 0x00008000 /* Exec, DFA exec */
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
@@ -156,10 +158,12 @@ compiling). */
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
-#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16 */
-#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16 */
-#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
-#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */
+#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */
+#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */
+#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16/32 */
+#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16/32 */
+#define PCRE_ERROR_BADUTF32_OFFSET (-11) /* Same for 8/16/32 */
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
@@ -214,6 +218,12 @@ compiling). */
#define PCRE_UTF16_ERR3 3
#define PCRE_UTF16_ERR4 4
+/* Specific error codes for UTF-32 validity checks */
+
+#define PCRE_UTF32_ERR0 0
+#define PCRE_UTF32_ERR1 1
+#define PCRE_UTF32_ERR2 2
+
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
@@ -252,6 +262,7 @@ compatible. */
#define PCRE_CONFIG_JIT 9
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
+#define PCRE_CONFIG_UTF32 12
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
@@ -261,7 +272,7 @@ compatible. */
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
-/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
+/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
@@ -280,12 +291,18 @@ typedef struct real_pcre pcre;
struct real_pcre16; /* declaration; the definition is private */
typedef struct real_pcre16 pcre16;
+struct real_pcre32; /* declaration; the definition is private */
+typedef struct real_pcre32 pcre32;
+
struct real_pcre_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre_jit_stack pcre_jit_stack;
struct real_pcre16_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
+struct real_pcre32_jit_stack; /* declaration; the definition is private */
+typedef struct real_pcre32_jit_stack pcre32_jit_stack;
+
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
a 16 bit wide signed data type. Otherwise it can be a dummy data type since
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
@@ -297,6 +314,17 @@ pcre16 functions are not implemented. There is a check for this in pcre_internal
#define PCRE_SPTR16 const PCRE_UCHAR16 *
#endif
+/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
+a 32 bit wide unsigned data type. Otherwise it can be a dummy data type since
+pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
+#ifndef PCRE_UCHAR32
+#define PCRE_UCHAR32 unsigned int
+#endif
+
+#ifndef PCRE_SPTR32
+#define PCRE_SPTR32 const PCRE_UCHAR32 *
+#endif
+
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
@@ -333,6 +361,19 @@ typedef struct pcre16_extra {
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre16_extra;
+/* Same structure as above, but with 32 bit char pointers. */
+
+typedef struct pcre32_extra {
+ unsigned long int flags; /* Bits for which fields are set */
+ void *study_data; /* Opaque data from pcre_study() */
+ unsigned long int match_limit; /* Maximum number of calls to match() */
+ void *callout_data; /* Data passed back in callouts */
+ const unsigned char *tables; /* Pointer to character tables */
+ unsigned long int match_limit_recursion; /* Max recursive calls to match() */
+ PCRE_UCHAR32 **mark; /* For passing back a mark pointer */
+ void *executable_jit; /* Contains a pointer to a compiled jit code */
+} pcre32_extra;
+
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
@@ -380,6 +421,28 @@ typedef struct pcre16_callout_block {
/* ------------------------------------------------------------------ */
} pcre16_callout_block;
+/* Same structure as above, but with 32 bit char pointers. */
+
+typedef struct pcre32_callout_block {
+ int version; /* Identifies version of block */
+ /* ------------------------ Version 0 ------------------------------- */
+ int callout_number; /* Number compiled into pattern */
+ int *offset_vector; /* The offset vector */
+ PCRE_SPTR32 subject; /* The subject being matched */
+ int subject_length; /* The length of the subject */
+ int start_match; /* Offset to start of this match attempt */
+ int current_position; /* Where we currently are in the subject */
+ int capture_top; /* Max current capture */
+ int capture_last; /* Most recently closed capture */
+ void *callout_data; /* Data passed in with the call */
+ /* ------------------- Added for Version 1 -------------------------- */
+ int pattern_position; /* Offset to next item in the pattern */
+ int next_item_length; /* Length of next item in the pattern */
+ /* ------------------- Added for Version 2 -------------------------- */
+ const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */
+ /* ------------------------------------------------------------------ */
+} pcre32_callout_block;
+
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
@@ -398,6 +461,12 @@ PCRE_EXP_DECL void (*pcre16_free)(void *);
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
+
+PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
+PCRE_EXP_DECL void (*pcre32_free)(void *);
+PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
+PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
+PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
@@ -410,12 +479,19 @@ PCRE_EXP_DECL void pcre16_free(void *);
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
PCRE_EXP_DECL void pcre16_stack_free(void *);
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
+
+PCRE_EXP_DECL void *pcre32_malloc(size_t);
+PCRE_EXP_DECL void pcre32_free(void *);
+PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
+PCRE_EXP_DECL void pcre32_stack_free(void *);
+PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
#endif /* VPCOMPAT */
/* User defined callback which provides a stack just before the match starts. */
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
+typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
/* Exported PCRE functions */
@@ -423,83 +499,122 @@ PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
const unsigned char *);
+PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
+ const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
int *, const unsigned char *);
+PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
+ int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre16_config(int, void *);
+PCRE_EXP_DECL int pcre32_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
+PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
+ int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
char *, int);
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
PCRE_UCHAR16 *, int);
+PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
+ PCRE_UCHAR32 *, int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
+ PCRE_SPTR32, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int);
+PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
+ PCRE_SPTR32, int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
+PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
+PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
void *);
+PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
+ void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
+PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
+ int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
+PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
+PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
+ PCRE_UCHAR32 **, PCRE_UCHAR32 **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
PCRE_SPTR16 *);
+PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
+ PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
PCRE_SPTR16 **);
+PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
+ PCRE_SPTR32 **);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
+PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
+PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
+PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
+PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
PCRE_EXP_DECL const char *pcre_version(void);
PCRE_EXP_DECL const char *pcre16_version(void);
+PCRE_EXP_DECL const char *pcre32_version(void);
/* Utility functions for byte order swaps. */
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
const unsigned char *);
+PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
+ const unsigned char *);
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
PCRE_SPTR16, int, int *, int);
+PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
+ PCRE_SPTR32, int, int *, int);
/* JIT compiler related functions. */
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
+PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
+PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
pcre_jit_callback, void *);
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
pcre16_jit_callback, void *);
+PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
+ pcre32_jit_callback, void *);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/pcre32_byte_order.c b/pcre32_byte_order.c
new file mode 100644
index 0000000..9cf5362
--- /dev/null
+++ b/pcre32_byte_order.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_byte_order.c"
+
+/* End of pcre32_byte_order.c */
diff --git a/pcre32_chartables.c b/pcre32_chartables.c
new file mode 100644
index 0000000..b5d8c23
--- /dev/null
+++ b/pcre32_chartables.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_chartables.c"
+
+/* End of pcre32_chartables.c */
diff --git a/pcre32_compile.c b/pcre32_compile.c
new file mode 100644
index 0000000..d781eb3
--- /dev/null
+++ b/pcre32_compile.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_compile.c"
+
+/* End of pcre32_compile.c */
diff --git a/pcre32_config.c b/pcre32_config.c
new file mode 100644
index 0000000..d63f3e9
--- /dev/null
+++ b/pcre32_config.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_config.c"
+
+/* End of pcre32_config.c */
diff --git a/pcre32_dfa_exec.c b/pcre32_dfa_exec.c
new file mode 100644
index 0000000..b0bfd34
--- /dev/null
+++ b/pcre32_dfa_exec.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_dfa_exec.c"
+
+/* End of pcre32_dfa_exec.c */
diff --git a/pcre32_exec.c b/pcre32_exec.c
new file mode 100644
index 0000000..8170ed7
--- /dev/null
+++ b/pcre32_exec.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_exec.c"
+
+/* End of pcre32_exec.c */
diff --git a/pcre32_fullinfo.c b/pcre32_fullinfo.c
new file mode 100644
index 0000000..6ecc520
--- /dev/null
+++ b/pcre32_fullinfo.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_fullinfo.c"
+
+/* End of pcre32_fullinfo.c */
diff --git a/pcre32_get.c b/pcre32_get.c
new file mode 100644
index 0000000..d35deee
--- /dev/null
+++ b/pcre32_get.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_get.c"
+
+/* End of pcre32_get.c */
diff --git a/pcre32_globals.c b/pcre32_globals.c
new file mode 100644
index 0000000..32e0914
--- /dev/null
+++ b/pcre32_globals.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_globals.c"
+
+/* End of pcre32_globals.c */
diff --git a/pcre32_jit_compile.c b/pcre32_jit_compile.c
new file mode 100644
index 0000000..2e7c6f9
--- /dev/null
+++ b/pcre32_jit_compile.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_jit_compile.c"
+
+/* End of pcre32_jit_compile.c */
diff --git a/pcre32_maketables.c b/pcre32_maketables.c
new file mode 100644
index 0000000..5d1b1c6
--- /dev/null
+++ b/pcre32_maketables.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_maketables.c"
+
+/* End of pcre32_maketables.c */
diff --git a/pcre32_newline.c b/pcre32_newline.c
new file mode 100644
index 0000000..7f8d536
--- /dev/null
+++ b/pcre32_newline.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_newline.c"
+
+/* End of pcre32_newline.c */
diff --git a/pcre32_ord2utf32.c b/pcre32_ord2utf32.c
new file mode 100644
index 0000000..8f71e85
--- /dev/null
+++ b/pcre32_ord2utf32.c
@@ -0,0 +1,90 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This file contains a private PCRE function that converts an ordinal
+character value into a UTF-32 string. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_internal.h"
+
+#define MASK (0x1fffffu)
+
+/*************************************************
+* Convert character value to UTF-32 *
+*************************************************/
+
+/* This function takes an integer value, nominally in the range 0 - 0x10ffff,
+and stores it as a single UTF-32 code unit (1 pcre_uchar) in the buffer.
+
+Arguments:
+ cvalue the character value
+ buffer pointer to buffer for result - at least 1 pcre_uchar long
+
+Returns: number of code units placed in the buffer (1, or 0 without SUPPORT_UTF)
+*/
+
+int
+PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
+{
+#ifdef SUPPORT_UTF
+
+cvalue &= MASK; /* MASK is 0x1fffff: only the low 21 bits are kept */
+
+/* A surrogate (0xd800 - 0xdfff) or a value above 0x10ffff is replaced by 0xfffe */
+if ((cvalue & 0xfffff800u) == 0xd800u || cvalue >= 0x110000u)
+ cvalue = 0xfffeu;
+
+*buffer = (pcre_uchar)cvalue;
+return 1;
+
+#else /* SUPPORT_UTF */
+(void)(cvalue); /* Keep compiler happy; this function won't ever be */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
+return 0;
+#endif /* SUPPORT_UTF */
+}
+
+/* End of pcre32_ord2utf32.c */
diff --git a/pcre32_printint.c b/pcre32_printint.c
new file mode 100644
index 0000000..f3fd7b2
--- /dev/null
+++ b/pcre32_printint.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_printint.c"
+
+/* End of pcre32_printint.c */
diff --git a/pcre32_refcount.c b/pcre32_refcount.c
new file mode 100644
index 0000000..dbdf432
--- /dev/null
+++ b/pcre32_refcount.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_refcount.c"
+
+/* End of pcre32_refcount.c */
diff --git a/pcre32_string_utils.c b/pcre32_string_utils.c
new file mode 100644
index 0000000..e37b3d4
--- /dev/null
+++ b/pcre32_string_utils.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_string_utils.c"
+
+/* End of pcre32_string_utils.c */
diff --git a/pcre32_study.c b/pcre32_study.c
new file mode 100644
index 0000000..d3a3afe
--- /dev/null
+++ b/pcre32_study.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_study.c"
+
+/* End of pcre32_study.c */
diff --git a/pcre32_tables.c b/pcre32_tables.c
new file mode 100644
index 0000000..3d94cca
--- /dev/null
+++ b/pcre32_tables.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_tables.c"
+
+/* End of pcre32_tables.c */
diff --git a/pcre32_ucd.c b/pcre32_ucd.c
new file mode 100644
index 0000000..befe22d
--- /dev/null
+++ b/pcre32_ucd.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_ucd.c"
+
+/* End of pcre32_ucd.c */
diff --git a/pcre32_utf32_utils.c b/pcre32_utf32_utils.c
new file mode 100644
index 0000000..971c333
--- /dev/null
+++ b/pcre32_utf32_utils.c
@@ -0,0 +1,138 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a function for converting any UTF-32 character
+strings to host byte order. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_internal.h"
+
+static pcre_uint32
+swap_uint32(pcre_uint32 value) /* returns value with its four bytes reversed */
+{
+return ((value & 0x000000ff) << 24) | /* lowest byte -> highest byte */
+ ((value & 0x0000ff00) << 8) | /* second byte -> third byte */
+ ((value & 0x00ff0000) >> 8) | /* third byte -> second byte */
+ (value >> 24); /* highest byte -> lowest byte */
+}
+
+/*************************************************
+* Convert any UTF-32 string to host byte order *
+*************************************************/
+
+/* This function takes a UTF-32 string and converts
+it to host byte order. The length can be explicitly set,
+or automatically detected for zero terminated strings.
+BOMs can be kept or discarded during the conversion.
+Conversion can be done in place (output == input).
+
+Arguments:
+ output the output buffer, its size must be greater than
+ or equal to that of the input string
+ input any UTF-32 string
+ length the number of 32-bit units in the input string
+ can be less than zero for zero terminated strings
+ host_byte_order
+ A non-zero value means the input is in host byte
+ order, which can be dynamically changed by BOMs later.
+ Initially it contains the starting byte order and returns
+ with the last byte order so it can be used for stream
+ processing. It can be NULL, in which case the input is
+ initially assumed to be in host byte order.
+ keep_boms for a non-zero value, the BOM (0xfeff) characters
+ are copied as well
+
+Returns: the number of 32-bit units placed into the output buffer,
+ including the zero-terminator
+*/
+
+int
+pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *output, PCRE_SPTR32 input,
+ int length, int *host_byte_order, int keep_boms)
+{
+#ifdef SUPPORT_UTF
+/* This function converts any UTF-32 string to host byte order and optionally
+removes any Byte Order Marks (BOMs). Returns the remaining length. */
+int host_bo = host_byte_order != NULL ? *host_byte_order : 1; /* NULL: assume host order */
+pcre_uchar *optr = (pcre_uchar *)output;
+const pcre_uchar *iptr = (const pcre_uchar *)input;
+const pcre_uchar *end;
+/* The c variable must be unsigned. */
+register pcre_uchar c;
+
+if (length < 0)
+ length = STRLEN_UC(iptr) + 1; /* count the terminator too, assuming STRLEN_UC excludes it */
+end = iptr + length;
+
+while (iptr < end)
+ {
+ c = *iptr++;
+ if (c == 0x0000feffu || c == 0xfffe0000u) /* a BOM, read in host or in swapped order */
+ {
+ /* Detecting the byte order of the machine is unnecessary, it is
+ enough to know that the UTF-32 string has the same byte order or not. */
+ host_bo = c == 0x0000feffu;
+ if (keep_boms != 0)
+ *optr++ = 0x0000feffu; /* a kept BOM is always written as 0xfeff */
+ else
+ length--; /* a dropped BOM is not counted in the returned length */
+ }
+ else
+ *optr++ = host_bo ? c : swap_uint32(c);
+ }
+if (host_byte_order != NULL)
+ *host_byte_order = host_bo; /* hand the last byte order seen back to the caller */
+
+#else /* SUPPORT_UTF */
+(void)(output); /* Keep picky compilers happy */
+(void)(input);
+(void)(keep_boms);
+#endif /* SUPPORT_UTF */
+return length;
+}
+
+/* End of pcre32_utf32_utils.c */
diff --git a/pcre32_valid_utf32.c b/pcre32_valid_utf32.c
new file mode 100644
index 0000000..799fe50
--- /dev/null
+++ b/pcre32_valid_utf32.c
@@ -0,0 +1,126 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function for validating UTF-32 character
+strings. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_internal.h"
+
+#define MASK (0x1fffffu)
+
+/*************************************************
+* Validate a UTF-32 string *
+*************************************************/
+
+/* This function is called (optionally) at the start of compile or match, to
+check that a supposed UTF-32 string is actually valid. The early check means
+that subsequent code can assume it is dealing with a valid string. The check
+can be turned off for maximum performance, but the consequences of supplying an
+invalid string are then undefined.
+
+From release 8.21 more information about the details of the error are passed
+back in the returned value:
+
+PCRE_UTF32_ERR0 No error
+PCRE_UTF32_ERR1 Surrogate character
+PCRE_UTF32_ERR2 Not allowed character
+
+Arguments:
+ string points to the string
+ length length of string, or -1 if the string is zero-terminated
+ erroroffset pointer to an error position offset variable
+
+Returns: = 0 if the string is a valid UTF-32 string
+ > 0 otherwise, setting the offset of the bad character
+*/
+
+int
+PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
+{
+#ifdef SUPPORT_UTF
+register PCRE_PUCHAR p;
+register pcre_uchar c;
+
+if (length < 0)
+ {
+ for (p = string; *p != 0; p++); /* empty body: just scan to the terminating zero */
+ length = p - string;
+ }
+
+for (p = string; length-- > 0; p++)
+ {
+ c = *p & MASK; /* only the low 21 bits (MASK is 0x1fffff) are examined */
+
+ if ((c & 0xfffff800u) != 0xd800u) /* true when c lies outside 0xd800..0xdfff */
+ {
+ /* Normal UTF-32 code point. Neither high nor low surrogate. */
+
+ /* 0xfffe is probably a byte-swapped 16-bit BOM. Regardless, the string is rejected. */
+ if (c == 0xfffeu)
+ {
+ *erroroffset = p - string; /* offset counted in string elements */
+ return PCRE_UTF32_ERR2;
+ }
+ }
+ else
+ {
+ /* A surrogate */
+ *erroroffset = p - string; /* offset counted in string elements */
+ return PCRE_UTF32_ERR1;
+ }
+ }
+
+#else /* SUPPORT_UTF */
+(void)(string); /* Keep picky compilers happy */
+(void)(length);
+#endif /* SUPPORT_UTF */
+
+return PCRE_UTF32_ERR0; /* This indicates success */
+}
+
+/* End of pcre32_valid_utf32.c */
diff --git a/pcre32_version.c b/pcre32_version.c
new file mode 100644
index 0000000..fdaad9b
--- /dev/null
+++ b/pcre32_version.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_version.c"
+
+/* End of pcre32_version.c */
diff --git a/pcre32_xclass.c b/pcre32_xclass.c
new file mode 100644
index 0000000..5662408
--- /dev/null
+++ b/pcre32_xclass.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 32 bit character support. */
+#define COMPILE_PCRE32
+
+#include "pcre_xclass.c"
+
+/* End of pcre32_xclass.c */
diff --git a/pcre_byte_order.c b/pcre_byte_order.c
index 6ac8325..472eb38 100644
--- a/pcre_byte_order.c
+++ b/pcre_byte_order.c
@@ -95,12 +95,15 @@ Arguments:
Returns: 0 if the swap is successful, negative on error
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
pcre_extra *extra_data, const unsigned char *tables)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
pcre16_extra *extra_data, const unsigned char *tables)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *argument_re,
+ pcre32_extra *extra_data, const unsigned char *tables)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
@@ -108,10 +111,10 @@ pcre_study_data *study;
#ifndef COMPILE_PCRE8
pcre_uchar *ptr;
int length;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
BOOL utf;
BOOL utf16_char;
-#endif /* SUPPORT_UTF */
+#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
#endif /* !COMPILE_PCRE8 */
if (re == NULL) return PCRE_ERROR_NULL;
@@ -131,13 +134,22 @@ re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
+#elif defined COMPILE_PCRE32
+re->first_char = swap_uint32(re->first_char);
+re->req_char = swap_uint32(re->req_char);
+#endif
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
+#ifdef COMPILE_PCRE32
+re->dummy1 = swap_uint16(re->dummy1);
+re->dummy2 = swap_uint16(re->dummy2);
+#endif
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
@@ -150,20 +162,24 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
#ifndef COMPILE_PCRE8
ptr = (pcre_uchar *)re + re->name_table_offset;
length = re->name_count * re->name_entry_size;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
utf = (re->options & PCRE_UTF16) != 0;
utf16_char = FALSE;
-#endif
+#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
while(TRUE)
{
/* Swap previous characters. */
while (length-- > 0)
{
+#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
+#elif defined COMPILE_PCRE32
+ *ptr = swap_uint32(*ptr);
+#endif
ptr++;
}
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (utf16_char)
{
if (HAS_EXTRALEN(ptr[-1]))
@@ -178,13 +194,17 @@ while(TRUE)
/* Get next opcode. */
length = 0;
+#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
+#elif defined COMPILE_PCRE32
+ *ptr = swap_uint32(*ptr);
+#endif
switch (*ptr)
{
case OP_END:
return 0;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
@@ -259,16 +279,26 @@ while(TRUE)
case OP_XCLASS:
/* Reverse the size of the XCLASS instance. */
ptr++;
+#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
+#elif defined COMPILE_PCRE32
+ *ptr = swap_uint32(*ptr);
+#endif
+#ifndef COMPILE_PCRE32
if (LINK_SIZE > 1)
{
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
ptr++;
*ptr = swap_uint16(*ptr);
}
+#endif
ptr++;
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
+#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
+#elif defined COMPILE_PCRE32
+ *ptr = swap_uint32(*ptr);
+#endif
if ((*ptr & XCL_MAP) != 0)
{
/* Skip the character bit map. */
@@ -279,7 +309,7 @@ while(TRUE)
}
ptr++;
}
-/* Control should never reach here in 16 bit mode. */
+/* Control should never reach here in 16/32 bit mode. */
#endif /* !COMPILE_PCRE8 */
return 0;
diff --git a/pcre_compile.c b/pcre_compile.c
index 1756b8f..b5633d7 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -53,7 +53,7 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
-/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
+/* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
library. We do not need to select pcre16_printint.c specially, because the
COMPILE_PCREx macro will already be appropriately set. */
@@ -123,6 +123,7 @@ overrun before it actually does run off the end of the data block. */
#define REQ_CASELESS 0x10000000l /* Indicates caselessness */
#define REQ_VARY 0x20000000l /* Reqchar followed non-literal item */
+#define REQ_MASK (REQ_CASELESS | REQ_VARY)
/* Repeated character flags. */
@@ -503,6 +504,7 @@ static const char error_texts[] =
/* 75 */
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character value in \\u.... sequence is too large\0"
+ "invalid UTF-32 string\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -838,12 +840,12 @@ else
#endif
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if (c > (utf ? 0x10ffff : 0xff))
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (c > (utf ? 0x10ffff : 0xffff))
-#endif
+#elif defined COMPILE_PCRE32
+ if (utf && c > 0x10ffff)
#endif
{
*errorcodeptr = ERR76;
@@ -1069,12 +1071,12 @@ else
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; }
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; }
-#endif
+#elif defined COMPILE_PCRE32
+ if (utf && c > 0x10ffff) { c = -1; break; }
#endif
}
@@ -1367,7 +1369,7 @@ Arguments:
name name to seek, or NULL if seeking a numbered subpattern
lorn name length, or subpattern number if name is NULL
xmode TRUE if we are in /x mode
- utf TRUE if we are in UTF-8 / UTF-16 mode
+ utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
count pointer to the current capturing subpattern number (updated)
Returns: the number of the named subpattern, or -1 if not found
@@ -1601,7 +1603,7 @@ Arguments:
name name to seek, or NULL if seeking a numbered subpattern
lorn name length, or subpattern number if name is NULL
xmode TRUE if we are in /x mode
- utf TRUE if we are in UTF-8 / UTF-16 mode
+ utf TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode
Returns: the number of the found subpattern, or -1 if not found
*/
@@ -1704,7 +1706,7 @@ and doing the check at the end; a flag specifies which mode we are running in.
Arguments:
code points to the start of the pattern (the bracket)
- utf TRUE in UTF-8 / UTF-16 mode
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
atend TRUE if called when the pattern is complete
cd the "compile data" structure
@@ -1838,7 +1840,7 @@ for (;;)
case OP_NOTI:
branchlength++;
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1852,7 +1854,7 @@ for (;;)
case OP_NOTEXACTI:
branchlength += GET2(cc,1);
cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1895,7 +1897,7 @@ for (;;)
/* Check a class for variable quantification */
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
/* Fall through */
@@ -2034,7 +2036,7 @@ length.
Arguments:
code points to start of expression
- utf TRUE in UTF-8 / UTF-16 mode
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
number the required bracket number or negative to find a lookbehind
Returns: pointer to the opcode for the bracket, or NULL if not found
@@ -2121,7 +2123,7 @@ for (;;)
a multi-byte character. The length in the table is a minimum, so we have to
arrange to skip the extra bytes. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) switch(c)
{
case OP_CHAR:
@@ -2173,7 +2175,7 @@ instance of OP_RECURSE.
Arguments:
code points to start of expression
- utf TRUE in UTF-8 / UTF-16 mode
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
*/
@@ -2241,7 +2243,7 @@ for (;;)
by a multi-byte character. The length in the table is a minimum, so we have
to arrange to skip the extra bytes. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) switch(c)
{
case OP_CHAR:
@@ -2327,7 +2329,7 @@ bracket whose current branch will already have been scanned.
Arguments:
code points to start of search
endcode points to where to stop
- utf TRUE if in UTF-8 / UTF-16 mode
+ utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
Returns: TRUE if what is matched could be empty
@@ -2560,7 +2562,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
MINUPTO, and POSUPTO may be followed by a multibyte character */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
case OP_STAR:
case OP_STARI:
case OP_MINSTAR:
@@ -2626,7 +2628,7 @@ Arguments:
code points to start of the recursion
endcode points to where to stop (current RECURSE item)
bcptr points to the chain of current (unclosed) branch starts
- utf TRUE if in UTF-8 / UTF-16 mode
+ utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd pointers to tables etc
Returns: TRUE if what is matched could be empty
@@ -2773,7 +2775,7 @@ value in the reference (which is a group number).
Arguments:
group points to the start of the group
adjust the amount by which the group is to be moved
- utf TRUE in UTF-8 / UTF-16 mode
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
save_hwm the hwm forward reference pointer at the start of the group
@@ -3024,7 +3026,7 @@ sense to automatically possessify the repeated item.
Arguments:
previous pointer to the repeated opcode
- utf TRUE in UTF-8 / UTF-16 mode
+ utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
ptr next character in pattern
options options bits
cd contains pointers to tables etc.
@@ -3476,19 +3478,24 @@ if ((options & PCRE_CASELESS) != 0)
length - this means that the same lists of (e.g.) horizontal spaces can be used
in all cases. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF8) == 0)
#endif
if (end > 0xff) end = 0xff;
-#endif
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF16) == 0)
#endif
if (end > 0xffff) end = 0xffff;
+
+#elif defined COMPILE_PCRE32
+#ifdef SUPPORT_UTF
+ if ((options & PCRE_UTF32) == 0)
+ if (end > 0xffffu) end = 0xffffu; // FIXMEchpe rebase fix this
#endif
+#endif /* COMPILE_PCRE[8|16|32] */
/* If all characters are less than 256, use the bit map. Otherwise use extra
data. */
@@ -3696,7 +3703,7 @@ must not do this for other options (e.g. PCRE_EXTENDED) because they may change
dynamically as we process the pattern. */
#ifdef SUPPORT_UTF
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0;
pcre_uchar utf_chars[6];
#else
@@ -4081,7 +4088,7 @@ for (;; ptr++)
{
const pcre_uchar *oldptr;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(c))
{ /* Braces are required because the */
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
@@ -4500,6 +4507,7 @@ for (;; ptr++)
if (negate_class)
{
#ifdef SUPPORT_UCP
+ /* FIXMEchpe: should d be a pcre_uint32 rather than an int? */
int d;
#endif
if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
@@ -4523,7 +4531,7 @@ for (;; ptr++)
{
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
code += PRIV(ord2utf)(c, code);
else
@@ -4539,7 +4547,7 @@ for (;; ptr++)
/* For a single, positive character, get the value into mcbuffer, and
then we can handle this with the normal one-character code. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(c, mcbuffer);
else
@@ -4774,7 +4782,7 @@ for (;; ptr++)
hold the length of the character in bytes, plus UTF_LENGTH to flag that
it's a length rather than a small character. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && NOT_FIRSTCHAR(code[-1]))
{
pcre_uchar *lastchar = code - 1;
@@ -4910,7 +4918,7 @@ for (;; ptr++)
if (repeat_max < 0)
{
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
{
memcpy(code, utf_chars, IN_UCHARS(c & 7));
@@ -4935,7 +4943,7 @@ for (;; ptr++)
else if (repeat_max != repeat_min)
{
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
{
memcpy(code, utf_chars, IN_UCHARS(c & 7));
@@ -4965,7 +4973,7 @@ for (;; ptr++)
/* The character or character type itself comes last in all cases. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
{
memcpy(code, utf_chars, IN_UCHARS(c & 7));
@@ -5452,7 +5460,7 @@ for (;; ptr++)
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
{
tempcode += PRIV(OP_lengths)[*tempcode];
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(tempcode[-1]))
tempcode += GET_EXTRALEN(tempcode[-1]);
#endif
@@ -5550,7 +5558,7 @@ for (;; ptr++)
arg = ++ptr;
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
arglen = (int)(ptr - arg);
- if (arglen > (int)MAX_MARK)
+ if ((unsigned int)arglen > MAX_MARK)
{
*errorcodeptr = ERR75;
goto FAILED;
@@ -6852,7 +6860,7 @@ for (;; ptr++)
a value > 127. We set its representation in the length/buffer, and then
handle it as a data character. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(c, mcbuffer);
else
@@ -6875,7 +6883,7 @@ for (;; ptr++)
mclength = 1;
mcbuffer[0] = c;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(c))
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
#endif
@@ -7606,32 +7614,42 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
+pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr,
+ int *erroroffset, const unsigned char *tables)
#endif
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
-#else
+#elif defined COMPILE_PCRE16
return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
+#elif defined COMPILE_PCRE32
+return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
#endif
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
+pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
+ const char **errorptr, int *erroroffset, const unsigned char *tables)
#endif
{
REAL_PCRE *re;
@@ -7717,6 +7735,10 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
{ skipatstart += 8; options |= PCRE_UTF16; continue; }
#endif
+#ifdef COMPILE_PCRE32
+ if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+ { skipatstart += 8; options |= PCRE_UTF32; continue; }
+#endif
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
{ skipatstart += 6; options |= PCRE_UCP; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
@@ -7745,7 +7767,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
else break;
}
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
/* Can't support UTF unless PCRE has been compiled to include the code. The
@@ -7757,10 +7779,12 @@ not used here. */
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
errorcode = ERR44;
-#else
+#elif defined COMPILE_PCRE16
errorcode = ERR74;
+#elif defined COMPILE_PCRE32
+ errorcode = ERR77;
#endif
goto PCRE_EARLY_ERROR_RETURN2;
}
@@ -7924,6 +7948,9 @@ re->name_count = cd->names_found;
re->ref_count = 0;
re->tables = (tables == PRIV(default_tables))? NULL : tables;
re->nullpad = NULL;
+#ifdef COMPILE_PCRE32
+re->dummy1 = re->dummy2 = 0;
+#endif
/* The starting points of the name/number translation table and of the code are
passed around in the compile data block. The start/end pattern and initial
@@ -8086,12 +8113,12 @@ if ((re->options & PCRE_ANCHORED) == 0)
firstchar = find_firstassertedchar(codestart, FALSE);
if (firstchar >= 0) /* Remove caseless flag for non-caseable chars */
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
re->first_char = firstchar & 0xff;
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
re->first_char = firstchar & 0xffff;
-#endif
+#elif defined COMPILE_PCRE32
+ re->first_char = firstchar & ~REQ_MASK;
#endif
if ((firstchar & REQ_CASELESS) != 0)
{
@@ -8128,12 +8155,12 @@ bytes. */
if (reqchar >= 0 &&
((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
re->req_char = reqchar & 0xff;
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
re->req_char = reqchar & 0xffff;
-#endif
+#elif defined COMPILE_PCRE32
+ re->req_char = reqchar & ~REQ_MASK;
#endif
if ((reqchar & REQ_CASELESS) != 0)
{
@@ -8185,10 +8212,12 @@ if ((re->flags & PCRE_REQCHSET) != 0)
else printf("Req char = \\x%02x%s\n", ch, caseless);
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
pcre_printint((pcre *)re, stdout, TRUE);
-#else
+#elif defined COMPILE_PCRE16
pcre16_printint((pcre *)re, stdout, TRUE);
+#elif defined COMPILE_PCRE32
+pcre32_printint((pcre *)re, stdout, TRUE);
#endif
/* This check is done here in the debugging case so that the code that
@@ -8204,10 +8233,12 @@ if (code - codestart > length)
}
#endif /* PCRE_DEBUG */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
return (pcre *)re;
-#else
+#elif defined COMPILE_PCRE16
return (pcre16 *)re;
+#elif defined COMPILE_PCRE32
+return (pcre32 *)re;
#endif
}
diff --git a/pcre_config.c b/pcre_config.c
index aa0ef86..3d5689f 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -65,18 +65,21 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_config(int what, void *where)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_config(int what, void *where)
#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
-#if defined COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
@@ -89,7 +92,20 @@ switch (what)
#endif
case PCRE_CONFIG_UTF16:
-#if defined COMPILE_PCRE8
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
+ *((int *)where) = 0;
+ return PCRE_ERROR_BADOPTION;
+#else
+#if defined SUPPORT_UTF
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
+#endif
+
+ case PCRE_CONFIG_UTF32:
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 5ffa750..3c9491f 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -1007,7 +1007,7 @@ for (;;)
{
const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) { BACKCHAR(temp); }
#endif
GETCHARTEST(d, temp);
@@ -2607,10 +2607,12 @@ for (;;)
cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)start_subject;
#endif
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
@@ -2741,7 +2743,7 @@ for (;;)
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
{
int charcount = local_offsets[rc+1] - local_offsets[rc];
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[rc];
@@ -2845,7 +2847,7 @@ for (;;)
const pcre_uchar *p = ptr;
const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
@@ -2927,7 +2929,7 @@ for (;;)
}
else
{
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[0];
@@ -2956,10 +2958,12 @@ for (;;)
cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[1];
cb.offset_vector = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)start_subject;
#endif
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
@@ -3075,16 +3079,21 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+ PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount, int *workspace, int wscount)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
@@ -3159,7 +3168,7 @@ end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;
#ifdef SUPPORT_UTF
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (re->options & PCRE_UTF8) != 0;
#else
utf = FALSE;
@@ -3245,12 +3254,21 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
offsets[0] = erroroffset;
offsets[1] = errorcode;
}
- return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
+#if defined COMPILE_PCRE8
+ return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#elif defined COMPILE_PCRE16
+ return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
+ PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#elif defined COMPILE_PCRE32
+ return PCRE_ERROR_BADUTF32;
+#endif
}
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
+#endif
}
#endif
diff --git a/pcre_exec.c b/pcre_exec.c
index c6cba03..c77411c 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1273,10 +1273,12 @@ for (;;)
cb.version = 2; /* Version 1 of the callout block */
cb.callout_number = ecode[LINK_SIZE+2];
cb.offset_vector = md->offset_vector;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)md->start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)md->start_subject;
#endif
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
@@ -1696,10 +1698,12 @@ for (;;)
cb.version = 2; /* Version 1 of the callout block */
cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
-#else
+#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)md->start_subject;
+#elif defined COMPILE_PCRE32
+ cb.subject = (PCRE_SPTR32)md->start_subject;
#endif
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
@@ -4558,7 +4562,7 @@ for (;;)
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case 0x2028:
case 0x2029:
#endif
@@ -4580,7 +4584,7 @@ for (;;)
{
default: break;
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
@@ -4600,7 +4604,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
break;
@@ -4619,7 +4623,7 @@ for (;;)
switch(*eptr++)
{
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
@@ -4640,7 +4644,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
break;
@@ -5158,7 +5162,7 @@ for (;;)
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case 0x2028:
case 0x2029:
#endif
@@ -5172,7 +5176,7 @@ for (;;)
{
default: break;
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
@@ -5184,7 +5188,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
break;
@@ -5196,7 +5200,7 @@ for (;;)
{
default: break;
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
@@ -5208,7 +5212,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
break;
@@ -5840,7 +5844,7 @@ for (;;)
{
if (c != CHAR_LF && (md->bsr_anycrlf ||
(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
&& c != 0x2028 && c != 0x2029
#endif
))) break;
@@ -5861,7 +5865,7 @@ for (;;)
{
default: eptr++; break;
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
goto ENDLOOP00;
@@ -5882,7 +5886,7 @@ for (;;)
{
default: goto ENDLOOP01;
HSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
eptr++; break;
@@ -5903,7 +5907,7 @@ for (;;)
{
default: eptr++; break;
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
goto ENDLOOP02;
@@ -5924,7 +5928,7 @@ for (;;)
{
default: goto ENDLOOP03;
VSPACE_BYTE_CASES:
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
eptr++; break;
@@ -6197,16 +6201,21 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+ PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount)
#endif
{
int rc, ocount, arg_offset_max;
@@ -6297,19 +6306,22 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
offsets[0] = erroroffset;
offsets[1] = errorcode;
}
-#ifdef COMPILE_PCRE16
- return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
- PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
-#else
+#if defined COMPILE_PCRE8
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#elif defined COMPILE_PCRE16
+ return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
+ PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#elif defined COMPILE_PCRE32
+ return PCRE_ERROR_BADUTF32;
#endif
}
-
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
/* Check that a start_offset points to the start of a UTF character. */
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
+#endif
}
#endif
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 7a7db11..7dd7db1 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -65,14 +65,18 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
int what, void *where)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
int what, void *where)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
+ int what, void *where)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
diff --git a/pcre_get.c b/pcre_get.c
index 3d9904e..8094b34 100644
--- a/pcre_get.c
+++ b/pcre_get.c
@@ -65,12 +65,15 @@ Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
#endif
{
int rc;
@@ -98,6 +101,16 @@ if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
+#ifdef COMPILE_PCRE32
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+ return rc;
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+ return rc;
+#endif
bot = 0;
while (top > bot)
@@ -132,14 +145,18 @@ Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
+ PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
#endif
{
int rc;
@@ -167,6 +184,16 @@ if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
+#ifdef COMPILE_PCRE32
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+ return rc;
+if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+ return rc;
+#endif
lastentry = nametable + entrysize * (top - 1);
bot = 0;
@@ -192,12 +219,15 @@ while (top > bot)
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
*firstptr = (char *)first;
*lastptr = (char *)last;
-#else
+#elif defined COMPILE_PCRE16
*firstptr = (PCRE_UCHAR16 *)first;
*lastptr = (PCRE_UCHAR16 *)last;
+#elif defined COMPILE_PCRE32
+ *firstptr = (PCRE_UCHAR32 *)first;
+ *lastptr = (PCRE_UCHAR32 *)last;
#endif
return entrysize;
}
@@ -226,31 +256,40 @@ Returns: the number of the first that is set,
or a negative number on error
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
-#else
+#elif defined COMPILE_PCRE16
static int
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+#elif defined COMPILE_PCRE32
+static int
+get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)code;
int entrysize;
pcre_uchar *entry;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
char *first, *last;
-#else
+#elif defined COMPILE_PCRE16
PCRE_UCHAR16 *first, *last;
+#elif defined COMPILE_PCRE32
+PCRE_UCHAR32 *first, *last;
#endif
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
-#else
+#elif defined COMPILE_PCRE16
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre16_get_stringnumber(code, stringname);
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
+#elif defined COMPILE_PCRE32
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+ return pcre32_get_stringnumber(code, stringname);
+entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
#endif
if (entrysize <= 0) return entrysize;
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
@@ -291,14 +330,18 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_UCHAR16 *buffer, int size)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
+ int stringnumber, PCRE_UCHAR32 *buffer, int size)
#endif
{
int yield;
@@ -342,24 +385,31 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
char *buffer, int size)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_UCHAR16 *buffer, int size)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
+ int *ovector, int stringcount, PCRE_SPTR32 stringname,
+ PCRE_UCHAR32 *buffer, int size)
#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
-#else
+#elif defined COMPILE_PCRE16
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#elif defined COMPILE_PCRE32
+return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
#endif
}
@@ -386,14 +436,18 @@ Returns: if successful: 0
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
PCRE_SPTR16 **listptr)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
+ PCRE_SPTR32 **listptr)
#endif
{
int i;
@@ -408,10 +462,12 @@ for (i = 0; i < double_count; i += 2)
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
*listptr = (const char **)stringlist;
-#else
+#elif defined COMPILE_PCRE16
*listptr = (PCRE_SPTR16 *)stringlist;
+#elif defined COMPILE_PCRE32
+*listptr = (PCRE_SPTR32 *)stringlist;
#endif
p = (pcre_uchar *)(stringlist + stringcount + 1);
@@ -442,12 +498,15 @@ Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre32_free_substring_list(PCRE_SPTR32 *pointer)
#endif
{
(PUBL(free))((void *)pointer);
@@ -480,14 +539,18 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_SPTR16 *stringptr)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
+ int stringnumber, PCRE_SPTR32 *stringptr)
#endif
{
int yield;
@@ -500,10 +563,12 @@ substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
substring[yield] = 0;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
*stringptr = (const char *)substring;
-#else
+#elif defined COMPILE_PCRE16
*stringptr = (PCRE_SPTR16)substring;
+#elif defined COMPILE_PCRE32
+*stringptr = (PCRE_SPTR32)substring;
#endif
return yield;
}
@@ -537,24 +602,31 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
const char **stringptr)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_SPTR16 *stringptr)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
+ int *ovector, int stringcount, PCRE_SPTR32 stringname,
+ PCRE_SPTR32 *stringptr)
#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
-#else
+#elif defined COMPILE_PCRE16
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
+#elif defined COMPILE_PCRE32
+return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
#endif
}
@@ -573,12 +645,15 @@ Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring(PCRE_SPTR16 pointer)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre32_free_substring(PCRE_SPTR32 pointer)
#endif
{
(PUBL(free))((void *)pointer);
diff --git a/pcre_internal.h b/pcre_internal.h
index f45a799..25393ab 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -40,8 +40,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* This header contains definitions that are shared between the different
modules, but which are not relevant to the exported API. This includes some
-functions whose names all begin with "_pcre_" or "_pcre16_" depending on
-the PRIV macro. */
+functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_"
+depending on the PRIV macro. */
#ifndef PCRE_INTERNAL_H
#define PCRE_INTERNAL_H
@@ -53,7 +53,7 @@ the PRIV macro. */
#endif
/* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
-#ifndef COMPILE_PCRE16
+#if !defined COMPILE_PCRE16 && ! defined COMPILE_PCRE32
#define COMPILE_PCRE8
#endif
@@ -78,11 +78,11 @@ Until then we define it if SUPPORT_UTF is defined. */
#define SUPPORT_UTF8 1
#endif
-/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+/* We do not support both EBCDIC and UTF-8/16/32 at the same time. The "configure"
script prevents both being selected, but not everybody uses "configure". */
#if defined EBCDIC && defined SUPPORT_UTF
-#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#error The use of both EBCDIC and SUPPORT_UTF is not supported.
#endif
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -243,16 +243,15 @@ exactly 256 items. When the character is able to contain more than 256
items, some check is needed before accessing these tables.
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
typedef unsigned char pcre_uchar;
#define IN_UCHARS(x) (x)
#define MAX_255(c) 1
#define TABLE_GET(c, table, default) ((table)[c])
-#else
+#elif defined COMPILE_PCRE16
-#ifdef COMPILE_PCRE16
#if USHRT_MAX != 65535
/* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
pcre.h(.in) and disable (comment out) this message. */
@@ -260,15 +259,25 @@ pcre.h(.in) and disable (comment out) this message. */
#endif
typedef pcre_uint16 pcre_uchar;
-#define IN_UCHARS(x) ((x) << 1)
+#define UCHAR_SHIFT (1)
+#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
+#define MAX_255(c) ((c) <= 255u)
+#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
+
+#elif defined COMPILE_PCRE32
+
+typedef pcre_uint32 pcre_uchar;
+#define UCHAR_SHIFT (2)
+#define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
#define MAX_255(c) ((c) <= 255u)
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
+/* Assert that pcre_uchar32 is a 32-bit type */
+typedef int __assert_pcre_uchar32_size[sizeof(pcre_uchar) == 4 ? 1 : -1];
+
#else
#error Unsupported compiling mode
-#endif /* COMPILE_PCRE16 */
-
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
/* This is an unsigned int value that no character can ever have. UTF-8
characters only go up to 0x7fffffff (though Unicode doesn't go beyond
@@ -396,7 +405,7 @@ The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
the config.h file, but can be overridden by using -D on the command line. This
is automated on Unix systems via the "configure" command. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#if LINK_SIZE == 2
@@ -441,12 +450,11 @@ is automated on Unix systems via the "configure" command. */
#error LINK_SIZE must be either 2, 3, or 4
#endif
-#else /* COMPILE_PCRE8 */
-
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
#if LINK_SIZE == 2
+/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
#undef LINK_SIZE
#define LINK_SIZE 1
@@ -460,6 +468,7 @@ is automated on Unix systems via the "configure" command. */
#elif LINK_SIZE == 3 || LINK_SIZE == 4
+/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
#undef LINK_SIZE
#define LINK_SIZE 2
@@ -477,11 +486,25 @@ is automated on Unix systems via the "configure" command. */
#error LINK_SIZE must be either 2, 3, or 4
#endif
+#elif defined COMPILE_PCRE32
+
+/* Only supported LINK_SIZE is 4 */
+/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
+#undef LINK_SIZE
+#define LINK_SIZE 1
+
+#define PUT(a,n,d) \
+ (a[n] = (d))
+
+#define GET(a,n) \
+ (a[n])
+
+/* Keep it positive */
+#define MAX_PATTERN_SIZE (1 << 30)
+
#else
#error Unsupported compiling mode
-#endif /* COMPILE_PCRE16 */
-
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
/* Convenience macro defined in terms of the others */
@@ -492,7 +515,7 @@ is automated on Unix systems via the "configure" command. */
offsets changes. There are used for repeat counts and for other things such as
capturing parenthesis numbers in back references. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#define IMM2_SIZE 2
@@ -503,9 +526,17 @@ capturing parenthesis numbers in back references. */
#define GET2(a,n) \
(((a)[n] << 8) | (a)[(n)+1])
-#else /* COMPILE_PCRE8 */
+#elif defined COMPILE_PCRE16
-#ifdef COMPILE_PCRE16
+#define IMM2_SIZE 1
+
+#define PUT2(a,n,d) \
+ a[n] = d
+
+#define GET2(a,n) \
+ a[n]
+
+#elif defined COMPILE_PCRE32
#define IMM2_SIZE 1
@@ -517,16 +548,18 @@ capturing parenthesis numbers in back references. */
#else
#error Unsupported compiling mode
-#endif /* COMPILE_PCRE16 */
-
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
/* The maximum length of a MARK name is currently one data unit; it may be
changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
-#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#define MAX_MARK ((1u << 16) - 1)
+#else
+#define MAX_MARK ((1u << 8) - 1)
+#endif
/* When UTF encoding is being used, a character is no longer just a single
byte. The macros for character handling generate simple sequences when used in
@@ -553,7 +586,7 @@ we don't even define them. */
#else /* SUPPORT_UTF */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
/* These macros were originally written in the form of loops that used data
from the tables whose names start with PRIV(utf8_table). They were rewritten by
@@ -727,9 +760,7 @@ because almost all calls are already within a block of UTF-8 only code. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0) == 0x80) action
-#else /* COMPILE_PCRE8 */
-
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
/* Tells the biggest code point which can be encoded as a single character. */
@@ -825,12 +856,70 @@ code. */
#define ACROSSCHAR(condition, eptr, action) \
if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
-#endif
+#elif defined COMPILE_PCRE32
-#endif /* COMPILE_PCRE8 */
+/* These are unnecessary for the 32-bit library */
+#undef MAX_VALUE_FOR_SINGLE_CHAR
+#undef HAS_EXTRALEN
+#undef GET_EXTRALEN
+#undef NOT_FIRSTCHAR
-#endif /* SUPPORT_UTF */
+/* Get the next UTF-32 character, not advancing the pointer. This is called when
+we know we are in UTF-32 mode. */
+
+#define GETCHAR(c, eptr) \
+ c = *eptr;
+
+/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+ c = *eptr;
+
+/* Get the next UTF-32 character, advancing the pointer. This is called when we
+know we are in UTF-32 mode. */
+
+#define GETCHARINC(c, eptr) \
+ c = *eptr++;
+
+/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-32 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+ c = *eptr++;
+
+/* Get the next UTF-32 character, not advancing the pointer, not incrementing
+length (since all UTF-32 is of length 1). This is called when we know we are in
+UTF-32 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+ c = *eptr;
+
+/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
+pointer, not incrementing the length (since all UTF-32 is of length 1).
+This is called when we do not know if we are in UTF-32 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+ c = *eptr;
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-32 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-32 only
+code. */
+
+#define BACKCHAR(eptr) do { } while (0)
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) do { } while (0)
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) do { } while (0)
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE[8|16|32] */
+
+#endif /* SUPPORT_UTF */
/* Tests for Unicode horizontal and vertical whitespace characters must check a
number of different values. Using a switch statement for this generates the
@@ -935,13 +1024,6 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
/* ------ End of whitespace macros ------ */
-/* In case there is no definition of offsetof() provided - though any proper
-Standard C system should have one. */
-
-#ifndef offsetof
-#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
-#endif
-
/* Private flags containing information about the compiled regex. They used to
live at the top end of the options word, but that got almost full, so now they
@@ -949,12 +1031,9 @@ are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
the restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#ifdef COMPILE_PCRE8
-#define PCRE_MODE 0x0001 /* compiled in 8 bit mode */
-#endif
-#ifdef COMPILE_PCRE16
-#define PCRE_MODE 0x0002 /* compiled in 16 bit mode */
-#endif
+#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
+#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
+#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
#define PCRE_FIRSTSET 0x0010 /* first_char is set */
#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
#define PCRE_REQCHSET 0x0040 /* req_byte is set */
@@ -965,6 +1044,15 @@ compatibility. */
#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
+#if defined COMPILE_PCRE8
+#define PCRE_MODE PCRE_MODE8
+#elif defined COMPILE_PCRE16
+#define PCRE_MODE PCRE_MODE16
+#elif defined COMPILE_PCRE32
+#define PCRE_MODE PCRE_MODE32
+#endif
+#define PCRE_MODE_MASK (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32)
+
/* Flags for the "extra" block produced by pcre_study(). */
#define PCRE_STUDY_MAPPED 0x0001 /* a map of starting chars exists */
@@ -1351,6 +1439,9 @@ a positive value. */
#ifdef COMPILE_PCRE16
#define STRING_UTF_RIGHTPAR "UTF16)"
#endif
+#ifdef COMPILE_PCRE32
+#define STRING_UTF_RIGHTPAR "UTF32)"
+#endif
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
@@ -1613,6 +1704,9 @@ only. */
#ifdef COMPILE_PCRE16
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
#endif
+#ifdef COMPILE_PCRE32
+#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
+#endif
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
@@ -2091,7 +2185,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
@@ -2114,13 +2208,20 @@ fields are present. Currently PCRE always sets the dummy fields to zero.
NOTE NOTE NOTE
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#define REAL_PCRE real_pcre
-#else
+#elif defined COMPILE_PCRE16
#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
#endif
-typedef struct REAL_PCRE {
+/* It is necessary to fork the struct for 32 bit, since it needs to use
+ * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
+ * typedef since pcretest needs access to the struct of the 8-, 16-
+ * and 32-bit variants. */
+
+typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
@@ -2136,7 +2237,42 @@ typedef struct REAL_PCRE {
pcre_uint16 ref_count; /* Reference count */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
const pcre_uint8 *nullpad; /* NULL padding */
-} REAL_PCRE;
+} real_pcre8_or_16;
+
+typedef struct real_pcre8_or_16 real_pcre;
+typedef struct real_pcre8_or_16 real_pcre16;
+
+typedef struct real_pcre32 {
+ pcre_uint32 magic_number;
+ pcre_uint32 size; /* Total that was malloced */
+ pcre_uint32 options; /* Public options */
+ pcre_uint16 flags; /* Private flags */
+ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
+ pcre_uint16 top_bracket; /* Highest numbered group */
+ pcre_uint16 top_backref; /* Highest numbered back reference */
+ pcre_uint32 first_char; /* Starting character */
+ pcre_uint32 req_char; /* This character must be seen */
+ pcre_uint16 name_table_offset; /* Offset to name table that follows */
+ pcre_uint16 name_entry_size; /* Size of any name items */
+ pcre_uint16 name_count; /* Number of name items */
+ pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 dummy1; /* for later expansion */
+ pcre_uint16 dummy2; /* for later expansion */
+ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
+ void *nullpad; /* for later expansion */
+} real_pcre32;
+
+/* Assert that the size of REAL_PCRE is divisible by 8 */
+typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
+
+/* Needed in pcretest to access some fields in the real_pcre* structures
+ * directly. They're unified for 8/16/32 bits since the structs only differ
+ * after these fields; if that ever changes, need to fork those defines into
+ * 8/16 and 32 bit versions. */
+#define REAL_PCRE_MAGIC(re) (((REAL_PCRE*)re)->magic_number)
+#define REAL_PCRE_SIZE(re) (((REAL_PCRE*)re)->size)
+#define REAL_PCRE_OPTIONS(re) (((REAL_PCRE*)re)->options)
+#define REAL_PCRE_FLAGS(re) (((REAL_PCRE*)re)->flags)
/* The format of the block used to store data from pcre_study(). The same
remark (see NOTE above) about extending this structure applies. */
@@ -2341,25 +2477,30 @@ total length. */
/* Internal function and data prefixes. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#ifndef PUBL
#define PUBL(name) pcre_##name
#endif
#ifndef PRIV
#define PRIV(name) _pcre_##name
#endif
-#else /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
#ifndef PUBL
#define PUBL(name) pcre16_##name
#endif
#ifndef PRIV
#define PRIV(name) _pcre16_##name
#endif
+#elif defined COMPILE_PCRE32
+#ifndef PUBL
+#define PUBL(name) pcre32_##name
+#endif
+#ifndef PRIV
+#define PRIV(name) _pcre32_##name
+#endif
#else
#error Unsupported compiling mode
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
/* Layout of the UCP type table that translates property names into types and
codes. Each entry used to point directly to a name, but to reduce the number of
@@ -2402,7 +2543,7 @@ one of the exported public functions. They have to be "external" in the C
sense, but are not part of the PCRE public API. */
/* String comparison functions. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#define STRCMP_UC_UC(str1, str2) \
strcmp((char *)(str1), (char *)(str2))
@@ -2414,7 +2555,7 @@ sense, but are not part of the PCRE public API. */
strncmp((char *)(str1), (str2), (num))
#define STRLEN_UC(str) strlen((const char *)str)
-#else
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
extern int PRIV(strcmp_uc_uc)(const pcre_uchar *,
const pcre_uchar *);
@@ -2436,7 +2577,7 @@ extern unsigned int PRIV(strlen_uc)(const pcre_uchar *str);
PRIV(strncmp_uc_c8)((str1), (str2), (num))
#define STRLEN_UC(str) PRIV(strlen_uc)(str)
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index dea03f4..a558d44 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -46,7 +46,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "pcre_internal.h"
-#ifdef SUPPORT_JIT
+#if defined SUPPORT_JIT
/* All-in-one: Since we use the JIT compiler only from here,
we just include it. This way we don't need to touch the build
@@ -343,7 +343,9 @@ typedef struct compiler_common {
#ifdef SUPPORT_UCP
BOOL use_ucp;
#endif
+#ifndef COMPILE_PCRE32
jump_list *utfreadchar;
+#endif
#ifdef COMPILE_PCRE8
jump_list *utfreadtype8;
#endif
@@ -363,25 +365,25 @@ typedef struct compare_context {
union {
sljit_i asint;
sljit_uh asushort;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
sljit_ub asbyte;
sljit_ub asuchars[4];
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
sljit_uh asuchars[2];
-#endif
+#elif defined COMPILE_PCRE32
+ sljit_ui asuchars[1];
#endif
} c;
union {
sljit_i asint;
sljit_uh asushort;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
sljit_ub asbyte;
sljit_ub asuchars[4];
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
sljit_uh asuchars[2];
-#endif
+#elif defined COMPILE_PCRE32
+ sljit_ui asuchars[1];
#endif
} oc;
#endif
@@ -428,17 +430,18 @@ the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
+#elif defined COMPILE_PCRE32
+#define MOV_UCHAR SLJIT_MOV_UI
+#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif
-#endif
/* Shortcuts. */
#define DEFINE_COMPILER \
@@ -588,7 +591,7 @@ switch(*cc)
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
return cc;
@@ -610,7 +613,7 @@ switch(*cc)
case OP_NOTEXACTI:
case OP_NOTPOSUPTOI:
cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
return cc;
@@ -933,7 +936,7 @@ while (cc < ccend)
if (size < 0)
{
cc += -size;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
}
@@ -1073,7 +1076,7 @@ while (cc < ccend)
if (size < 0)
{
cc += -size;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
}
@@ -1317,7 +1320,7 @@ while (cc < ccend)
if (PRIVATE_DATA(cc))
private_data_length++;
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1326,7 +1329,7 @@ while (cc < ccend)
if (PRIVATE_DATA(cc))
private_data_length += 2;
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1335,7 +1338,7 @@ while (cc < ccend)
if (PRIVATE_DATA(cc))
private_data_length += 2;
cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1494,7 +1497,7 @@ while (status != end)
srcw[0] = PRIVATE_DATA(cc);
}
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1507,7 +1510,7 @@ while (status != end)
srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
}
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1520,7 +1523,7 @@ while (status != end)
srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
}
cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -1834,8 +1837,8 @@ loop = LABEL();
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
/* Copy the integer value to the output buffer */
-#ifdef COMPILE_PCRE16
-OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
@@ -1876,14 +1879,14 @@ OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFS
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
-#ifdef COMPILE_PCRE16
-OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
-#ifdef COMPILE_PCRE16
-OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
@@ -2001,7 +2004,7 @@ if (c <= 127 && bit == 0x20)
if (!is_powerof2(bit))
return 0;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#ifdef SUPPORT_UTF
if (common->utf && c > 127)
@@ -2017,9 +2020,8 @@ if (common->utf && c > 127)
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;
-#else /* COMPILE_PCRE8 */
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
-#ifdef COMPILE_PCRE16
#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
{
@@ -2030,9 +2032,8 @@ if (common->utf && c > 65535)
}
#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16|32] */
}
static void check_partial(compiler_common *common, BOOL force)
@@ -2130,25 +2131,23 @@ static void read_char(compiler_common *common)
/* Reads the character into TMP1, updates STR_PTR.
Does not check STR_END. TMP2 Destroyed. */
DEFINE_COMPILER;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf)
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
-#endif
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16] */
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
JUMPHERE(jump);
}
-#endif
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
@@ -2157,33 +2156,31 @@ static void peek_char(compiler_common *common)
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed. */
DEFINE_COMPILER;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf)
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
-#endif
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16] */
add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(jump);
}
-#endif
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
}
static void read_char8_type(compiler_common *common)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
DEFINE_COMPILER;
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
@@ -2192,15 +2189,14 @@ if (common->utf)
{
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
/* This can be an extra read in some situations, but hopefully
it is needed in most cases. */
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
JUMPHERE(jump);
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
@@ -2211,20 +2207,24 @@ if (common->utf)
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-#endif
-#endif /* COMPILE_PCRE8 */
+#elif defined COMPILE_PCRE32
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+ jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+ JUMPHERE(jump);
+#endif /* COMPILE_PCRE[8|16|32] */
return;
}
-#endif
+#endif /* SUPPORT_UTF */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
JUMPHERE(jump);
#endif
}
@@ -2233,7 +2233,8 @@ static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
DEFINE_COMPILER;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
struct sljit_label *label;
if (common->utf)
@@ -2245,8 +2246,7 @@ if (common->utf)
CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
return;
}
-#endif
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (common->utf)
{
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
@@ -2259,7 +2259,8 @@ if (common->utf)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
return;
}
-#endif
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
@@ -2290,7 +2291,7 @@ else
#ifdef SUPPORT_UTF
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
@@ -2384,9 +2385,8 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-#else /* COMPILE_PCRE8 */
+#elif defined COMPILE_PCRE16
-#ifdef COMPILE_PCRE16
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
@@ -2411,9 +2411,8 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
+#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
@@ -2509,8 +2508,8 @@ if (newlinecheck)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
-#ifdef COMPILE_PCRE16
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
nl = JUMP(SLJIT_JUMP);
@@ -2531,7 +2530,8 @@ if (newlinecheck)
CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
if (common->utf)
{
singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
@@ -2539,8 +2539,7 @@ if (common->utf)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(singlechar);
}
-#endif
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (common->utf)
{
singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
@@ -2551,7 +2550,8 @@ if (common->utf)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(singlechar);
}
-#endif
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
JUMPHERE(start);
if (newlinecheck)
@@ -2570,7 +2570,7 @@ static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOO
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
-pcre_int32 chars[MAX_N_CHARS * 2];
+pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
int location = 0;
pcre_int32 len, c, bit, caseless;
@@ -2643,7 +2643,7 @@ while (TRUE)
break;
len = 1;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
#endif
@@ -2818,8 +2818,8 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
-#ifdef COMPILE_PCRE16
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
@@ -2861,8 +2861,8 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
-#ifdef COMPILE_PCRE16
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(notfoundnl);
@@ -2916,15 +2916,15 @@ if (common->utf)
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+#ifdef SUPPORT_UTF
+#if defined COMPILE_PCRE8
if (common->utf)
{
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
}
-#endif
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (common->utf)
{
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
@@ -2934,7 +2934,8 @@ if (common->utf)
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
}
-#endif
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
JUMPHERE(found);
JUMPHERE(quit);
@@ -3314,7 +3315,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
@@ -3325,7 +3326,7 @@ if (common->utf)
#ifdef COMPILE_PCRE8
}
#endif
-#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -3342,7 +3343,7 @@ COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
@@ -3363,7 +3364,7 @@ if (common->utf)
#ifdef COMPILE_PCRE8
}
#endif
-#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
@@ -3380,7 +3381,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
@@ -3391,7 +3392,7 @@ if (common->utf)
#ifdef COMPILE_PCRE8
}
#endif
-#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
@@ -3524,23 +3525,21 @@ if (caseless && char_has_othercase(common, cc))
othercasebit = char_get_othercase_bit(common, cc);
SLJIT_ASSERT(othercasebit);
/* Extracting bit difference info. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
othercasechar = cc + (othercasebit >> 8);
othercasebit &= 0xff;
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
othercasechar = cc + (othercasebit >> 9);
if ((othercasebit & 0x100) != 0)
othercasebit = (othercasebit & 0xff) << 8;
else
othercasebit &= 0xff;
-#endif
-#endif
+#endif /* COMPILE_PCRE[8|16|32] */
}
if (context->sourcereg == -1)
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
@@ -3549,23 +3548,25 @@ if (context->sourcereg == -1)
else
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#else
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
- OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#endif
-#endif /* COMPILE_PCRE8 */
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif defined COMPILE_PCRE32
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE[8|16|32] */
context->sourcereg = TMP2;
}
#ifdef SUPPORT_UTF
utflength = 1;
+#ifndef COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(*cc))
utflength += GET_EXTRALEN(*cc);
+#endif
do
{
@@ -3587,23 +3588,29 @@ do
}
context->ucharptr++;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
-#else
+#elif defined COMPILE_PCRE16
if (context->ucharptr >= 2 || context->length == 0)
+#elif defined COMPILE_PCRE32
+ if (1 /* context->ucharptr >= 1 || context->length == 0 */)
#endif
{
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (context->length >= 4)
OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
else if (context->length >= 2)
OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 1)
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#else
+#elif defined COMPILE_PCRE16
else if (context->length >= 2)
OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#endif
+#endif /* COMPILE_PCRE[8|16] */
+#elif defined COMPILE_PCRE32
+ OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE[8|16|32] */
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
switch(context->ucharptr)
@@ -3614,6 +3621,7 @@ do
add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
break;
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
case 2 / sizeof(pcre_uchar):
if (context->oc.asushort != 0)
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
@@ -3628,6 +3636,8 @@ do
break;
#endif
+#endif /* COMPILE_PCRE[8|16] */
+
default:
SLJIT_ASSERT_STOP();
break;
@@ -3638,13 +3648,9 @@ do
#else
/* Unaligned read is unsupported. */
-#ifdef COMPILE_PCRE8
if (context->length > 0)
- OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#else
- if (context->length > 0)
- OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
-#endif
+ OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
if (othercasebit != 0 && othercasechar == cc)
@@ -3753,7 +3759,7 @@ while (*cc != XCL_END)
if (*cc == XCL_SINGLE)
{
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
#ifdef SUPPORT_UCP
@@ -3763,11 +3769,11 @@ while (*cc != XCL_END)
else if (*cc == XCL_RANGE)
{
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
cc++;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
#ifdef SUPPORT_UCP
@@ -4183,21 +4189,21 @@ switch(type)
{
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
+#if defined COMPILE_PCRE8
jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#else /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
+#endif
JUMPHERE(jump[0]);
+#endif /* COMPILE_PCRE[8|16] */
return cc;
}
#endif
@@ -4461,7 +4467,7 @@ switch(type)
case OP_CHAR:
case OP_CHARI:
length = 1;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
@@ -4602,7 +4608,7 @@ switch(type)
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
return cc + 32 / sizeof(pcre_uchar);
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
return cc + GET(cc, 0) - 1;
@@ -4656,7 +4662,7 @@ do
if (*cc == OP_CHAR)
{
size = 1;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(cc[1]))
size += GET_EXTRALEN(cc[1]);
#endif
@@ -4669,8 +4675,10 @@ do
{
if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
size = 0;
+#ifndef COMPILE_PCRE32
else if (HAS_EXTRALEN(cc[1]))
size += GET_EXTRALEN(cc[1]);
+#endif
}
else
#endif
@@ -6258,7 +6266,7 @@ if (*type == 0)
if (end != NULL)
{
*end = cc + 1;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
}
@@ -6681,7 +6689,7 @@ while (cc < ccend)
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
break;
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
@@ -7858,7 +7866,7 @@ common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
#ifdef SUPPORT_UTF
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->options & PCRE_UTF8) != 0;
#ifdef SUPPORT_UCP
common->use_ucp = (re->options & PCRE_UCP) != 0;
@@ -8179,19 +8187,21 @@ if (common->caselesscmp != NULL)
do_caselesscmp(common);
}
#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE32
if (common->utfreadchar != NULL)
{
set_jumps(common->utfreadchar, LABEL());
do_utfreadchar(common);
}
+#endif /* !COMPILE_PCRE32 */
#ifdef COMPILE_PCRE8
if (common->utfreadtype8 != NULL)
{
set_jumps(common->utfreadtype8, LABEL());
do_utfreadtype8(common);
}
-#endif
#endif /* COMPILE_PCRE8 */
+#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
if (common->getucd != NULL)
{
@@ -8357,12 +8367,15 @@ PRIV(jit_get_target)(void)
return sljit_get_platform_name();
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL pcre16_jit_stack *
pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
#endif
{
if (startsize < 1 || maxsize < 1)
@@ -8374,23 +8387,29 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
#endif
{
sljit_free_stack((struct sljit_stack *)stack);
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
#endif
{
executable_functions *functions;
@@ -8409,12 +8428,15 @@ if (extra != NULL &&
/* These are dummy functions to avoid linking errors when JIT support is not
being compiled. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL pcre16_jit_stack *
pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
#endif
{
(void)startsize;
@@ -8422,23 +8444,29 @@ pcre16_jit_stack_alloc(int startsize, int maxsize)
return NULL;
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
#endif
{
(void)stack;
}
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DECL void
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
#endif
{
(void)extra;
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 830e3e1..d2643db 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -48,6 +48,9 @@ POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include "pcre.h"
+
+#include "pcre_internal.h"
+
#define PCRE_BUG 0x80000000
/*
@@ -87,10 +90,12 @@ static int regression_tests(void);
int main(void)
{
int jit = 0;
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8
pcre_config(PCRE_CONFIG_JIT, &jit);
-#else
+#elif defined SUPPORT_PCRE16
pcre16_config(PCRE_CONFIG_JIT, &jit);
+#elif defined SUPPORT_PCRE32
+ pcre32_config(PCRE_CONFIG_JIT, &jit);
#endif
if (!jit) {
printf("JIT must be enabled to run pcre_jit_test\n");
@@ -101,8 +106,8 @@ int main(void)
/* --------------------------------------------------------------------------------------- */
-#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
-#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
+#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
+#error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
#endif
#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
@@ -116,6 +121,7 @@ int main(void)
#define OFFSET_MASK 0x00ffff
#define F_NO8 0x010000
#define F_NO16 0x020000
+#define F_NO32 0x020000
#define F_NOMATCH 0x040000
#define F_DIFF 0x080000
#define F_FORCECONV 0x100000
@@ -711,12 +717,15 @@ static const unsigned char *tables(int mode)
const char *errorptr;
int erroroffset;
unsigned char *default_tables;
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8
pcre *regex;
char null_str[1] = { 0 };
-#else
+#elif defined SUPPORT_PCRE16
pcre16 *regex;
PCRE_UCHAR16 null_str[1] = { 0 };
+#elif defined SUPPORT_PCRE32
+ pcre32 *regex;
+ PCRE_UCHAR32 null_str[1] = { 0 };
#endif
if (mode) {
@@ -730,18 +739,24 @@ static const unsigned char *tables(int mode)
return tables_copy;
default_tables = NULL;
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8
regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
if (regex) {
pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
pcre_free(regex);
}
-#else
+#elif defined SUPPORT_PCRE16
regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
if (regex) {
pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
pcre16_free(regex);
}
+#elif defined SUPPORT_PCRE32
+ regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
+ if (regex) {
+ pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
+ pcre32_free(regex);
+ }
#endif
/* Shouldn't ever happen. */
if (!default_tables)
@@ -771,6 +786,13 @@ static pcre16_jit_stack* callback16(void *arg)
}
#endif
+#ifdef SUPPORT_PCRE32
+static pcre32_jit_stack* callback32(void *arg) /* callback handed to pcre32_assign_jit_stack() by setstack32() */
+{
+ return (pcre32_jit_stack *)arg; /* arg is returned unchanged, only cast to the JIT stack type */
+}
+#endif /* SUPPORT_PCRE32 */
+
#ifdef SUPPORT_PCRE8
static void setstack8(pcre_extra *extra)
{
@@ -809,12 +831,31 @@ static void setstack16(pcre16_extra *extra)
}
#endif /* SUPPORT_PCRE8 */
+#ifdef SUPPORT_PCRE32
+static void setstack32(pcre32_extra *extra) /* assigns the shared JIT stack to extra; a NULL extra frees that stack instead */
+{
+ static pcre32_jit_stack *stack; /* kept across calls; allocated on first use below */
+
+ if (!extra) {
+ if (stack)
+ pcre32_jit_stack_free(stack);
+ stack = NULL;
+ return;
+ }
+
+ if (!stack)
+ stack = pcre32_jit_stack_alloc(1, 1024 * 1024);
+ /* extra is non-NULL here: the NULL case returned above. */
+ pcre32_assign_jit_stack(extra, callback32, stack);
+}
+#endif /* SUPPORT_PCRE32 */
+
#ifdef SUPPORT_PCRE16
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
unsigned char *iptr = (unsigned char*)input;
- unsigned short *optr = (unsigned short *)output;
+ PCRE_UCHAR16 *optr = output;
unsigned int c;
if (max_length == 0)
@@ -843,7 +884,7 @@ static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *o
max_length--;
} else if (max_length <= 2) {
*optr = '\0';
- return (int)(optr - (unsigned short *)output);
+ return (int)(optr - output);
} else {
c -= 0x10000;
*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
@@ -856,13 +897,13 @@ static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *o
if (offsetmap)
*offsetmap = (int)(iptr - (unsigned char*)input);
*optr = '\0';
- return (int)(optr - (unsigned short *)output);
+ return (int)(optr - output);
}
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
unsigned char *iptr = (unsigned char*)input;
- unsigned short *optr = (unsigned short *)output;
+ PCRE_UCHAR16 *optr = output;
if (max_length == 0)
return 0;
@@ -872,15 +913,75 @@ static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max
max_length--;
}
*optr = '\0';
- return (int)(optr - (unsigned short *)output);
+ return (int)(optr - output);
}
-#define REGTEST_MAX_LENGTH 4096
-static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
-static int regtest_offsetmap[REGTEST_MAX_LENGTH];
+#define REGTEST_MAX_LENGTH16 4096
+static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
+static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
#endif /* SUPPORT_PCRE16 */
+#ifdef SUPPORT_PCRE32
+
+static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
+{ /* Decodes NUL-terminated UTF-8 from input into output; returns the number of 32-bit units written, excluding the terminator. */
+ unsigned char *iptr = (unsigned char*)input;
+ PCRE_UCHAR32 *optr = output;
+ unsigned int c;
+
+ if (max_length == 0)
+ return 0; /* no room even for the terminator: output is left untouched */
+
+ while (*iptr && max_length > 1) { /* one slot is always reserved for the terminating 0 */
+ c = 0; /* stays 0 if none of the branches below matches (lead byte >= 0xf8); iptr is then not advanced */
+ if (offsetmap)
+ *offsetmap++ = (int)(iptr - (unsigned char*)input); /* byte offset in input of the character about to be decoded */
+
+ if (!(*iptr & 0x80)) /* 1-byte form */
+ c = *iptr++;
+ else if (!(*iptr & 0x20)) { /* 2-byte form */
+ c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
+ iptr += 2;
+ } else if (!(*iptr & 0x10)) { /* 3-byte form */
+ c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
+ iptr += 3;
+ } else if (!(*iptr & 0x08)) { /* 4-byte form */
+ c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
+ iptr += 4;
+ } /* NOTE(review): continuation bytes are read without validation - presumably the test inputs are always well-formed UTF-8; confirm */
+
+ *optr++ = c;
+ max_length--;
+ }
+ if (offsetmap)
+ *offsetmap = (int)(iptr - (unsigned char*)input); /* final entry: byte offset at which decoding stopped */
+ *optr = 0;
+ return (int)(optr - output);
+}
+
+static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
+{ /* Widens each byte of the NUL-terminated input to one 32-bit unit; returns the number of units written, excluding the terminator. */
+ unsigned char *iptr = (unsigned char*)input; /* read as unsigned so bytes >= 0x80 are not sign-extended */
+ PCRE_UCHAR32 *optr = output;
+
+ if (max_length == 0)
+ return 0; /* no room even for the terminator: output is left untouched */
+
+ while (*iptr && max_length > 1) { /* one slot is always reserved for the terminating 0 */
+ *optr++ = *iptr++;
+ max_length--;
+ }
+ *optr = '\0';
+ return (int)(optr - output);
+}
+
+#define REGTEST_MAX_LENGTH32 4096
+static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
+static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
+
+#endif /* SUPPORT_PCRE32 */
+
static int check_ascii(const char *input)
{
const unsigned char *ptr = (unsigned char *)input;
@@ -904,16 +1005,16 @@ static int regression_tests(void)
int successful_row = 0;
int counter = 0;
int study_mode;
+ int utf = 0, ucp = 0;
+ int disabled_flags = 0;
#ifdef SUPPORT_PCRE8
pcre *re8;
pcre_extra *extra8;
pcre_extra dummy_extra8;
int ovector8_1[32];
int ovector8_2[32];
- int return_value8_1, return_value8_2;
+ int return_value8[2];
unsigned char *mark8_1, *mark8_2;
- int utf8 = 0, ucp8 = 0;
- int disabled_flags8 = 0;
#endif
#ifdef SUPPORT_PCRE16
pcre16 *re16;
@@ -921,43 +1022,59 @@ static int regression_tests(void)
pcre16_extra dummy_extra16;
int ovector16_1[32];
int ovector16_2[32];
- int return_value16_1, return_value16_2;
+ int return_value16[2];
PCRE_UCHAR16 *mark16_1, *mark16_2;
- int utf16 = 0, ucp16 = 0;
- int disabled_flags16 = 0;
int length16;
#endif
+#ifdef SUPPORT_PCRE32
+ pcre32 *re32;
+ pcre32_extra *extra32;
+ pcre32_extra dummy_extra32;
+ int ovector32_1[32];
+ int ovector32_2[32];
+ int return_value32[2];
+ PCRE_UCHAR32 *mark32_1, *mark32_2;
+ int length32;
+#endif
/* This test compares the behaviour of interpreter and JIT. Although disabling
utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
still considered successful from pcre_jit_test point of view. */
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8
pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
-#else
+#elif defined SUPPORT_PCRE16
pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
+#elif defined SUPPORT_PCRE32
+ pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
#endif
printf("Running JIT regression tests\n");
printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
+#if defined SUPPORT_PCRE8 /* query UTF/UCP support once, from the first library that is built */
+ pcre_config(PCRE_CONFIG_UTF8, &utf);
+ pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+#elif defined SUPPORT_PCRE16
+ pcre16_config(PCRE_CONFIG_UTF16, &utf);
+ pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+#elif defined SUPPORT_PCRE32 /* was a second SUPPORT_PCRE16 test, which made this branch unreachable */
+ pcre32_config(PCRE_CONFIG_UTF32, &utf);
+ pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+#endif /* SUPPORT_PCRE[8|16|32] */
+
+ if (!utf)
+ disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
+ if (!ucp)
+ disabled_flags |= PCRE_UCP;
#ifdef SUPPORT_PCRE8
- pcre_config(PCRE_CONFIG_UTF8, &utf8);
- pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
- if (!utf8)
- disabled_flags8 |= PCRE_UTF8;
- if (!ucp8)
- disabled_flags8 |= PCRE_UCP;
- printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
+ printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
#endif
#ifdef SUPPORT_PCRE16
- pcre16_config(PCRE_CONFIG_UTF16, &utf16);
- pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
- if (!utf16)
- disabled_flags16 |= PCRE_UTF8;
- if (!ucp16)
- disabled_flags16 |= PCRE_UCP;
- printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
+ printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
+#endif
+#ifdef SUPPORT_PCRE32
+ printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
#endif
while (current->pattern) {
@@ -982,7 +1099,7 @@ static int regression_tests(void)
re8 = NULL;
if (!(current->start_offset & F_NO8))
re8 = pcre_compile(current->pattern,
- current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
&error, &err_offs, tables(0));
extra8 = NULL;
@@ -1001,19 +1118,19 @@ static int regression_tests(void)
re8 = NULL;
}
extra8->flags |= PCRE_EXTRA_MARK;
- } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
- printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
+ } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO8))
+ printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
#endif
#ifdef SUPPORT_PCRE16
- if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
- convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
+ if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
+ convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
else
- copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
+ copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
re16 = NULL;
if (!(current->start_offset & F_NO16))
- re16 = pcre16_compile(regtest_buf,
- current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
+ re16 = pcre16_compile(regtest_buf16,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
&error, &err_offs, tables(0));
extra16 = NULL;
@@ -1032,8 +1149,39 @@ static int regression_tests(void)
re16 = NULL;
}
extra16->flags |= PCRE_EXTRA_MARK;
- } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
- printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
+ } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO16))
+ printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
+#endif
+#ifdef SUPPORT_PCRE32
+ if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
+ convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
+ else
+ copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
+
+ re32 = NULL;
+ if (!(current->start_offset & F_NO32))
+ re32 = pcre32_compile(regtest_buf32,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
+ &error, &err_offs, tables(0));
+
+ extra32 = NULL;
+ if (re32) {
+ error = NULL;
+ extra32 = pcre32_study(re32, study_mode, &error);
+ if (!extra32) {
+ printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
+ pcre32_free(re32);
+ re32 = NULL;
+ }
+ else if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { /* only inspect extra32 when the study succeeded */
+ printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
+ pcre32_free_study(extra32);
+ pcre32_free(re32);
+ re32 = NULL;
+ }
+ if (re32) extra32->flags |= PCRE_EXTRA_MARK; /* re32 was reset above whenever extra32 is NULL or already freed */
+ } else if (((utf && ucp) || is_ascii_pattern) && !(current->start_offset & F_NO32))
+ printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
#endif
counter++;
@@ -1044,11 +1192,14 @@ static int regression_tests(void)
#ifdef SUPPORT_PCRE16
setstack16(NULL);
#endif
+#ifdef SUPPORT_PCRE32
+ setstack32(NULL);
+#endif
}
#ifdef SUPPORT_PCRE8
- return_value8_1 = -1000;
- return_value8_2 = -1000;
+ return_value8[0] = -1000;
+ return_value8[1] = -1000;
for (i = 0; i < 32; ++i)
ovector8_1[i] = -2;
for (i = 0; i < 32; ++i)
@@ -1058,19 +1209,19 @@ static int regression_tests(void)
mark8_2 = NULL;
setstack8(extra8);
extra8->mark = &mark8_1;
- return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
+ return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
memset(&dummy_extra8, 0, sizeof(pcre_extra));
dummy_extra8.flags = PCRE_EXTRA_MARK;
dummy_extra8.mark = &mark8_2;
- return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
+ return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
}
#endif
#ifdef SUPPORT_PCRE16
- return_value16_1 = -1000;
- return_value16_2 = -1000;
+ return_value16[0] = -1000;
+ return_value16[1] = -1000;
for (i = 0; i < 32; ++i)
ovector16_1[i] = -2;
for (i = 0; i < 32; ++i)
@@ -1079,80 +1230,189 @@ static int regression_tests(void)
mark16_1 = NULL;
mark16_2 = NULL;
setstack16(extra16);
- if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
- length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
+ if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
+ length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
else
- length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
+ length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
extra16->mark = &mark16_1;
- return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
+ return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
memset(&dummy_extra16, 0, sizeof(pcre16_extra));
dummy_extra16.flags = PCRE_EXTRA_MARK;
dummy_extra16.mark = &mark16_2;
- return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
+ return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
}
#endif
- /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
+#ifdef SUPPORT_PCRE32
+ return_value32[0] = -1000;
+ return_value32[1] = -1000;
+ for (i = 0; i < 32; ++i)
+ ovector32_1[i] = -2;
+ for (i = 0; i < 32; ++i)
+ ovector32_2[i] = -2;
+ if (re32) {
+ mark32_1 = NULL;
+ mark32_2 = NULL;
+ setstack32(extra32);
+ if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
+ length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
+ else
+ length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
+ extra32->mark = &mark32_1;
+ return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+ memset(&dummy_extra32, 0, sizeof(pcre32_extra));
+ dummy_extra32.flags = PCRE_EXTRA_MARK;
+ dummy_extra32.mark = &mark32_2;
+ return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
+ }
+#endif
+
+ /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
+ * return_value8[0], return_value16[0], return_value32[0],
+ * ovector8_1[0], ovector8_1[1],
+ * ovector16_1[0], ovector16_1[1],
+ * ovector32_1[0], ovector32_1[1],
+ * (current->flags & PCRE_CASELESS) ? "C" : ""); */
/* If F_DIFF is set, just run the test, but do not compare the results.
Segfaults can still be captured. */
is_successful = 1;
if (!(current->start_offset & F_DIFF)) {
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 && defined SUPPORT_UTF
- if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
+#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
+ if (!(current->start_offset & F_FORCECONV)) {
+ int return_value;
+
/* All results must be the same. */
- if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
- printf("\n8 and 16 bit: Return value differs(J8:%d,I8:%d,J16:%d,I16%d): [%d] '%s' @ '%s'\n",
- return_value8_1, return_value8_2, return_value16_1, return_value16_2,
+#ifdef SUPPORT_PCRE8
+ if ((return_value = return_value8[0]) != return_value8[1]) {
+ printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
+ return_value8[0], return_value8[1], total, current->pattern, current->input);
+ is_successful = 0;
+ } else
+#endif
+#ifdef SUPPORT_PCRE16
+ if ((return_value = return_value16[0]) != return_value16[1]) {
+ printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
+ return_value16[0], return_value16[1], total, current->pattern, current->input);
+ is_successful = 0;
+ } else
+#endif
+#ifdef SUPPORT_PCRE32
+ if ((return_value = return_value32[0]) != return_value32[1]) {
+ printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
+ return_value32[0], return_value32[1], total, current->pattern, current->input);
+ is_successful = 0;
+ } else
+#endif
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+ if (return_value8[0] != return_value16[0]) {
+ printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
+ return_value8[0], return_value16[0],
total, current->pattern, current->input);
is_successful = 0;
- } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
- if (return_value8_1 == PCRE_ERROR_PARTIAL) {
- return_value8_1 = 2;
- return_value16_1 = 2;
+ } else
+#endif
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
+ if (return_value8[0] != return_value32[0]) {
+ printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
+ return_value8[0], return_value32[0],
+ total, current->pattern, current->input);
+ is_successful = 0;
+ } else
+#endif
+#if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
+ if (return_value16[0] != return_value32[0]) {
+ printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
+ return_value16[0], return_value32[0],
+ total, current->pattern, current->input);
+ is_successful = 0;
+ } else
+#endif
+ if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
+ if (return_value == PCRE_ERROR_PARTIAL) {
+ return_value = 2;
} else {
- return_value8_1 *= 2;
- return_value16_1 *= 2;
+ return_value *= 2;
}
-
+#ifdef SUPPORT_PCRE8
+ return_value8[0] = return_value;
+#endif
+#ifdef SUPPORT_PCRE16
+ return_value16[0] = return_value;
+#endif
+#ifdef SUPPORT_PCRE32
+ return_value32[0] = return_value;
+#endif
/* Transform back the results. */
if (current->flags & PCRE_UTF8) {
- for (i = 0; i < return_value8_1; ++i) {
+#ifdef SUPPORT_PCRE16
+ for (i = 0; i < return_value; ++i) {
if (ovector16_1[i] >= 0)
- ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
+ ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
if (ovector16_2[i] >= 0)
- ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
+ ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
}
- }
+#endif
+#ifdef SUPPORT_PCRE32
+ for (i = 0; i < return_value; ++i) {
+ if (ovector32_1[i] >= 0)
+ ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
+ if (ovector32_2[i] >= 0)
+ ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
+ }
+#endif
+ }
- for (i = 0; i < return_value8_1; ++i)
+ for (i = 0; i < return_value; ++i) {
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
- printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16%d): [%d] '%s' @ '%s' \n",
+ printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
+#endif
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
+ if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
+ printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
+ i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+#endif
+#if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32 /* cross-check the 16-bit offsets against the 32-bit ones */
+ if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
+ printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
+ i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+#endif /* SUPPORT_PCRE16 && SUPPORT_PCRE32 */
+ }
}
- } else {
-#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
+ } else
+#endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
+ {
/* Only the 8 bit and 16 bit results must be equal. */
#ifdef SUPPORT_PCRE8
- if (return_value8_1 != return_value8_2) {
- printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
- return_value8_1, return_value8_2, total, current->pattern, current->input);
+ if (return_value8[0] != return_value8[1]) {
+ printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ return_value8[0], return_value8[1], total, current->pattern, current->input);
is_successful = 0;
- } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
- if (return_value8_1 == PCRE_ERROR_PARTIAL)
- return_value8_1 = 2;
+ } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
+ if (return_value8[0] == PCRE_ERROR_PARTIAL)
+ return_value8[0] = 2;
else
- return_value8_1 *= 2;
+ return_value8[0] *= 2;
- for (i = 0; i < return_value8_1; ++i)
+ for (i = 0; i < return_value8[0]; ++i)
if (ovector8_1[i] != ovector8_2[i]) {
- printf("\n8 bit: Ovector[%d] value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
+ printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
is_successful = 0;
}
@@ -1160,40 +1420,57 @@ static int regression_tests(void)
#endif
#ifdef SUPPORT_PCRE16
- if (return_value16_1 != return_value16_2) {
- printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
- return_value16_1, return_value16_2, total, current->pattern, current->input);
+ if (return_value16[0] != return_value16[1]) {
+ printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ return_value16[0], return_value16[1], total, current->pattern, current->input);
is_successful = 0;
- } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
- if (return_value16_1 == PCRE_ERROR_PARTIAL)
- return_value16_1 = 2;
+ } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
+ if (return_value16[0] == PCRE_ERROR_PARTIAL)
+ return_value16[0] = 2;
else
- return_value16_1 *= 2;
+ return_value16[0] *= 2;
- for (i = 0; i < return_value16_1; ++i)
+ for (i = 0; i < return_value16[0]; ++i)
if (ovector16_1[i] != ovector16_2[i]) {
- printf("\n16 bit: Ovector[%d] value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
+ printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
is_successful = 0;
}
}
#endif
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 && defined SUPPORT_UTF
- }
-#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
+#ifdef SUPPORT_PCRE32
+ if (return_value32[0] != return_value32[1]) {
+ printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ return_value32[0], return_value32[1], total, current->pattern, current->input);
+ is_successful = 0;
+ } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
+ if (return_value32[0] == PCRE_ERROR_PARTIAL)
+ return_value32[0] = 2;
+ else
+ return_value32[0] *= 2;
+
+ for (i = 0; i < return_value32[0]; ++i)
+ if (ovector32_1[i] != ovector32_2[i]) {
+ printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
+ is_successful = 0;
+ }
+ }
+#endif
+ }
}
if (is_successful) {
#ifdef SUPPORT_PCRE8
- if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
- if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
+ if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii_input)) {
+ if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
is_successful = 0;
}
- if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
+ if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
is_successful = 0;
@@ -1201,20 +1478,35 @@ static int regression_tests(void)
}
#endif
#ifdef SUPPORT_PCRE16
- if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
- if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
+ if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii_input)) {
+ if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
is_successful = 0;
}
- if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
+ if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
is_successful = 0;
}
}
#endif
+#ifdef SUPPORT_PCRE32
+ if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii_input)) {
+ if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
+ printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+
+ if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
+ printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+ }
+#endif
}
if (is_successful) {
@@ -1232,6 +1524,13 @@ static int regression_tests(void)
is_successful = 0;
}
#endif
+#ifdef SUPPORT_PCRE32
+ if (mark32_1 != mark32_2) {
+ printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+#endif
}
#ifdef SUPPORT_PCRE8
@@ -1246,6 +1545,12 @@ static int regression_tests(void)
pcre16_free(re16);
}
#endif
+#ifdef SUPPORT_PCRE32
+ if (re32) {
+ pcre32_free_study(extra32);
+ pcre32_free(re32);
+ }
+#endif
if (is_successful) {
successful++;
@@ -1268,6 +1573,9 @@ static int regression_tests(void)
#ifdef SUPPORT_PCRE16
setstack16(NULL);
#endif
+#ifdef SUPPORT_PCRE32
+ setstack32(NULL);
+#endif
if (total == successful) {
printf("\nAll JIT regression tests are successfully passed.\n");
diff --git a/pcre_maketables.c b/pcre_maketables.c
index 8e466cc..610a669 100644
--- a/pcre_maketables.c
+++ b/pcre_maketables.c
@@ -66,12 +66,15 @@ Arguments: none
Returns: pointer to the contiguous block of data
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
const unsigned char *
pcre_maketables(void)
-#else
+#elif defined COMPILE_PCRE16
const unsigned char *
pcre16_maketables(void)
+#elif defined COMPILE_PCRE32
+const unsigned char *
+pcre32_maketables(void)
#endif
{
unsigned char *yield, *p;
diff --git a/pcre_newline.c b/pcre_newline.c
index 5e257d8..1919f47 100644
--- a/pcre_newline.c
+++ b/pcre_newline.c
@@ -117,7 +117,7 @@ else switch(c)
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
-#else /* 16-bit (can't be EBCDIC) */
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
@@ -196,7 +196,7 @@ else switch(c)
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
-#else
+#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
diff --git a/pcre_printint.c b/pcre_printint.c
index 108d72f..02077d6 100644
--- a/pcre_printint.c
+++ b/pcre_printint.c
@@ -78,10 +78,13 @@ having a separate .h file just for this. */
#ifdef PCRE_INCLUDED
static /* Keep the following function as private. */
#endif
-#ifdef COMPILE_PCRE8
+
+#if defined COMPILE_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
-#else
+#elif defined COMPILE_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#elif defined COMPILE_PCRE32
+void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
/* Macro that decides whether a character should be output as a literal or in
@@ -114,23 +117,23 @@ static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
static int
print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
{
-int c = *ptr;
+pcre_uint32 c = *ptr;
#ifndef SUPPORT_UTF
(void)utf; /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c);
+if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%x}", c);
return 0;
#else
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if (!utf || (c & 0xc0) != 0xc0)
{
- if (PRINTABLE(c)) fprintf(f, "%c", c);
+ if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c < 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
@@ -162,13 +165,11 @@ else
return a;
}
-#else
-
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
if (!utf || (c & 0xfc00) != 0xd800)
{
- if (PRINTABLE(c)) fprintf(f, "%c", c);
+ if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
@@ -190,9 +191,25 @@ else
return 1;
}
-#endif /* COMPILE_PCRE16 */
+#elif defined COMPILE_PCRE32
-#endif /* COMPILE_PCRE8 */
+if (!utf || (c & 0xfffff800u) != 0xd800u)
+ {
+ if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+ else if (c <= 0x80) fprintf(f, "\\x%02x", c);
+ else fprintf(f, "\\x{%x}", c);
+ return 0;
+ }
+else
+ {
+ /* This is a check for malformed UTF-32; it should only occur if the sanity
+ check has been turned off. Rather than swallow a surrogate, just stop if
+ we hit one. Print it with \X instead of \x as an indication. */
+ fprintf(f, "\\X{%x}", c);
+ return 0;
+ }
+
+#endif /* COMPILE_PCRE[8|16|32] */
#endif /* SUPPORT_UTF */
}
@@ -281,12 +298,15 @@ written that do not depend on the value of LINK_SIZE. */
#ifdef PCRE_INCLUDED
static /* Keep the following function as private. */
#endif
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
void
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
-#else
+#elif defined COMPILE_PCRE16
void
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#elif defined COMPILE_PCRE32
+void
+pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)external_re;
@@ -310,7 +330,7 @@ if (re->magic_number != MAGIC_NUMBER)
}
code = codestart = (pcre_uchar *)re + offset + count * size;
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
for(;;)
diff --git a/pcre_refcount.c b/pcre_refcount.c
index 441e4dc..79efa90 100644
--- a/pcre_refcount.c
+++ b/pcre_refcount.c
@@ -68,12 +68,15 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_refcount(pcre16 *argument_re, int adjust)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_refcount(pcre32 *argument_re, int adjust)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
diff --git a/pcre_study.c b/pcre_study.c
index 805c28f..cc9e4f8 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -224,7 +224,7 @@ for (;;)
case OP_NOTPOSPLUSI:
branchlength++;
cc += 2;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -245,7 +245,7 @@ for (;;)
case OP_NOTEXACTI:
branchlength += GET2(cc,1);
cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -486,7 +486,7 @@ for (;;)
case OP_NOTPOSQUERYI:
cc += PRIV(OP_lengths)[op];
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
@@ -538,7 +538,7 @@ Arguments:
p points to the character
caseless the caseless flag
cd the block with char table pointers
- utf TRUE for UTF-8 / UTF-16 mode
+ utf TRUE for UTF-8 / UTF-16 / UTF-32 mode
Returns: pointer after the character
*/
@@ -577,7 +577,7 @@ if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
return p + 1;
#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
if (c > 0xff)
{
c = 0xff;
@@ -701,7 +701,7 @@ function fails unless the result is SSB_DONE.
Arguments:
code points to an expression
start_bits points to a 32-byte table, initialized to 0
- utf TRUE if in UTF-8 / UTF-16 mode
+ utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd the block with char table pointers
Returns: SSB_FAIL => Failed to find any starting bytes
@@ -1000,11 +1000,10 @@ do
SET_BIT(0xE1); /* For U+1680, U+180E */
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
SET_BIT(0xE3); /* For U+3000 */
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xA0);
SET_BIT(0xFF); /* For characters > 255 */
-#endif /* COMPILE_PCRE16 */
+#endif /* COMPILE_PCRE[8|16|32] */
}
else
#endif /* SUPPORT_UTF */
@@ -1012,9 +1011,9 @@ do
#ifndef EBCDIC
SET_BIT(0xA0);
#endif /* Not EBCDIC */
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF); /* For characters > 255 */
-#endif /* COMPILE_PCRE16 */
+#endif /* COMPILE_PCRE[16|32] */
}
try_next = FALSE;
break;
@@ -1031,17 +1030,16 @@ do
#ifdef COMPILE_PCRE8
SET_BIT(0xC2); /* For U+0085 */
SET_BIT(0xE2); /* For U+2028, U+2029 */
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(CHAR_NEL);
SET_BIT(0xFF); /* For characters > 255 */
-#endif /* COMPILE_PCRE16 */
+#endif /* COMPILE_PCRE[8|16|32] */
}
else
#endif /* SUPPORT_UTF */
{
SET_BIT(CHAR_NEL);
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF); /* For characters > 255 */
#endif
}
@@ -1139,11 +1137,10 @@ do
SET_BIT(0xE1); /* For U+1680, U+180E */
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
SET_BIT(0xE3); /* For U+3000 */
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xA0);
SET_BIT(0xFF); /* For characters > 255 */
-#endif /* COMPILE_PCRE16 */
+#endif /* COMPILE_PCRE[8|16|32] */
}
else
#endif /* SUPPORT_UTF */
@@ -1164,8 +1161,7 @@ do
#ifdef COMPILE_PCRE8
SET_BIT(0xC2); /* For U+0085 */
SET_BIT(0xE2); /* For U+2028, U+2029 */
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(CHAR_NEL);
SET_BIT(0xFF); /* For characters > 255 */
#endif /* COMPILE_PCRE16 */
@@ -1229,7 +1225,7 @@ do
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
}
#endif
-#ifdef COMPILE_PCRE16
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF); /* For characters > 255 */
#endif
/* Fall through */
@@ -1325,12 +1321,15 @@ Returns: pointer to a pcre[16]_extra block, with study_data filled in and
NULL on error or if no optimization possible
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
+pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
#endif
{
int min;
@@ -1353,10 +1352,12 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER)
if ((re->flags & PCRE_MODE) == 0)
{
-#ifdef COMPILE_PCRE8
- *errorptr = "argument is compiled in 16 bit mode";
-#else
- *errorptr = "argument is compiled in 8 bit mode";
+#if defined COMPILE_PCRE8
+ *errorptr = "argument not compiled in 8 bit mode";
+#elif defined COMPILE_PCRE16
+ *errorptr = "argument not compiled in 16 bit mode";
+#elif defined COMPILE_PCRE32
+ *errorptr = "argument not compiled in 32 bit mode";
#endif
return NULL;
}
@@ -1383,14 +1384,18 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
tables = re->tables;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
if (tables == NULL)
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
-#else
+#elif defined COMPILE_PCRE16
if (tables == NULL)
(void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
+#elif defined COMPILE_PCRE32
+ if (tables == NULL)
+ (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+ (void *)(&tables));
#endif
compile_block.lcc = tables + lcc_offset;
@@ -1503,11 +1508,12 @@ if (bits_set || min > 0 || (options & (
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
(options & PCRE_STUDY_EXTRA_NEEDED) == 0)
{
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
pcre_free_study(extra);
-#endif
-#ifdef COMPILE_PCRE16
+#elif defined COMPILE_PCRE16
pcre16_free_study(extra);
+#elif defined COMPILE_PCRE32
+ pcre32_free_study(extra);
#endif
extra = NULL;
}
@@ -1528,12 +1534,15 @@ Argument: a pointer to the pcre[16]_extra block
Returns: nothing
*/
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void
pcre_free_study(pcre_extra *extra)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void
pcre16_free_study(pcre16_extra *extra)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN void
+pcre32_free_study(pcre32_extra *extra)
#endif
{
if (extra == NULL)
diff --git a/pcre_tables.c b/pcre_tables.c
index ee16a47..8b0a832 100644
--- a/pcre_tables.c
+++ b/pcre_tables.c
@@ -74,9 +74,9 @@ const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST };
character. */
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
- || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
+ || (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
-/* These tables are also required by pcretest in 16 bit mode. */
+/* These tables are also required by pcretest in 16- or 32-bit mode. */
const int PRIV(utf8_table1)[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
@@ -98,7 +98,7 @@ const pcre_uint8 PRIV(utf8_table4)[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
-#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
+#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/
#ifdef SUPPORT_UTF
diff --git a/pcre_version.c b/pcre_version.c
index 58a0eaa..ae86ff2 100644
--- a/pcre_version.c
+++ b/pcre_version.c
@@ -79,12 +79,15 @@ I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
-#else
+#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre16_version(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
+pcre32_version(void)
#endif
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
diff --git a/pcreposix.c b/pcreposix.c
index 1730d12..9898e5b 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -157,11 +157,12 @@ static const int eint[] = {
REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
REG_BADPAT, /* \N is not supported in a class */
REG_BADPAT, /* too many forward references */
- REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
+ REG_BADPAT, /* disallowed UTF-8/16/32 code point (>= 0xd800 && <= 0xdfff) */
REG_BADPAT, /* invalid UTF-16 string (should not occur) */
/* 75 */
REG_BADPAT, /* overlong MARK name */
- REG_BADPAT /* character value in \u.... sequence is too large */
+ REG_BADPAT, /* character value in \u.... sequence is too large */
+ REG_BADPAT /* invalid UTF-32 string (should not occur) */
};
/* Table of texts corresponding to POSIX error codes */
@@ -316,7 +317,7 @@ int *ovector = NULL;
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
BOOL allocated_ovector = FALSE;
BOOL nosub =
- (((const pcre *)preg->re_pcre)->options & PCRE_NO_AUTO_CAPTURE) != 0;
+ (REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0;
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
diff --git a/pcretest.c b/pcretest.c
index 1b9d6ef..eba3f48 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -134,10 +134,18 @@ appropriately for an application, not for building PCRE. */
#include "pcre.h"
-#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
+#if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
+/* Configure internal macros to 32 bit mode. */
+#define COMPILE_PCRE32
+#endif
+#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
/* Configure internal macros to 16 bit mode. */
#define COMPILE_PCRE16
#endif
+#if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
+/* Configure internal macros to 8 bit mode. */
+#define COMPILE_PCRE8
+#endif
#include "pcre_internal.h"
@@ -152,6 +160,9 @@ void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
+#ifdef SUPPORT_PCRE32
+void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
/* We need access to some of the data tables that PCRE uses. So as not to have
to keep two copies, we include the source files here, changing the names of the
@@ -176,8 +187,8 @@ that differ in their output from isprint() even in the "C" locale. */
#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
-/* Posix support is disabled in 16 bit only mode. */
-#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
+/* Posix support is disabled in 16 or 32 bit only mode. */
+#if !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif
@@ -222,7 +233,7 @@ argument, the casting might be incorrectly applied. */
#define PCHARSV8(p, offset, len, f) \
(void)pchars((pcre_uint8 *)(p) + offset, len, f)
-#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
+#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
p = read_capture_name8(p, cn8, re)
#define STRLEN8(p) ((int)strlen((char *)p))
@@ -304,7 +315,7 @@ argument, the casting might be incorrectly applied. */
#define PCHARSV16(p, offset, len, f) \
(void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
-#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
+#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
p = read_capture_name16(p, cn16, re)
#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
@@ -383,49 +394,156 @@ argument, the casting might be incorrectly applied. */
#endif /* SUPPORT_PCRE16 */
+/* -----------------------------------------------------------*/
+
+#ifdef SUPPORT_PCRE32
+
+#define PCHARS32(lv, p, offset, len, f) \
+ lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
+
+#define PCHARSV32(p, offset, len, f) \
+ (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
+
+#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
+ p = read_capture_name32(p, cn32, re)
+
+#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
+
+#define SET_PCRE_CALLOUT32(callout) \
+ pcre32_callout = (int (*)(pcre32_callout_block *))callout
+
+#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
+ pcre32_assign_jit_stack((pcre32_extra *)extra, \
+ (pcre32_jit_callback)callback, userdata)
+
+#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
+ re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
+ tables)
+
+/* Copy a named substring using the 32-bit library and store the result code in
+   rc. The buffer is handed over as PCRE_UCHAR32 elements, so the size is scaled
+   by 4 (not 2, the 16-bit factor) to avoid advertising more room than the buffer
+   has. NOTE(review): assumes size is a byte count - confirm at the call sites. */
+#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
+ namesptr, cbuffer, size) \
+ rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
+ count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)
+
+/* Copy captured substring number i using the 32-bit library and store the result
+   code in rc. The buffer is handed over as PCRE_UCHAR32 elements, so the size is
+   scaled by 4 (not 2, the 16-bit factor) to avoid advertising more room than the
+   buffer has. NOTE(review): assumes size is a byte count - confirm at the call sites. */
+#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
+ rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
+ (PCRE_UCHAR32 *)cbuffer, size/4)
+
+#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets, workspace, size_workspace) \
+ count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
+ (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
+ workspace, size_workspace)
+
+#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets) \
+ count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
+ len, start_offset, options, offsets, size_offsets)
+
+#define PCRE_FREE_STUDY32(extra) \
+ pcre32_free_study((pcre32_extra *)extra)
+
+#define PCRE_FREE_SUBSTRING32(substring) \
+ pcre32_free_substring((PCRE_SPTR32)substring)
+
+#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
+ pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
+
+#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
+ getnamesptr, subsptr) \
+ rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
+ count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
+
+#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
+ n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
+
+#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
+ rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
+ (PCRE_SPTR32 *)(void*)subsptr)
+
+#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
+ rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
+ (PCRE_SPTR32 **)(void*)listptr)
+
+#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
+ rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
+ tables)
+
+#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
+ pcre32_printint(re, outfile, debug_lengths)
+
+#define PCRE_STUDY32(extra, re, options, error) \
+ extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
+
+#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
+ (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
+
+#define PCRE_JIT_STACK_FREE32(stack) \
+ pcre32_jit_stack_free((pcre32_jit_stack *)stack)
+
+#endif /* SUPPORT_PCRE32 */
+
/* ----- Both modes are supported; a runtime test is needed, except for
pcre_config(), and the JIT stack functions, when it doesn't matter which
version is called. ----- */
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+enum {
+ PCRE8_MODE,
+ PCRE16_MODE,
+ PCRE32_MODE
+};
-#define CHAR_SIZE (use_pcre16? 2:1)
+#if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
+
+#define CHAR_SIZE (1 << pcre_mode)
#define PCHARS(lv, p, offset, len, f) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCHARS32(lv, p, offset, len, f); \
+ else if (pcre_mode == PCRE16_MODE) \
PCHARS16(lv, p, offset, len, f); \
else \
PCHARS8(lv, p, offset, len, f)
#define PCHARSV(p, offset, len, f) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCHARSV32(p, offset, len, f); \
+ else if (pcre_mode == PCRE16_MODE) \
PCHARSV16(p, offset, len, f); \
else \
PCHARSV8(p, offset, len, f)
-#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
- if (use_pcre16) \
- READ_CAPTURE_NAME16(p, cn8, cn16, re); \
+#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
+ if (pcre_mode == PCRE32_MODE) \
+ READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
+ else if (pcre_mode == PCRE16_MODE) \
+ READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
else \
- READ_CAPTURE_NAME8(p, cn8, cn16, re)
+ READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
#define SET_PCRE_CALLOUT(callout) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ SET_PCRE_CALLOUT32(callout); \
+ else if (pcre_mode == PCRE16_MODE) \
SET_PCRE_CALLOUT16(callout); \
else \
SET_PCRE_CALLOUT8(callout)
-#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
+#define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
else \
PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
else \
PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
@@ -434,7 +552,10 @@ version is called. ----- */
#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
namesptr, cbuffer, size) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
+ namesptr, cbuffer, size); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
namesptr, cbuffer, size); \
else \
@@ -442,14 +563,19 @@ version is called. ----- */
namesptr, cbuffer, size)
#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
else \
PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets, workspace, size_workspace) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets, workspace, size_workspace); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets, workspace, size_workspace); \
else \
@@ -458,7 +584,10 @@ version is called. ----- */
#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
+ offsets, size_offsets); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
offsets, size_offsets); \
else \
@@ -466,26 +595,35 @@ version is called. ----- */
offsets, size_offsets)
#define PCRE_FREE_STUDY(extra) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_FREE_STUDY32(extra); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_FREE_STUDY16(extra); \
else \
PCRE_FREE_STUDY8(extra)
#define PCRE_FREE_SUBSTRING(substring) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_FREE_SUBSTRING32(substring); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_FREE_SUBSTRING16(substring); \
else \
PCRE_FREE_SUBSTRING8(substring)
#define PCRE_FREE_SUBSTRING_LIST(listptr) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_FREE_SUBSTRING_LIST32(listptr); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_FREE_SUBSTRING_LIST16(listptr); \
else \
PCRE_FREE_SUBSTRING_LIST8(listptr)
#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
getnamesptr, subsptr) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
+ getnamesptr, subsptr); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
getnamesptr, subsptr); \
else \
@@ -493,51 +631,67 @@ version is called. ----- */
getnamesptr, subsptr)
#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
else \
PCRE_GET_STRINGNUMBER8(n, rc, ptr)
#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
else \
PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
else \
PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
- (use_pcre16 ? \
- PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
- :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
+ (pcre_mode == PCRE32_MODE ? \
+ PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
+ : pcre_mode == PCRE16_MODE ? \
+ PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
+ : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
#define PCRE_JIT_STACK_FREE(stack) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_JIT_STACK_FREE32(stack); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_JIT_STACK_FREE16(stack); \
else \
PCRE_JIT_STACK_FREE8(stack)
#define PCRE_MAKETABLES \
- (use_pcre16? pcre16_maketables() : pcre_maketables())
+ (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
else \
PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
#define PCRE_PRINTINT(re, outfile, debug_lengths) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_PRINTINT32(re, outfile, debug_lengths); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_PRINTINT16(re, outfile, debug_lengths); \
else \
PCRE_PRINTINT8(re, outfile, debug_lengths)
#define PCRE_STUDY(extra, re, options, error) \
- if (use_pcre16) \
+ if (pcre_mode == PCRE32_MODE) \
+ PCRE_STUDY32(extra, re, options, error); \
+ else if (pcre_mode == PCRE16_MODE) \
PCRE_STUDY16(extra, re, options, error); \
else \
PCRE_STUDY8(extra, re, options, error)
@@ -574,7 +728,7 @@ version is called. ----- */
/* ----- Only 16-bit mode is supported ----- */
-#else
+#elif defined SUPPORT_PCRE16
#define CHAR_SIZE 2
#define PCHARS PCHARS16
#define PCHARSV PCHARSV16
@@ -601,6 +755,37 @@ version is called. ----- */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT PCRE_PRINTINT16
#define PCRE_STUDY PCRE_STUDY16
+
+/* ----- Only 32-bit mode is supported ----- */
+
+#elif defined SUPPORT_PCRE32
+#define CHAR_SIZE 4
+#define PCHARS PCHARS32
+#define PCHARSV PCHARSV32
+#define READ_CAPTURE_NAME READ_CAPTURE_NAME32
+#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
+#define STRLEN STRLEN32
+#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
+#define PCRE_COMPILE PCRE_COMPILE32
+#define PCRE_CONFIG pcre32_config
+#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
+#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
+#define PCRE_DFA_EXEC PCRE_DFA_EXEC32
+#define PCRE_EXEC PCRE_EXEC32
+#define PCRE_FREE_STUDY PCRE_FREE_STUDY32
+#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
+#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
+#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
+#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
+#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
+#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
+#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
+#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
+#define PCRE_MAKETABLES pcre32_maketables()
+#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
+#define PCRE_PRINTINT PCRE_PRINTINT32
+#define PCRE_STUDY PCRE_STUDY32
+
#endif
/* ----- End of mode-specific function call macros ----- */
@@ -652,22 +837,22 @@ static pcre_uint8 *pbuffer = NULL;
/* Another buffer is needed translation to 16-bit character strings. It will
obtained and extended as required. */
-#ifdef SUPPORT_PCRE16
-static int buffer16_size = 0;
-static pcre_uint16 *buffer16 = NULL;
-
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
-/* We need the table of operator lengths that is used for 16-bit compiling, in
+/* We need the table of operator lengths that is used for 16/32-bit compiling, in
order to swap bytes in a pattern for saving/reloading testing. Luckily, the
data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
-appropriately for the 16-bit world. Just as a safety check, make sure that
-COMPILE_PCRE16 is *not* set. */
+appropriately for the 16/32-bit world. Just as a safety check, make sure that
+COMPILE_PCRE[16|32] is *not* set. */
#ifdef COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif
+#ifdef COMPILE_PCRE32
+#error COMPILE_PCRE32 must not be set when compiling pcretest.c
+#endif
+
#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
@@ -681,19 +866,30 @@ COMPILE_PCRE16 is *not* set. */
#undef IMM2_SIZE
#define IMM2_SIZE 1
-#endif /* SUPPORT_PCRE8 */
+#endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
+#ifdef SUPPORT_PCRE16
+static int buffer16_size = 0;
+static pcre_uint16 *buffer16 = NULL;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
-/* If we have 8-bit support, default use_pcre16 to false; if there is also
-16-bit support, it can be changed by an option. If there is no 8-bit support,
-there must be 16-bit support, so default it to 1. */
+#ifdef SUPPORT_PCRE32
+static int buffer32_size = 0;
+static pcre_uint32 *buffer32 = NULL;
+static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
+#endif /* SUPPORT_PCRE32 */
-#ifdef SUPPORT_PCRE8
-static int use_pcre16 = 0;
-#else
-static int use_pcre16 = 1;
+/* If we have 8-bit support, default to it; if there is also
+16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
+there must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
+
+#if defined SUPPORT_PCRE8
+static int pcre_mode = PCRE8_MODE;
+#elif defined SUPPORT_PCRE16
+static int pcre_mode = PCRE16_MODE;
+#elif defined SUPPORT_PCRE32
+static int pcre_mode = PCRE32_MODE;
#endif
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
@@ -1144,7 +1340,7 @@ return (pcre_jit_stack *)arg;
}
-#if !defined NOUTF || defined SUPPORT_PCRE16
+#if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
/*************************************************
* Convert UTF-8 string to value *
*************************************************/
@@ -1204,7 +1400,7 @@ return i+1;
-#if !defined NOUTF || defined SUPPORT_PCRE16
+#if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
@@ -1316,6 +1512,83 @@ return pp - buffer16;
}
#endif
+#ifdef SUPPORT_PCRE32
+/*************************************************
+* Convert a string to 32-bit *
+*************************************************/
+
+/* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
+8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
+times, because every character occupies at least 1 byte in UTF-8 and exactly 4
+bytes in UTF-32 (values above 0xffff need at least 4 bytes of UTF-8 as well). The
+result is always left in buffer32.
+
+Note that for data lines this function does not object to surrogate values. This
+is deliberate; it makes it possible to construct UTF-32 strings that are invalid,
+for the purpose of testing that they are correctly faulted.
+
+Patterns to be converted are either plain ASCII or UTF-8; data lines are always
+in UTF-8 so that values greater than 255 can be handled.
+
+Arguments:
+ data TRUE if converting a data line; FALSE for a regex
+ p points to a byte string
+ utf true if UTF-8 (to be converted to UTF-32)
+ len number of bytes in the string (excluding trailing zero)
+
+Returns: number of 32-bit data items used (excluding trailing zero)
+ OR -1 if a UTF-8 string is malformed
+ OR -2 if a value > 0x10ffff is encountered
+ OR -3 if a UTF pattern contains an ill-formed value (i.e. a surrogate)
+*/
+
+static int
+to32(int data, pcre_uint8 *p, int utf, int len)
+{
+pcre_uint32 *pp;
+
+if (buffer32_size < 4*len + 4)
+ {
+ if (buffer32 != NULL) free(buffer32);
+ buffer32_size = 4*len + 4;
+ buffer32 = (pcre_uint32 *)malloc(buffer32_size);
+ if (buffer32 == NULL)
+ {
+ fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
+ exit(1);
+ }
+ }
+
+pp = buffer32;
+
+if (!utf && !data)
+ {
+ while (len-- > 0) *pp++ = *p++;
+ }
+
+else
+ {
+ int c = 0;
+ while (len > 0)
+ {
+ int chlen = utf82ord(p, &c);
+ if (chlen <= 0) return -1;
+ if (utf)
+ {
+ if (c > 0x10ffff) return -2;
+ if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
+ }
+
+ p += chlen;
+ len -= chlen;
+ *pp++ = c;
+ }
+ }
+
+*pp = 0;
+return pp - buffer32;
+}
+#endif
/*************************************************
* Read or extend an input line *
@@ -1542,6 +1815,22 @@ return len;
#endif /* SUPPORT_PCRE16 */
+
+#ifdef SUPPORT_PCRE32
+/*************************************************
+* Find length of 0-terminated 32-bit string *
+*************************************************/
+
+static int strlen32(PCRE_SPTR32 p)
+{
+int len = 0;
+while (*p++ != 0) len++;
+return len;
+}
+#endif /* SUPPORT_PCRE32 */
+
+
+
#ifdef SUPPORT_PCRE16
/*************************************************
* Print 16-bit character string *
@@ -1581,6 +1870,33 @@ return yield;
+#ifdef SUPPORT_PCRE32
+/*************************************************
+* Print 32-bit character string *
+*************************************************/
+
+/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
+If handed a NULL file, just counts chars without printing. */
+
+static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
+{
+int yield = 0;
+
+if (length < 0)
+ length = strlen32(p);
+
+while (length-- > 0)
+ {
+ int c = *p++;
+ yield += pchar(c, f);
+ }
+
+return yield;
+}
+#endif /* SUPPORT_PCRE32 */
+
+
+
#ifdef SUPPORT_PCRE8
/*************************************************
* Read a capture name (8-bit) and check it *
@@ -1634,6 +1950,33 @@ return p;
+#ifdef SUPPORT_PCRE32
+/*************************************************
+* Read a capture name (32-bit) and check it *
+*************************************************/
+
+/* Note that the text being read is 8-bit. */
+
+static pcre_uint8 *
+read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
+{
+pcre_uint32 *npp = *pp;
+while (isalnum(*p)) *npp++ = *p++;
+*npp++ = 0;
+*npp = 0;
+if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
+ {
+ fprintf(outfile, "no parentheses with name \"");
+ PCHARSV(*pp, 0, -1, outfile);
+ fprintf(outfile, "\"\n");
+ }
+*pp = npp;
+return p;
+}
+#endif /* SUPPORT_PCRE32 */
+
+
+
/*************************************************
* Callout function *
*************************************************/
@@ -1791,7 +2134,7 @@ free(block);
*************************************************/
/* Get one piece of information from the pcre_fullinfo() function. When only
-one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
+one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
value, but the code is defensive.
Arguments:
@@ -1808,7 +2151,13 @@ new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
int rc;
-if (use_pcre16)
+if (pcre_mode == PCRE32_MODE)
+#ifdef SUPPORT_PCRE32
+ rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
+#else
+ rc = PCRE_ERROR_BADMODE;
+#endif
+else if (pcre_mode == PCRE16_MODE)
#ifdef SUPPORT_PCRE16
rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
#else
@@ -1824,10 +2173,11 @@ else
if (rc < 0)
{
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
- use_pcre16? "16" : "", option);
+ pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
if (rc == PCRE_ERROR_BADMODE)
- fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
- "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
+ fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+ "%d-bit mode\n", 8 * CHAR_SIZE,
+ 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
}
return rc;
@@ -1876,10 +2226,11 @@ bytes in the pattern itself. This is to make it possible to test PCRE's
ability to reload byte-flipped patterns, e.g. those compiled on a different
architecture. */
+#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
static void
-regexflip(pcre *ere, pcre_extra *extra)
+regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
-REAL_PCRE *re = (REAL_PCRE *)ere;
+real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;
pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
@@ -1916,7 +2267,7 @@ if (extra != NULL)
in the name table, if present, and then in the pattern itself. */
#ifdef SUPPORT_PCRE16
-if (!use_pcre16) return;
+if (pcre_mode != PCRE16_MODE) return;
while(TRUE)
{
@@ -2054,6 +2405,117 @@ while(TRUE)
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
+#endif /* SUPPORT_PCRE[8|16] */
+
+
+
+#if defined SUPPORT_PCRE32
+static void
+regexflip_32(pcre *ere, pcre_extra *extra)
+{
+real_pcre32 *re = (real_pcre32 *)ere;
+int op;
+pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
+int length = re->name_count * re->name_entry_size;
+#ifdef SUPPORT_UTF
+BOOL utf = (re->options & PCRE_UTF32) != 0;
+#endif /* SUPPORT_UTF */
+
+/* Always flip the bytes in the main data block and study blocks. */
+
+re->magic_number = REVERSED_MAGIC_NUMBER;
+re->size = swap_uint32(re->size);
+re->options = swap_uint32(re->options);
+re->flags = swap_uint16(re->flags);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
+re->first_char = swap_uint32(re->first_char);
+re->req_char = swap_uint32(re->req_char);
+re->name_table_offset = swap_uint16(re->name_table_offset);
+re->name_entry_size = swap_uint16(re->name_entry_size);
+re->name_count = swap_uint16(re->name_count);
+
+if (extra != NULL)
+ {
+ pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
+ rsd->size = swap_uint32(rsd->size);
+ rsd->flags = swap_uint32(rsd->flags);
+ rsd->minlength = swap_uint32(rsd->minlength);
+ }
+
+/* In 32-bit mode we must swap bytes
+in the name table, if present, and then in the pattern itself. */
+
+while(TRUE)
+ {
+ /* Swap previous characters. */
+ while (length-- > 0)
+ {
+ *ptr = swap_uint32(*ptr);
+ ptr++;
+ }
+
+ /* Get next opcode. */
+
+ length = 0;
+ op = *ptr;
+ *ptr++ = swap_uint32(op);
+
+ switch (op)
+ {
+ case OP_END:
+ return;
+
+ default:
+ length = OP_lengths32[op] - 1;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ /* Skip the character bit map. */
+ ptr += 32/sizeof(pcre_uint32);
+ length = 0;
+ break;
+
+ case OP_XCLASS:
+ /* LINK_SIZE can only be 1 in 32-bit mode. */
+ length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
+
+ /* Reverse the size of the XCLASS instance. */
+ *ptr = swap_uint32(*ptr);
+ ptr++;
+
+ op = *ptr;
+ *ptr = swap_uint32(op);
+ ptr++;
+ if ((op & XCL_MAP) != 0)
+ {
+ /* Skip the character bit map. */
+ ptr += 32/sizeof(pcre_uint32);
+ length -= 32/sizeof(pcre_uint32);
+ }
+ break;
+ }
+ }
+/* Control should never reach here in 32 bit mode. */
+}
+
+#endif /* SUPPORT_PCRE32 */
+
+
+
+static void
+regexflip(pcre *ere, pcre_extra *extra)
+{
+#if defined SUPPORT_PCRE32
+ if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
+ regexflip_32(ere, extra);
+#endif
+#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
+ if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
+ regexflip8_or_16(ere, extra);
+#endif
+}
@@ -2182,6 +2644,9 @@ printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
+#ifdef SUPPORT_PCRE32
+printf(" -32 use the 32-bit library\n");
+#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
@@ -2189,6 +2654,7 @@ printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
+printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
@@ -2266,13 +2732,20 @@ pcre_jit_stack *jit_stack = NULL;
/* These vectors store, end-to-end, a list of zero-terminated captured
substring names, each list itself being terminated by an empty name. Assume
that 1024 is plenty long enough for the few names we'll be testing. It is
-easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
+easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
for the actual memory, to ensure alignment. */
-pcre_uint16 copynames[1024];
-pcre_uint16 getnames[1024];
+pcre_uint32 copynames[1024];
+pcre_uint32 getnames[1024];
+
+#ifdef SUPPORT_PCRE32
+pcre_uint32 *cn32ptr;
+pcre_uint32 *gn32ptr;
+#endif
#ifdef SUPPORT_PCRE16
+pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
+pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
pcre_uint16 *cn16ptr;
pcre_uint16 *gn16ptr;
#endif
@@ -2285,8 +2758,8 @@ pcre_uint8 *gn8ptr;
#endif
/* Get buffers from malloc() so that valgrind will check their misuse when
-debugging. They grow automatically when very long lines are read. The 16-bit
-buffer (buffer16) is obtained only if needed. */
+debugging. They grow automatically when very long lines are read. The 16-
+and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
buffer = (pcre_uint8 *)malloc(buffer_size);
dbuffer = (pcre_uint8 *)malloc(buffer_size);
@@ -2308,10 +2781,12 @@ _setmode( _fileno( stdout ), _O_BINARY );
/* Get the version number: both pcre_version() and pcre16_version() give the
same answer. We just need to ensure that we call one that is available. */
-#ifdef SUPPORT_PCRE8
+#if defined SUPPORT_PCRE8
version = pcre_version();
-#else
+#elif defined SUPPORT_PCRE16
version = pcre16_version();
+#elif defined SUPPORT_PCRE32
+version = pcre32_version();
#endif
/* Scan options */
@@ -2338,12 +2813,21 @@ while (argc > 1 && argv[op][0] == '-')
else if (strcmp(arg, "-16") == 0)
{
#ifdef SUPPORT_PCRE16
- use_pcre16 = 1;
+ pcre_mode = PCRE16_MODE;
#else
printf("** This version of PCRE was built without 16-bit support\n");
exit(1);
#endif
}
+ else if (strcmp(arg, "-32") == 0)
+ {
+#ifdef SUPPORT_PCRE32
+ pcre_mode = PCRE32_MODE;
+#else
+ printf("** This version of PCRE was built without 32-bit support\n");
+ exit(1);
+#endif
+ }
else if (strcmp(arg, "-q") == 0) quiet = 1;
else if (strcmp(arg, "-b") == 0) debug = 1;
else if (strcmp(arg, "-i") == 0) showinfo = 1;
@@ -2431,17 +2915,34 @@ while (argc > 1 && argv[op][0] == '-')
yield = 0;
#endif
}
- else if (strcmp(argv[op + 1], "utf") == 0)
+ else if (strcmp(argv[op + 1], "pcre32") == 0)
{
-#ifdef SUPPORT_PCRE8
- (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
- printf("%d\n", rc);
- yield = rc;
+#ifdef SUPPORT_PCRE32
+ printf("1\n");
+ yield = 1;
#else
- (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
+ printf("0\n");
+ yield = 0;
+#endif
+ goto EXIT;
+ }
+ if (strcmp(argv[op + 1], "utf") == 0)
+ {
+#ifdef SUPPORT_PCRE8
+ if (pcre_mode == PCRE8_MODE)
+ (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
+#endif
+#ifdef SUPPORT_PCRE16
+ if (pcre_mode == PCRE16_MODE)
+ (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
+#endif
+#ifdef SUPPORT_PCRE32
+ if (pcre_mode == PCRE32_MODE)
+ (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
+#endif
printf("%d\n", rc);
yield = rc;
-#endif
+ goto EXIT;
}
else if (strcmp(argv[op + 1], "ucp") == 0)
{
@@ -2496,21 +2997,20 @@ while (argc > 1 && argv[op][0] == '-')
/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
are set, either both UTFs are supported or both are not supported. */
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- printf(" 8-bit and 16-bit support\n");
- (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
- if (rc)
- printf(" UTF-8 and UTF-16 support\n");
- else
- printf(" No UTF-8 or UTF-16 support\n");
-#elif defined SUPPORT_PCRE8
- printf(" 8-bit support only\n");
+#ifdef SUPPORT_PCRE8
+ printf(" 8-bit support\n");
(void)pcre_config(PCRE_CONFIG_UTF8, &rc);
- printf(" %sUTF-8 support\n", rc? "" : "No ");
-#else
- printf(" 16-bit support only\n");
+ printf (" %sUTF-8 support\n", rc ? "" : "No ");
+#endif
+#ifdef SUPPORT_PCRE16
+ printf(" 16-bit support\n");
(void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
- printf(" %sUTF-16 support\n", rc? "" : "No ");
+ printf (" %sUTF-16 support\n", rc ? "" : "No ");
+#endif
+#ifdef SUPPORT_PCRE32
+ printf(" 32-bit support\n");
+ (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
+ printf (" %sUTF-32 support\n", rc ? "" : "No ");
#endif
(void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
@@ -2617,6 +3117,13 @@ pcre16_stack_malloc = stack_malloc;
pcre16_stack_free = stack_free;
#endif
+#ifdef SUPPORT_PCRE32
+pcre32_malloc = new_malloc;
+pcre32_free = new_free;
+pcre32_stack_malloc = stack_malloc;
+pcre32_stack_free = stack_free;
+#endif
+
/* Heading line unless quiet, then prompt for first regex if stdin */
if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
@@ -2716,7 +3223,7 @@ while (!done)
if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
- magic = ((REAL_PCRE *)re)->magic_number;
+ magic = REAL_PCRE_MAGIC(re);
if (magic != MAGIC_NUMBER)
{
if (swap_uint32(magic) == MAGIC_NUMBER)
@@ -2774,9 +3281,11 @@ while (!done)
{
/* Simulate the result of the function call below. */
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
- use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
- fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
- "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
+ pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
+ PCRE_INFO_OPTIONS);
+ fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+ "%d-bit mode\n", 8 * CHAR_SIZE,
+ 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
new_free(re);
fclose(f);
continue;
@@ -3040,10 +3549,10 @@ while (!done)
#endif /* !defined NOPOSIX */
{
- /* In 16-bit mode, convert the input. */
+ /* In 16- or 32-bit mode, convert the input. */
#ifdef SUPPORT_PCRE16
- if (use_pcre16)
+ if (pcre_mode == PCRE16_MODE)
{
switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
{
@@ -3069,6 +3578,32 @@ while (!done)
}
#endif
+#ifdef SUPPORT_PCRE32
+ if (pcre_mode == PCRE32_MODE)
+ {
+ switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
+ {
+ case -1:
+ fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
+ "converted to UTF-32\n");
+ goto SKIP_DATA;
+
+ case -2:
+ fprintf(outfile, "**Failed: character value greater than 0x10ffff "
+ "cannot be converted to UTF-32\n");
+ goto SKIP_DATA;
+
+ case -3:
+ fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
+ goto SKIP_DATA;
+
+ default:
+ break;
+ }
+ p = (pcre_uint8 *)buffer32;
+ }
+#endif
+
/* Compile many times when timing */
if (timeit > 0)
@@ -3126,16 +3661,32 @@ while (!done)
/* Extract the size for possible writing before possibly flipping it,
and remember the store that was got. */
- true_size = ((REAL_PCRE *)re)->size;
+ true_size = REAL_PCRE_SIZE(re);
regex_gotten_store = first_gotten_store;
/* Output code size information if requested */
if (log_store)
+ {
+ int name_count, name_entry_size, real_pcre_size;
+
+ new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
+ new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
+#ifdef SUPPORT_PCRE8
+ if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
+ real_pcre_size = sizeof(real_pcre);
+#endif
+#ifdef SUPPORT_PCRE16
+ if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
+ real_pcre_size = sizeof(real_pcre16);
+#endif
+#ifdef SUPPORT_PCRE32
+ if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
+ real_pcre_size = sizeof(real_pcre32);
+#endif
fprintf(outfile, "Memory allocation (code space): %d\n",
- (int)(first_gotten_store -
- sizeof(REAL_PCRE) -
- ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
+ (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
+ }
/* If -s or /S was present, study the regex to generate additional info to
help with the matching, unless the pattern has the SS option, which
@@ -3239,35 +3790,31 @@ while (!done)
fprintf(outfile, "Named capturing subpatterns:\n");
while (namecount-- > 0)
{
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- int imm2_size = use_pcre16 ? 1 : 2;
-#else
- int imm2_size = IMM2_SIZE;
-#endif
+ int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
int length = (int)STRLEN(nametable + imm2_size);
fprintf(outfile, " ");
PCHARSV(nametable, imm2_size, length, outfile);
while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- fprintf(outfile, "%3d\n", use_pcre16?
- (int)(((PCRE_SPTR16)nametable)[0])
- :((int)nametable[0] << 8) | (int)nametable[1]);
- nametable += nameentrysize * (use_pcre16 ? 2 : 1);
-#else
- fprintf(outfile, "%3d\n", GET2(nametable, 0));
-#ifdef SUPPORT_PCRE8
- nametable += nameentrysize;
-#else
- nametable += nameentrysize * 2;
+#ifdef SUPPORT_PCRE32
+ if (pcre_mode == PCRE32_MODE)
+ fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
+#endif
+#ifdef SUPPORT_PCRE16
+ if (pcre_mode == PCRE16_MODE)
+ fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
#endif
+#ifdef SUPPORT_PCRE8
+ if (pcre_mode == PCRE8_MODE)
+ fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
#endif
+ nametable += nameentrysize * CHAR_SIZE;
}
}
if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
- all_options = ((REAL_PCRE *)re)->options;
+ all_options = REAL_PCRE_OPTIONS(re);
if (do_flip) all_options = swap_uint32(all_options);
if (get_options == 0) fprintf(outfile, "No options\n");
@@ -3329,7 +3876,7 @@ while (!done)
else
{
const char *caseless =
- ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
+ ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
"" : " (caseless)";
if (PRINTOK(first_char))
@@ -3349,7 +3896,7 @@ while (!done)
else
{
const char *caseless =
- ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
+ ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
"" : " (caseless)";
if (PRINTOK(need_char))
@@ -3530,9 +4077,13 @@ while (!done)
*copynames = 0;
*getnames = 0;
+#ifdef SUPPORT_PCRE32
+ cn32ptr = copynames;
+ gn32ptr = getnames;
+#endif
#ifdef SUPPORT_PCRE16
- cn16ptr = copynames;
- gn16ptr = getnames;
+ cn16ptr = copynames16;
+ gn16ptr = getnames16;
#endif
#ifdef SUPPORT_PCRE8
cn8ptr = copynames8;
@@ -3647,7 +4198,7 @@ while (!done)
allows UTF-8 characters to be constructed byte by byte, and also allows
invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
Otherwise, pass it down to later code so that it can be turned into
- UTF-8 when running in 16-bit mode. */
+ UTF-8 when running in 16/32-bit mode. */
c = 0;
while (i++ < 2 && isxdigit(*p))
@@ -3692,7 +4243,7 @@ while (!done)
}
else if (isalnum(*p))
{
- READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
+ READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
}
else if (*p == '+')
{
@@ -3755,7 +4306,7 @@ while (!done)
}
else if (isalnum(*p))
{
- READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
+ READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
}
continue;
@@ -3872,8 +4423,8 @@ while (!done)
mode must have come from \x{...} or octal constructs because values from
\x.. get this far only in non-UTF mode. */
-#if !defined NOUTF || defined SUPPORT_PCRE16
- if (use_pcre16 || use_utf)
+#if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
+ if (pcre_mode != PCRE8_MODE || use_utf)
{
pcre_uint8 buff8[8];
int ii, utn;
@@ -3944,8 +4495,7 @@ while (!done)
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
}
- else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
- != 0)
+ else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
{
fprintf(outfile, "Matched with REG_NOSUB\n");
}
@@ -3979,9 +4529,9 @@ while (!done)
/* Handle matching via the native interface - repeats for /g and /G */
#ifdef SUPPORT_PCRE16
- if (use_pcre16)
+ if (pcre_mode == PCRE16_MODE)
{
- len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
+ len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);
switch(len)
{
case -1:
@@ -4006,6 +4556,33 @@ while (!done)
}
#endif
+#ifdef SUPPORT_PCRE32
+ if (pcre_mode == PCRE32_MODE)
+ {
+ len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);
+ switch(len)
+ {
+ case -1:
+ fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
+ "converted to UTF-32\n");
+ goto NEXT_DATA;
+
+ case -2:
+ fprintf(outfile, "**Failed: character value greater than 0x10ffff "
+ "cannot be converted to UTF-32\n");
+ goto NEXT_DATA;
+
+ case -3:
+ fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
+ goto NEXT_DATA;
+
+ default:
+ break;
+ }
+ bptr = (pcre_uint8 *)buffer32;
+ }
+#endif
+
/* Ensure that there is a JIT callback if we want to verify that JIT was
actually used. If jit_stack == NULL, no stack has yet been assigned. */
@@ -4227,7 +4804,7 @@ while (!done)
int rc;
char copybuffer[256];
- if (use_pcre16)
+ if (pcre_mode == PCRE16_MODE)
{
if (*(pcre_uint16 *)cnptr == 0) break;
}
@@ -4282,7 +4859,7 @@ while (!done)
int rc;
const char *substring;
- if (use_pcre16)
+ if (pcre_mode == PCRE16_MODE)
{
if (*(pcre_uint16 *)gnptr == 0) break;
}
@@ -4376,7 +4953,7 @@ while (!done)
if (g_notempty != 0)
{
int onechar = 1;
- unsigned int obits = ((REAL_PCRE *)re)->options;
+ unsigned int obits = REAL_PCRE_OPTIONS(re);
use_offsets[0] = start_offset;
if ((obits & PCRE_NEWLINE_BITS) == 0)
{
@@ -4394,22 +4971,23 @@ while (!done)
(obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
(obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
&&
- start_offset < len - 1 &&
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- (use_pcre16?
- ((PCRE_SPTR16)bptr)[start_offset] == '\r'
- && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
- :
- bptr[start_offset] == '\r'
- && bptr[start_offset + 1] == '\n')
-#elif defined SUPPORT_PCRE16
- ((PCRE_SPTR16)bptr)[start_offset] == '\r'
- && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
-#else
- bptr[start_offset] == '\r'
- && bptr[start_offset + 1] == '\n'
+ start_offset < len - 1 && (
+#ifdef SUPPORT_PCRE8
+ (pcre_mode == PCRE8_MODE &&
+ bptr[start_offset] == '\r' &&
+ bptr[start_offset + 1] == '\n') ||
+#endif
+#ifdef SUPPORT_PCRE16
+ (pcre_mode == PCRE16_MODE &&
+ ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
+ ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
#endif
- )
+#ifdef SUPPORT_PCRE32
+ (pcre_mode == PCRE32_MODE &&
+ ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
+ ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
+#endif
+ 0))
onechar++;
else if (use_utf)
{
@@ -4444,9 +5022,9 @@ while (!done)
case PCRE_ERROR_BADUTF8:
case PCRE_ERROR_SHORTUTF8:
- fprintf(outfile, "Error %d (%s UTF-%s string)", count,
+ fprintf(outfile, "Error %d (%s UTF-%d string)", count,
(count == PCRE_ERROR_BADUTF8)? "bad" : "short",
- use_pcre16? "16" : "8");
+ 8 * CHAR_SIZE);
if (use_size_offsets >= 2)
fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
use_offsets[1]);
@@ -4454,8 +5032,8 @@ while (!done)
break;
case PCRE_ERROR_BADUTF8_OFFSET:
- fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
- use_pcre16? "16" : "8");
+ fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
+ 8 * CHAR_SIZE);
break;
default:
@@ -4545,6 +5123,9 @@ free(offsets);
#ifdef SUPPORT_PCRE16
if (buffer16 != NULL) free(buffer16);
#endif
+#ifdef SUPPORT_PCRE32
+if (buffer32 != NULL) free(buffer32);
+#endif
#if !defined NODFA
if (dfa_workspace != NULL)
diff --git a/testdata/saved32 b/testdata/saved32
new file mode 100644
index 0000000..255235d
--- /dev/null
+++ b/testdata/saved32
Binary files differ
diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1
new file mode 100644
index 0000000..42af7b4
--- /dev/null
+++ b/testdata/saved32BE-1
Binary files differ
diff --git a/testdata/saved32BE-2 b/testdata/saved32BE-2
new file mode 100644
index 0000000..68a896d
--- /dev/null
+++ b/testdata/saved32BE-2
Binary files differ
diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1
new file mode 100644
index 0000000..a4044fd
--- /dev/null
+++ b/testdata/saved32LE-1
Binary files differ
diff --git a/testdata/saved32LE-2 b/testdata/saved32LE-2
new file mode 100644
index 0000000..8b35ffa
--- /dev/null
+++ b/testdata/saved32LE-2
Binary files differ
diff --git a/testdata/testinput14 b/testdata/testinput14
index 689f168..e5e8520 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -290,6 +290,8 @@ not matter. --/
<!testsaved16
+<!testsaved32
+
/\h/SI
/\v/SI
diff --git a/testdata/testinput17 b/testdata/testinput17
index 154846e..21a3bc4 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -1,18 +1,11 @@
-/-- This set of tests is for the 16-bit library's basic (non-UTF-16) features
+/-- This set of tests is for the 16- and 32-bit library's basic (non-UTF-16 or -32) features
that are not compatible with the 8-bit library, or which give different
- output in 16-bit mode. --/
+ output in 16- or 32-bit mode. --/
/a\Cb/
aXb
a\nb
-/-- Check maximum non-UTF character size --/
-
-/\x{ffff}/
- A\x{ffff}B
-
-/\x{10000}/
-
/[^\x{c4}]/DZ
diff --git a/testdata/testinput18 b/testdata/testinput18
index add1836..e55b8bc 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -1,5 +1,5 @@
-/-- This set of tests is for UTF-16 support, and is relevant only to the 16-bit
- library. --/
+/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to the
+ 16- and 32-bit library. --/
/xxx/8?DZSS
@@ -8,9 +8,11 @@
/X(\C{3})/8
X\x{11234}Y
+ X\x{11234}YZ
/X(\C{4})/8
X\x{11234}YZ
+ X\x{11234}YZW
/X\C*/8
XYZabcdce
@@ -40,6 +42,7 @@
/a\C\Cb/8
a\x{12257}b
+ a\x{12257}\x{11234}b
** Failers
a\x{100}b
@@ -171,8 +174,13 @@ correctly, but that messes up comparisons). --/
/(*UTF16)\x{11234}/
abcd\x{11234}pqr
+/(*UTF-32)\x{11234}/
+ abcd\x{11234}pqr
+
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
+/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
+
/\h/SI8
ABC\x{09}
ABC\x{20}
@@ -238,6 +246,7 @@ correctly, but that messes up comparisons). --/
/a/8
\x{10000}\>1
+ \x{10000}ab\>1
\x{10000}ab\>2
\x{10000}ab\>3
\x{10000}ab\>4
diff --git a/testdata/testinput19 b/testdata/testinput19
index 4b002f4..00d8020 100644
--- a/testdata/testinput19
+++ b/testdata/testinput19
@@ -1,5 +1,5 @@
/-- This set of tests is for Unicode property support, relevant only to the
- 16-bit library. --/
+ 16- and 32-bit library. --/
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
diff --git a/testdata/testinput20 b/testdata/testinput20
index d3dcaa5..9695fe7 100644
--- a/testdata/testinput20
+++ b/testdata/testinput20
@@ -1,5 +1,5 @@
/-- These tests are for the handling of characters greater than 255 in 16-bit,
- non-UTF-16 mode. --/
+ non-UTF-16 mode, or in 32-bit mode. --/
/^\x{ffff}+/i
\x{ffff}
diff --git a/testdata/testinput21 b/testdata/testinput21
index e0fd236..0f201ad 100644
--- a/testdata/testinput21
+++ b/testdata/testinput21
@@ -9,4 +9,8 @@ right away. The others require the linke size to be 2. */
<!testsaved16BE-1
+<!testsaved32LE-1
+
+<!testsaved32BE-1
+
/-- End of testinput21 --/
diff --git a/testdata/testinput22 b/testdata/testinput22
index f8276c8..46a1365 100644
--- a/testdata/testinput22
+++ b/testdata/testinput22
@@ -1,4 +1,4 @@
-/-- Tests for reloading pre-compile patterns with UTF-16 support. */
+/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
@@ -6,4 +6,8 @@
<!testsaved16BE-2
+<!testsaved32LE-2
+
+<!testsaved32BE-2
+
/-- End of testinput22 --/
diff --git a/testdata/testinput23 b/testdata/testinput23
new file mode 100644
index 0000000..934831a
--- /dev/null
+++ b/testdata/testinput23
@@ -0,0 +1,10 @@
+/-- Tests for the 16-bit library only */
+
+/-- Check maximum non-UTF character size --/
+
+/\x{ffff}/
+ A\x{ffff}B
+
+/\x{10000}/
+
+/-- End of testinput23 --/
diff --git a/testdata/testinput24 b/testdata/testinput24
new file mode 100644
index 0000000..754e588
--- /dev/null
+++ b/testdata/testinput24
@@ -0,0 +1,3 @@
+/-- Tests for the 16-bit library with UTF-16 support only */
+
+/-- End of testinput24 --/
diff --git a/testdata/testinput25 b/testdata/testinput25
new file mode 100644
index 0000000..8d5b3ad
--- /dev/null
+++ b/testdata/testinput25
@@ -0,0 +1,23 @@
+/-- Tests for the 32-bit library only */
+
+/-- Check maximum character size --/
+
+/\x{110000}/8
+
+/\x{110000}/
+
+/\x{7fffffff}/
+
+/\x{80000000}/
+
+/-- Non-UTF characters --/
+
+/\C/8
+ \x{110000}
+
+/\C{2,3}/
+ \x{400000}\x{400001}\x{400002}\x{400003}
+
+/\x{400000}\x{800000}/iDZ
+
+/-- End of testinput25 --/
diff --git a/testdata/testinput26 b/testdata/testinput26
new file mode 100644
index 0000000..c492659
--- /dev/null
+++ b/testdata/testinput26
@@ -0,0 +1,3 @@
+/-- Tests for the 32-bit library with UTF-32 support only */
+
+/-- End of testinput26 --/
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
new file mode 100644
index 0000000..8335fb8
--- /dev/null
+++ b/testdata/testoutput11-32
@@ -0,0 +1,713 @@
+/-- These are a few representative patterns whose lengths and offsets are to be
+shown when the link size is 2. This is just a doublecheck test to ensure the
+sizes don't go horribly wrong when something is changed. The pattern contents
+are all themselves checked in other tests. Unicode, including property support,
+is required for these tests. --/
+
+/((?i)b)/BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 5 CBra 1
+ 5 /i b
+ 7 5 Ket
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/(?s)(.*X|^B)/BM
+Memory allocation (code space): 76
+------------------------------------------------------------------
+ 0 16 Bra
+ 2 7 CBra 1
+ 5 AllAny*
+ 7 X
+ 9 5 Alt
+ 11 ^
+ 12 B
+ 14 12 Ket
+ 16 16 Ket
+ 18 End
+------------------------------------------------------------------
+
+/(?s:.*X|^B)/BM
+Memory allocation (code space): 72
+------------------------------------------------------------------
+ 0 15 Bra
+ 2 6 Bra
+ 4 AllAny*
+ 6 X
+ 8 5 Alt
+ 10 ^
+ 11 B
+ 13 11 Ket
+ 15 15 Ket
+ 17 End
+------------------------------------------------------------------
+
+/^[[:alnum:]]/BM
+Memory allocation (code space): 60
+------------------------------------------------------------------
+ 0 12 Bra
+ 2 ^
+ 3 [0-9A-Za-z]
+ 12 12 Ket
+ 14 End
+------------------------------------------------------------------
+
+/#/IxMD
+Memory allocation (code space): 20
+------------------------------------------------------------------
+ 0 2 Bra
+ 2 2 Ket
+ 4 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: extended
+No first char
+No need char
+
+/a#/IxMD
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 a
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: extended
+First char = 'a'
+No need char
+
+/x?+/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 x?+
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/x++/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 x++
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/x{1,3}+/BM
+Memory allocation (code space): 56
+------------------------------------------------------------------
+ 0 11 Bra
+ 2 7 Once
+ 4 x
+ 6 x{0,2}
+ 9 7 Ket
+ 11 11 Ket
+ 13 End
+------------------------------------------------------------------
+
+/(x)*+/BM
+Memory allocation (code space): 52
+------------------------------------------------------------------
+ 0 10 Bra
+ 2 Braposzero
+ 3 5 CBraPos 1
+ 6 x
+ 8 5 KetRpos
+ 10 10 Ket
+ 12 End
+------------------------------------------------------------------
+
+/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/BM
+Memory allocation (code space): 220
+------------------------------------------------------------------
+ 0 52 Bra
+ 2 ^
+ 3 47 CBra 1
+ 6 5 CBra 2
+ 9 a+
+ 11 5 Ket
+ 13 13 CBra 3
+ 16 [ab]+?
+ 26 13 Ket
+ 28 13 CBra 4
+ 31 [bc]+
+ 41 13 Ket
+ 43 5 CBra 5
+ 46 \w*
+ 48 5 Ket
+ 50 47 Ket
+ 52 52 Ket
+ 54 End
+------------------------------------------------------------------
+
+|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
+Memory allocation (code space): 3296
+------------------------------------------------------------------
+ 0 821 Bra
+ 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
+820 \b
+821 821 Ket
+823 End
+------------------------------------------------------------------
+
+|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
+Memory allocation (code space): 3256
+------------------------------------------------------------------
+ 0 811 Bra
+ 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
+810 \b
+811 811 Ket
+813 End
+------------------------------------------------------------------
+
+/(a(?1)b)/BM
+Memory allocation (code space): 64
+------------------------------------------------------------------
+ 0 13 Bra
+ 2 9 CBra 1
+ 5 a
+ 7 2 Recurse
+ 9 b
+ 11 9 Ket
+ 13 13 Ket
+ 15 End
+------------------------------------------------------------------
+
+/(a(?1)+b)/BM
+Memory allocation (code space): 80
+------------------------------------------------------------------
+ 0 17 Bra
+ 2 13 CBra 1
+ 5 a
+ 7 4 Once
+ 9 2 Recurse
+ 11 4 KetRmax
+ 13 b
+ 15 13 Ket
+ 17 17 Ket
+ 19 End
+------------------------------------------------------------------
+
+/a(?P<name1>b|c)d(?P<longername2>e)/BM
+Memory allocation (code space): 186
+------------------------------------------------------------------
+ 0 24 Bra
+ 2 a
+ 4 5 CBra 1
+ 7 b
+ 9 4 Alt
+ 11 c
+ 13 9 Ket
+ 15 d
+ 17 5 CBra 2
+ 20 e
+ 22 5 Ket
+ 24 24 Ket
+ 26 End
+------------------------------------------------------------------
+
+/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/BM
+Memory allocation (code space): 155
+------------------------------------------------------------------
+ 0 29 Bra
+ 2 18 Bra
+ 4 a
+ 6 12 CBra 1
+ 9 c
+ 11 5 CBra 2
+ 14 d
+ 16 5 Ket
+ 18 12 Ket
+ 20 18 Ket
+ 22 5 CBra 3
+ 25 a
+ 27 5 Ket
+ 29 29 Ket
+ 31 End
+------------------------------------------------------------------
+
+/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
+Memory allocation (code space): 117
+------------------------------------------------------------------
+ 0 24 Bra
+ 2 5 CBra 1
+ 5 a
+ 7 5 Ket
+ 9 Any
+ 10 Any
+ 11 Any
+ 12 \1
+ 14 bbb
+ 20 2 Recurse
+ 22 d
+ 24 24 Ket
+ 26 End
+------------------------------------------------------------------
+
+/abc(?C255)de(?C)f/BM
+Memory allocation (code space): 100
+------------------------------------------------------------------
+ 0 22 Bra
+ 2 abc
+ 8 Callout 255 10 1
+ 12 de
+ 16 Callout 0 16 1
+ 20 f
+ 22 22 Ket
+ 24 End
+------------------------------------------------------------------
+
+/abcde/CBM
+Memory allocation (code space): 156
+------------------------------------------------------------------
+ 0 36 Bra
+ 2 Callout 255 0 1
+ 6 a
+ 8 Callout 255 1 1
+ 12 b
+ 14 Callout 255 2 1
+ 18 c
+ 20 Callout 255 3 1
+ 24 d
+ 26 Callout 255 4 1
+ 30 e
+ 32 Callout 255 5 0
+ 36 36 Ket
+ 38 End
+------------------------------------------------------------------
+
+/\x{100}/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{100}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{1000}/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{1000}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{10000}/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{10000}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{100000}/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{100000}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{10ffff}/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{10ffff}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{110000}/8BM
+Failed: character value in \x{...} sequence is too large at offset 9
+
+/[\x{ff}]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{ff}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[\x{100}]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{100}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x80/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x80
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\xff/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{ff}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/\x{0041}\x{2262}\x{0391}\x{002e}/D8M
+Memory allocation (code space): 52
+------------------------------------------------------------------
+ 0 10 Bra
+ 2 A\x{2262}\x{391}.
+ 10 10 Ket
+ 12 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = 'A'
+Need char = '.'
+
+/\x{D55c}\x{ad6d}\x{C5B4}/D8M
+Memory allocation (code space): 44
+------------------------------------------------------------------
+ 0 8 Bra
+ 2 \x{d55c}\x{ad6d}\x{c5b4}
+ 8 8 Ket
+ 10 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{d55c}
+Need char = \x{c5b4}
+
+/\x{65e5}\x{672c}\x{8a9e}/D8M
+Memory allocation (code space): 44
+------------------------------------------------------------------
+ 0 8 Bra
+ 2 \x{65e5}\x{672c}\x{8a9e}
+ 8 8 Ket
+ 10 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{65e5}
+Need char = \x{8a9e}
+
+/[\x{100}]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{100}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[Z\x{100}]/8BM
+Memory allocation (code space): 76
+------------------------------------------------------------------
+ 0 16 Bra
+ 2 [Z\x{100}]
+ 16 16 Ket
+ 18 End
+------------------------------------------------------------------
+
+/^[\x{100}\E-\Q\E\x{150}]/B8M
+Memory allocation (code space): 52
+------------------------------------------------------------------
+ 0 10 Bra
+ 2 ^
+ 3 [\x{100}-\x{150}]
+ 10 10 Ket
+ 12 End
+------------------------------------------------------------------
+
+/^[\QĀ\E-\QŐ\E]/B8M
+Memory allocation (code space): 52
+------------------------------------------------------------------
+ 0 10 Bra
+ 2 ^
+ 3 [\x{100}-\x{150}]
+ 10 10 Ket
+ 12 End
+------------------------------------------------------------------
+
+/^[\QĀ\E-\QŐ\E/B8M
+Failed: missing terminating ] for character class at offset 13
+
+/[\p{L}]/BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\p{L}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[\p{^L}]/BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\P{L}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[\P{L}]/BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\P{L}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[\P{^L}]/BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\p{L}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[abc\p{L}\x{0660}]/8BM
+Memory allocation (code space): 88
+------------------------------------------------------------------
+ 0 19 Bra
+ 2 [a-c\p{L}\x{660}]
+ 19 19 Ket
+ 21 End
+------------------------------------------------------------------
+
+/[\p{Nd}]/8BM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\p{Nd}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[\p{Nd}+-]+/8BM
+Memory allocation (code space): 84
+------------------------------------------------------------------
+ 0 18 Bra
+ 2 [+\-\p{Nd}]+
+ 18 18 Ket
+ 20 End
+------------------------------------------------------------------
+
+/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iBM
+Memory allocation (code space): 60
+------------------------------------------------------------------
+ 0 12 Bra
+ 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
+ 12 12 Ket
+ 14 End
+------------------------------------------------------------------
+
+/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8BM
+Memory allocation (code space): 60
+------------------------------------------------------------------
+ 0 12 Bra
+ 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
+ 12 12 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[\x{105}-\x{109}]/8iBM
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [\x{104}-\x{109}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/( ( (?(1)0|) )* )/xBM
+Memory allocation (code space): 104
+------------------------------------------------------------------
+ 0 23 Bra
+ 2 19 CBra 1
+ 5 Brazero
+ 6 13 SCBra 2
+ 9 6 Cond
+ 11 1 Cond ref
+ 13 0
+ 15 2 Alt
+ 17 8 Ket
+ 19 13 KetRmax
+ 21 19 Ket
+ 23 23 Ket
+ 25 End
+------------------------------------------------------------------
+
+/( (?(1)0|)* )/xBM
+Memory allocation (code space): 84
+------------------------------------------------------------------
+ 0 18 Bra
+ 2 14 CBra 1
+ 5 Brazero
+ 6 6 SCond
+ 8 1 Cond ref
+ 10 0
+ 12 2 Alt
+ 14 8 KetRmax
+ 16 14 Ket
+ 18 18 Ket
+ 20 End
+------------------------------------------------------------------
+
+/[a]/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 a
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[a]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 a
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[\xaa]/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{aa}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[\xaa]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 \x{aa}
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[^a]/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 [^a]
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[^a]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 [^a]
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[^\xaa]/BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 [^\x{aa}]
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[^\xaa]/8BM
+Memory allocation (code space): 28
+------------------------------------------------------------------
+ 0 4 Bra
+ 2 [^\x{aa}]
+ 4 4 Ket
+ 6 End
+------------------------------------------------------------------
+
+/[^\d]/8WB
+------------------------------------------------------------------
+ 0 9 Bra
+ 2 [^\p{Nd}]
+ 9 9 Ket
+ 11 End
+------------------------------------------------------------------
+
+/[[:^alpha:][:^cntrl:]]+/8WB
+------------------------------------------------------------------
+ 0 18 Bra
+ 2 [ -~\x80-\xff\P{L}]+
+ 18 18 Ket
+ 20 End
+------------------------------------------------------------------
+
+/[[:^cntrl:][:^alpha:]]+/8WB
+------------------------------------------------------------------
+ 0 18 Bra
+ 2 [ -~\x80-\xff\P{L}]+
+ 18 18 Ket
+ 20 End
+------------------------------------------------------------------
+
+/[[:alpha:]]+/8WB
+------------------------------------------------------------------
+ 0 10 Bra
+ 2 [\p{L}]+
+ 10 10 Ket
+ 12 End
+------------------------------------------------------------------
+
+/[[:^alpha:]\S]+/8WB
+------------------------------------------------------------------
+ 0 13 Bra
+ 2 [\P{L}\P{Xsp}]+
+ 13 13 Ket
+ 15 End
+------------------------------------------------------------------
+
+/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/B
+------------------------------------------------------------------
+ 0 60 Bra
+ 2 abc
+ 8 5 CBra 1
+ 11 d
+ 13 4 Alt
+ 15 e
+ 17 9 Ket
+ 19 *THEN
+ 20 x
+ 22 12 CBra 2
+ 25 123
+ 31 *THEN
+ 32 4
+ 34 24 Alt
+ 36 567
+ 42 5 CBra 3
+ 45 b
+ 47 4 Alt
+ 49 q
+ 51 9 Ket
+ 53 *THEN
+ 54 xx
+ 58 36 Ket
+ 60 60 Ket
+ 62 End
+------------------------------------------------------------------
+
+/-- End of testinput11 --/
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index d5b456d..5f05d77 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -366,6 +366,12 @@ No study data
Error -28 from pcre_fullinfo(0)
Running in 8-bit mode but pattern was compiled in 16-bit mode
+<!testsaved32
+Compiled pattern loaded from testsaved32
+No study data
+Error -28 from pcre_fullinfo(0)
+Running in 8-bit mode but pattern was compiled in 32-bit mode
+
/\h/SI
Capturing subpattern count = 0
No options
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index 60569f6..4c93b08 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -1,6 +1,6 @@
-/-- This set of tests is for the 16-bit library's basic (non-UTF-16) features
+/-- This set of tests is for the 16- and 32-bit library's basic (non-UTF-16 or -32) features
that are not compatible with the 8-bit library, or which give different
- output in 16-bit mode. --/
+ output in 16- or 32-bit mode. --/
/a\Cb/
aXb
@@ -8,15 +8,6 @@
a\nb
0: a\x0ab
-/-- Check maximum non-UTF character size --/
-
-/\x{ffff}/
- A\x{ffff}B
- 0: \x{ffff}
-
-/\x{10000}/
-Failed: character value in \x{...} sequence is too large at offset 8
-
/[^\x{c4}]/DZ
------------------------------------------------------------------
Bra
diff --git a/testdata/testoutput18 b/testdata/testoutput18-16
index 5f1d106..c5f674e 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18-16
@@ -1,5 +1,5 @@
-/-- This set of tests is for UTF-16 support, and is relevant only to the 16-bit
- library. --/
+/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to the
+ 16- and 32-bit library. --/
/xxx/8?DZSS
**Failed: invalid UTF-8 string cannot be converted to UTF-16
@@ -12,11 +12,17 @@
X\x{11234}Y
0: X\x{11234}Y
1: \x{11234}Y
+ X\x{11234}YZ
+ 0: X\x{11234}Y
+ 1: \x{11234}Y
/X(\C{4})/8
X\x{11234}YZ
0: X\x{11234}YZ
1: \x{11234}YZ
+ X\x{11234}YZW
+ 0: X\x{11234}YZ
+ 1: \x{11234}YZ
/X\C*/8
XYZabcdce
@@ -63,6 +69,8 @@ No match
/a\C\Cb/8
a\x{12257}b
0: a\x{12257}b
+ a\x{12257}\x{11234}b
+No match
** Failers
No match
a\x{100}b
@@ -633,6 +641,9 @@ Error -10 (bad UTF-16 string) offset=0 reason=4
abcd\x{11234}pqr
0: \x{11234}
+/(*UTF-32)\x{11234}/
+Failed: (*VERB) not recognized at offset 5
+
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
Capturing subpattern count = 0
Options: bsr_unicode utf
@@ -640,6 +651,9 @@ Forced newline sequence: CRLF
First char = 'a'
Need char = 'b'
+/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
+Failed: (*VERB) not recognized at offset 12
+
/\h/SI8
Capturing subpattern count = 0
Options: utf
@@ -847,6 +861,8 @@ Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
/a/8
\x{10000}\>1
Error -11 (bad UTF-16 offset)
+ \x{10000}ab\>1
+Error -11 (bad UTF-16 offset)
\x{10000}ab\>2
0: a
\x{10000}ab\>3
diff --git a/testdata/testoutput18-32 b/testdata/testoutput18-32
new file mode 100644
index 0000000..08204e3
--- /dev/null
+++ b/testdata/testoutput18-32
@@ -0,0 +1,1011 @@
+/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to the
+ 16- and 32-bit library. --/
+
+/xxx/8?DZSS
+**Failed: invalid UTF-8 string cannot be converted to UTF-32
+
+/abc/8
+ ]
+**Failed: invalid UTF-8 string cannot be converted to UTF-32
+
+/X(\C{3})/8
+ X\x{11234}Y
+No match
+ X\x{11234}YZ
+ 0: X\x{11234}YZ
+ 1: \x{11234}YZ
+
+/X(\C{4})/8
+ X\x{11234}YZ
+No match
+ X\x{11234}YZW
+ 0: X\x{11234}YZW
+ 1: \x{11234}YZW
+
+/X\C*/8
+ XYZabcdce
+ 0: XYZabcdce
+
+/X\C*?/8
+ XYZabcde
+ 0: X
+
+/X\C{3,5}/8
+ Xabcdefg
+ 0: Xabcde
+ X\x{11234}Y
+No match
+ X\x{11234}YZ
+ 0: X\x{11234}YZ
+ X\x{11234}\x{512}
+No match
+ X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}YZ
+ X\x{11234}\x{512}\x{11234}Z
+ 0: X\x{11234}\x{512}\x{11234}Z
+
+/X\C{3,5}?/8
+ Xabcdefg
+ 0: Xabc
+ X\x{11234}Y
+No match
+ X\x{11234}YZ
+ 0: X\x{11234}YZ
+ X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}Y
+ *** Failers
+No match
+ X\x{11234}
+No match
+
+/a\Cb/8
+ aXb
+ 0: aXb
+ a\nb
+ 0: a\x{0a}b
+
+/a\C\Cb/8
+ a\x{12257}b
+No match
+ a\x{12257}\x{11234}b
+ 0: a\x{12257}\x{11234}b
+ ** Failers
+No match
+ a\x{100}b
+No match
+
+/ab\Cde/8
+ abXde
+ 0: abXde
+
+/-- Check maximum character size --/
+
+/\x{ffff}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{ffff}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{ffff}
+No need char
+
+/\x{10000}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{10000}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{10000}
+No need char
+
+/\x{100}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+No need char
+
+/\x{1000}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{1000}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{1000}
+No need char
+
+/\x{10000}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{10000}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{10000}
+No need char
+
+/\x{100000}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100000}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100000}
+No need char
+
+/\x{10ffff}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{10ffff}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{10ffff}
+No need char
+
+/[\x{ff}]/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{ff}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{ff}
+No need char
+
+/[\x{100}]/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+No need char
+
+/\x80/8DZ
+------------------------------------------------------------------
+ Bra
+ \x80
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{80}
+No need char
+
+/\xff/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{ff}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{ff}
+No need char
+
+/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{d55c}\x{ad6d}\x{c5b4}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{d55c}
+Need char = \x{c5b4}
+ \x{D55c}\x{ad6d}\x{C5B4}
+ 0: \x{d55c}\x{ad6d}\x{c5b4}
+
+/\x{65e5}\x{672c}\x{8a9e}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{65e5}\x{672c}\x{8a9e}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{65e5}
+Need char = \x{8a9e}
+ \x{65e5}\x{672c}\x{8a9e}
+ 0: \x{65e5}\x{672c}\x{8a9e}
+
+/\x{80}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x80
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{80}
+No need char
+
+/\x{084}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{84}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{84}
+No need char
+
+/\x{104}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{104}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{104}
+No need char
+
+/\x{861}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{861}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{861}
+No need char
+
+/\x{212ab}/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{212ab}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{212ab}
+No need char
+
+/-- This one is here not because it's different to Perl, but because the way
+the captured single-byte is displayed. (In Perl it becomes a character, and you
+can't tell the difference.) --/
+
+/X(\C)(.*)/8
+ X\x{1234}
+ 0: X\x{1234}
+ 1: \x{1234}
+ 2:
+ X\nabc
+ 0: X\x{0a}abc
+ 1: \x{0a}
+ 2: abc
+
+/-- This one is here because Perl gives out a grumbly error message (quite
+correctly, but that messes up comparisons). --/
+
+/a\Cb/8
+ *** Failers
+No match
+ a\x{100}b
+ 0: a\x{100}b
+
+/[^ab\xC0-\xF0]/8SDZ
+------------------------------------------------------------------
+ Bra
+ [\x00-`c-\xbf\xf1-\xff] (neg)
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
+ \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
+ \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
+ 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
+ Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
+ \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
+ \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
+ \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
+ \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
+ \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
+ \xfc \xfd \xfe \xff
+ \x{f1}
+ 0: \x{f1}
+ \x{bf}
+ 0: \x{bf}
+ \x{100}
+ 0: \x{100}
+ \x{1000}
+ 0: \x{1000}
+ *** Failers
+ 0: *
+ \x{c0}
+No match
+ \x{f0}
+No match
+
+/Ā{3,4}/8SDZ
+------------------------------------------------------------------
+ Bra
+ \x{100}{3}
+ \x{100}?
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+Need char = \x{100}
+Subject length lower bound = 3
+No set of starting bytes
+ \x{100}\x{100}\x{100}\x{100\x{100}
+ 0: \x{100}\x{100}\x{100}
+
+/(\x{100}+|x)/8SDZ
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ \x{100}+
+ Alt
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: x \xff
+
+/(\x{100}*a|x)/8SDZ
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ \x{100}*+
+ a
+ Alt
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: a x \xff
+
+/(\x{100}{0,2}a|x)/8SDZ
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ \x{100}{0,2}
+ a
+ Alt
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: a x \xff
+
+/(\x{100}{1,2}a|x)/8SDZ
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ \x{100}
+ \x{100}{0,1}
+ a
+ Alt
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: x \xff
+
+/\x{100}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+No need char
+
+/a\x{100}\x{101}*/8DZ
+------------------------------------------------------------------
+ Bra
+ a\x{100}
+ \x{101}*
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = 'a'
+Need char = \x{100}
+
+/a\x{100}\x{101}+/8DZ
+------------------------------------------------------------------
+ Bra
+ a\x{100}
+ \x{101}+
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = 'a'
+Need char = \x{101}
+
+/[^\x{c4}]/DZ
+------------------------------------------------------------------
+ Bra
+ [^\x{c4}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+
+/[\x{100}]/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+No need char
+ \x{100}
+ 0: \x{100}
+ Z\x{100}
+ 0: \x{100}
+ \x{100}Z
+ 0: \x{100}
+ *** Failers
+No match
+
+/[\xff]/DZ8
+------------------------------------------------------------------
+ Bra
+ \x{ff}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{ff}
+No need char
+ >\x{ff}<
+ 0: \x{ff}
+
+/[^\xff]/8DZ
+------------------------------------------------------------------
+ Bra
+ [^\x{ff}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+
+/\x{100}abc(xyz(?1))/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}abc
+ CBra 1
+ xyz
+ Recurse
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Options: utf
+First char = \x{100}
+Need char = 'z'
+
+/\777/8I
+Capturing subpattern count = 0
+Options: utf
+First char = \x{1ff}
+No need char
+ \x{1ff}
+ 0: \x{1ff}
+ \777
+ 0: \x{1ff}
+
+/\x{100}+\x{200}/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}++
+ \x{200}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+Need char = \x{200}
+
+/\x{100}+X/8DZ
+------------------------------------------------------------------
+ Bra
+ \x{100}++
+ X
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = \x{100}
+Need char = 'X'
+
+/^[\QĀ\E-\QŐ\E/BZ8
+Failed: missing terminating ] for character class at offset 13
+
+/X/8
+ \x{0}\x{d7ff}\x{e000}\x{10ffff}
+No match
+ \x{d800}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{d800}\?
+No match
+ \x{da00}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{da00}\?
+No match
+ \x{dc00}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{dc00}\?
+No match
+ \x{de00}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{de00}\?
+No match
+ \x{dfff}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{dfff}\?
+No match
+ \x{110000}
+**Failed: character value greater than 0x10ffff cannot be converted to UTF-32
+ \x{d800}\x{1234}
+Error -10 (bad UTF-32 string) offset=0 reason=1
+ \x{fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+
+/(*UTF16)\x{11234}/
+Failed: (*VERB) not recognized at offset 5
+
+/(*UTF-32)\x{11234}/
+Failed: (*VERB) not recognized at offset 5
+
+/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
+Failed: (*VERB) not recognized at offset 12
+
+/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
+Capturing subpattern count = 0
+Options: bsr_unicode utf
+Forced newline sequence: CRLF
+First char = 'a'
+Need char = 'b'
+
+/\h/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x09 \x20 \xa0 \xff
+ ABC\x{09}
+ 0: \x{09}
+ ABC\x{20}
+ 0:
+ ABC\x{a0}
+ 0: \x{a0}
+ ABC\x{1680}
+ 0: \x{1680}
+ ABC\x{180e}
+ 0: \x{180e}
+ ABC\x{2000}
+ 0: \x{2000}
+ ABC\x{202f}
+ 0: \x{202f}
+ ABC\x{205f}
+ 0: \x{205f}
+ ABC\x{3000}
+ 0: \x{3000}
+
+/\v/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
+ ABC\x{0a}
+ 0: \x{0a}
+ ABC\x{0b}
+ 0: \x{0b}
+ ABC\x{0c}
+ 0: \x{0c}
+ ABC\x{0d}
+ 0: \x{0d}
+ ABC\x{85}
+ 0: \x{85}
+ ABC\x{2028}
+ 0: \x{2028}
+
+/\h*A/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = 'A'
+Subject length lower bound = 1
+Starting byte set: \x09 \x20 A \xa0 \xff
+ CDBABC
+ 0: A
+ \x{2000}ABC
+ 0: \x{2000}A
+
+/\R*A/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = 'A'
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d A \x85 \xff
+ CDBABC
+ 0: A
+ \x{2028}A
+ 0: \x{2028}A
+
+/\v+A/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = 'A'
+Subject length lower bound = 2
+Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
+
+/\s?xxx\s/8SI
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = 'x'
+Subject length lower bound = 4
+Starting byte set: \x09 \x0a \x0c \x0d \x20 x
+
+/\sxxx\s/I8ST1
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = 'x'
+Subject length lower bound = 5
+Starting byte set: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
+ AB\x{85}xxx\x{a0}XYZ
+ 0: \x{85}xxx\x{a0}
+ AB\x{a0}xxx\x{85}XYZ
+ 0: \x{a0}xxx\x{85}
+
+/\S \S/I8ST1
+Capturing subpattern count = 0
+Options: utf
+No first char
+Need char = ' '
+Subject length lower bound = 3
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
+ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
+ \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
+ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
+ f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
+ \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
+ \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
+ \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
+ \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
+ \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
+ \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
+ \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
+ \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
+ \xfe \xff
+ \x{a2} \x{84}
+ 0: \x{a2} \x{84}
+ A Z
+ 0: A Z
+
+/a+/8
+ a\x{123}aa\>1
+ 0: aa
+ a\x{123}aa\>2
+ 0: aa
+ a\x{123}aa\>3
+ 0: a
+ a\x{123}aa\>4
+No match
+ a\x{123}aa\>5
+Error -24 (bad offset value)
+ a\x{123}aa\>6
+Error -24 (bad offset value)
+
+/\x{1234}+/iS8I
+Capturing subpattern count = 0
+Options: caseless utf
+First char = \x{1234}
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/\x{1234}+?/iS8I
+Capturing subpattern count = 0
+Options: caseless utf
+First char = \x{1234}
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/\x{1234}++/iS8I
+Capturing subpattern count = 0
+Options: caseless utf
+First char = \x{1234}
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/\x{1234}{2}/iS8I
+Capturing subpattern count = 0
+Options: caseless utf
+First char = \x{1234}
+Need char = \x{1234}
+Subject length lower bound = 2
+No set of starting bytes
+
+/[^\x{c4}]/8DZ
+------------------------------------------------------------------
+ Bra
+ [^\x{c4}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+
+/X+\x{200}/8DZ
+------------------------------------------------------------------
+ Bra
+ X++
+ \x{200}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First char = 'X'
+Need char = \x{200}
+
+/\R/SI8
+Capturing subpattern count = 0
+Options: utf
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
+
+/-- Check bad offset --/
+
+/a/8
+ \x{10000}\>1
+No match
+ \x{10000}ab\>1
+ 0: a
+ \x{10000}ab\>2
+No match
+ \x{10000}ab\>3
+No match
+ \x{10000}ab\>4
+Error -24 (bad offset value)
+ \x{10000}ab\>5
+Error -24 (bad offset value)
+
+//8
+**Failed: character value is ill-formed UTF-32
+
+/\w+\x{C4}/8BZ
+------------------------------------------------------------------
+ Bra
+ \w++
+ \x{c4}
+ Ket
+ End
+------------------------------------------------------------------
+ a\x{C4}\x{C4}
+ 0: a\x{c4}
+
+/\w+\x{C4}/8BZT1
+------------------------------------------------------------------
+ Bra
+ \w+
+ \x{c4}
+ Ket
+ End
+------------------------------------------------------------------
+ a\x{C4}\x{C4}
+ 0: a\x{c4}\x{c4}
+
+/\W+\x{C4}/8BZ
+------------------------------------------------------------------
+ Bra
+ \W+
+ \x{c4}
+ Ket
+ End
+------------------------------------------------------------------
+ !\x{C4}
+ 0: !\x{c4}
+
+/\W+\x{C4}/8BZT1
+------------------------------------------------------------------
+ Bra
+ \W++
+ \x{c4}
+ Ket
+ End
+------------------------------------------------------------------
+ !\x{C4}
+ 0: !\x{c4}
+
+/\W+\x{A1}/8BZ
+------------------------------------------------------------------
+ Bra
+ \W+
+ \x{a1}
+ Ket
+ End
+------------------------------------------------------------------
+ !\x{A1}
+ 0: !\x{a1}
+
+/\W+\x{A1}/8BZT1
+------------------------------------------------------------------
+ Bra
+ \W+
+ \x{a1}
+ Ket
+ End
+------------------------------------------------------------------
+ !\x{A1}
+ 0: !\x{a1}
+
+/X\s+\x{A0}/8BZ
+------------------------------------------------------------------
+ Bra
+ X
+ \s++
+ \x{a0}
+ Ket
+ End
+------------------------------------------------------------------
+ X\x20\x{A0}\x{A0}
+ 0: X \x{a0}
+
+/X\s+\x{A0}/8BZT1
+------------------------------------------------------------------
+ Bra
+ X
+ \s+
+ \x{a0}
+ Ket
+ End
+------------------------------------------------------------------
+ X\x20\x{A0}\x{A0}
+ 0: X \x{a0}\x{a0}
+
+/\S+\x{A0}/8BZ
+------------------------------------------------------------------
+ Bra
+ \S+
+ \x{a0}
+ Ket
+ End
+------------------------------------------------------------------
+ X\x{A0}\x{A0}
+ 0: X\x{a0}\x{a0}
+
+/\S+\x{A0}/8BZT1
+------------------------------------------------------------------
+ Bra
+ \S++
+ \x{a0}
+ Ket
+ End
+------------------------------------------------------------------
+ X\x{A0}\x{A0}
+ 0: X\x{a0}
+
+/\x{a0}+\s!/8BZ
+------------------------------------------------------------------
+ Bra
+ \x{a0}++
+ \s
+ !
+ Ket
+ End
+------------------------------------------------------------------
+ \x{a0}\x20!
+ 0: \x{a0} !
+
+/\x{a0}+\s!/8BZT1
+------------------------------------------------------------------
+ Bra
+ \x{a0}+
+ \s
+ !
+ Ket
+ End
+------------------------------------------------------------------
+ \x{a0}\x20!
+ 0: \x{a0} !
+
+/-- End of testinput18 --/
diff --git a/testdata/testoutput19 b/testdata/testoutput19
index b3cfb9b..ccc198c 100644
--- a/testdata/testoutput19
+++ b/testdata/testoutput19
@@ -1,5 +1,5 @@
/-- This set of tests is for Unicode property support, relevant only to the
- 16-bit library. --/
+ 16- and 32-bit library. --/
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
------------------------------------------------------------------
diff --git a/testdata/testoutput20 b/testdata/testoutput20
index 8214921..127050a 100644
--- a/testdata/testoutput20
+++ b/testdata/testoutput20
@@ -1,5 +1,5 @@
/-- These tests are for the handling of characters greater than 255 in 16-bit,
- non-UTF-16 mode. --/
+ non-UTF-16 mode, or in 32-bit mode. --/
/^\x{ffff}+/i
\x{ffff}
diff --git a/testdata/testoutput21 b/testdata/testoutput21-16
index 52d3cc8..00b9f09 100644
--- a/testdata/testoutput21
+++ b/testdata/testoutput21-16
@@ -75,4 +75,16 @@ No need char
Subject length lower bound = 6
No set of starting bytes
+<!testsaved32LE-1
+Compiled pattern loaded from testsaved32LE-1
+Study data loaded from testsaved32LE-1
+Error -28 from pcre16_fullinfo(0)
+Running in 16-bit mode but pattern was compiled in 32-bit mode
+
+<!testsaved32BE-1
+Compiled pattern loaded from testsaved32BE-1
+Study data loaded from testsaved32BE-1
+Error -28 from pcre16_fullinfo(0)
+Running in 16-bit mode but pattern was compiled in 0-bit mode
+
/-- End of testinput21 --/
diff --git a/testdata/testoutput21-32 b/testdata/testoutput21-32
new file mode 100644
index 0000000..686e475
--- /dev/null
+++ b/testdata/testoutput21-32
@@ -0,0 +1,90 @@
+/-- Tests for reloading pre-compile patterns. The first one gives an error
+right away. The others require the linke size to be 2. */
+
+<!testsaved8
+Compiled pattern loaded from testsaved8
+No study data
+Error -28 from pcre32_fullinfo(0)
+Running in 32-bit mode but pattern was compiled in 8-bit mode
+
+/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+
+<!testsaved16LE-1
+Compiled pattern loaded from testsaved16LE-1
+Study data loaded from testsaved16LE-1
+Error -28 from pcre32_fullinfo(0)
+Running in 32-bit mode but pattern was compiled in 16-bit mode
+
+<!testsaved16BE-1
+Compiled pattern loaded from testsaved16BE-1
+Study data loaded from testsaved16BE-1
+Error -28 from pcre32_fullinfo(0)
+Running in 32-bit mode but pattern was compiled in 0-bit mode
+
+<!testsaved32LE-1
+Compiled pattern loaded from testsaved32LE-1
+Study data loaded from testsaved32LE-1
+------------------------------------------------------------------
+ 0 94 Bra
+ 2 ^
+ 3 [La]
+ 12 27 CBra 1
+ 15 12 Bra
+ 17 [ALal]+
+ 27 12 Ket
+ 29 [\x00-,.-WY-wy-\xff] (neg)*?
+ 39 27 Ket
+ 41 12 CBraPos 2
+ 44 [\x{150}-\x{250}\x{300}]
+ 53 19 Alt
+ 55 [^AS-Uas-u\x{800}\x{d800}-\x{dfff}]
+ 72 31 KetRpos
+ 74 [^\x08#\x{500}\x{1000}]{3,5}
+ 93 $
+ 94 94 Ket
+ 96 End
+------------------------------------------------------------------
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ name 1
+ other 2
+Options: anchored
+No first char
+No need char
+Subject length lower bound = 6
+No set of starting bytes
+
+<!testsaved32BE-1
+Compiled pattern loaded from testsaved32BE-1
+Study data loaded from testsaved32BE-1
+------------------------------------------------------------------
+ 0 94 Bra
+ 2 ^
+ 3 [La]
+ 12 27 CBra 1
+ 15 12 Bra
+ 17 [ALal]+
+ 27 12 Ket
+ 29 [\x00-,.-WY-wy-\xff] (neg)*?
+ 39 27 Ket
+ 41 12 CBraPos 2
+ 44 [\x{150}-\x{250}\x{300}]
+ 53 19 Alt
+ 55 [^AS-Uas-u\x{800}\x{d800}-\x{dfff}]
+ 72 31 KetRpos
+ 74 [^\x08#\x{500}\x{1000}]{3,5}
+ 93 $
+ 94 94 Ket
+ 96 End
+------------------------------------------------------------------
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ name 1
+ other 2
+Options: anchored
+No first char
+No need char
+Subject length lower bound = 6
+No set of starting bytes
+
+/-- End of testinput21 --/
diff --git a/testdata/testoutput22 b/testdata/testoutput22-16
index f5f6ae7..d2a99cc 100644
--- a/testdata/testoutput22
+++ b/testdata/testoutput22-16
@@ -1,4 +1,4 @@
-/-- Tests for reloading pre-compile patterns with UTF-16 support. */
+/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
@@ -56,4 +56,16 @@ No need char
Subject length lower bound = 2
No set of starting bytes
+<!testsaved32LE-2
+Compiled pattern loaded from testsaved32LE-2
+Study data loaded from testsaved32LE-2
+Error -28 from pcre16_fullinfo(0)
+Running in 16-bit mode but pattern was compiled in 32-bit mode
+
+<!testsaved32BE-2
+Compiled pattern loaded from testsaved32BE-2
+Study data loaded from testsaved32BE-2
+Error -28 from pcre16_fullinfo(0)
+Running in 16-bit mode but pattern was compiled in 0-bit mode
+
/-- End of testinput22 --/
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
new file mode 100644
index 0000000..aedad61
--- /dev/null
+++ b/testdata/testoutput22-32
@@ -0,0 +1,71 @@
+/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
+
+/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+
+<!testsaved16LE-2
+Compiled pattern loaded from testsaved16LE-2
+Study data loaded from testsaved16LE-2
+Error -28 from pcre32_fullinfo(0)
+Running in 32-bit mode but pattern was compiled in 16-bit mode
+
+<!testsaved16BE-2
+Compiled pattern loaded from testsaved16BE-2
+Study data loaded from testsaved16BE-2
+Error -28 from pcre32_fullinfo(0)
+Running in 32-bit mode but pattern was compiled in 0-bit mode
+
+<!testsaved32LE-2
+Compiled pattern loaded from testsaved32LE-2
+Study data loaded from testsaved32LE-2
+------------------------------------------------------------------
+ 0 70 Bra
+ 2 33 CBra 1
+ 5 [Za\x{400}-\x{10ffff}]{4,}
+ 23 [\x{f123}\x{10039}\x{20000}-\x{21234}]?
+ 35 19 Alt
+ 37 [A-Cx-z\x{100000}-\x{1000a7}\x{101234}]
+ 54 52 Ket
+ 56 12 CBra 2
+ 59 [\x00-`b-y{-\xff] (neg)
+ 68 12 Ket
+ 70 70 Ket
+ 72 End
+------------------------------------------------------------------
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ cb2 2
+ cbra1 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 2
+No set of starting bytes
+
+<!testsaved32BE-2
+Compiled pattern loaded from testsaved32BE-2
+Study data loaded from testsaved32BE-2
+------------------------------------------------------------------
+ 0 70 Bra
+ 2 33 CBra 1
+ 5 [Za\x{400}-\x{10ffff}]{4,}
+ 23 [\x{f123}\x{10039}\x{20000}-\x{21234}]?
+ 35 19 Alt
+ 37 [A-Cx-z\x{100000}-\x{1000a7}\x{101234}]
+ 54 52 Ket
+ 56 12 CBra 2
+ 59 [\x00-`b-y{-\xff] (neg)
+ 68 12 Ket
+ 70 70 Ket
+ 72 End
+------------------------------------------------------------------
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ cb2 2
+ cbra1 1
+Options: utf
+No first char
+No need char
+Subject length lower bound = 2
+No set of starting bytes
+
+/-- End of testinput22 --/
diff --git a/testdata/testoutput23 b/testdata/testoutput23
new file mode 100644
index 0000000..8e0e687
--- /dev/null
+++ b/testdata/testoutput23
@@ -0,0 +1,12 @@
+/-- Tests for the 16-bit library only */
+
+/-- Check maximum non-UTF character size --/
+
+/\x{ffff}/
+ A\x{ffff}B
+ 0: \x{ffff}
+
+/\x{10000}/
+Failed: character value in \x{...} sequence is too large at offset 8
+
+/-- End of testinput23 --/
diff --git a/testdata/testoutput24 b/testdata/testoutput24
new file mode 100644
index 0000000..754e588
--- /dev/null
+++ b/testdata/testoutput24
@@ -0,0 +1,3 @@
+/-- Tests for the 16-bit library with UTF-16 support only */
+
+/-- End of testinput24 --/
diff --git a/testdata/testoutput25 b/testdata/testoutput25
new file mode 100644
index 0000000..1acbed5
--- /dev/null
+++ b/testdata/testoutput25
@@ -0,0 +1,37 @@
+/-- Tests for the 32-bit library only */
+
+/-- Check maximum character size --/
+
+/\x{110000}/8
+Failed: character value in \x{...} sequence is too large at offset 9
+
+/\x{110000}/
+
+/\x{7fffffff}/
+
+/\x{80000000}/
+Failed: character value in \x{...} sequence is too large at offset 11
+
+/-- Non-UTF characters --/
+
+/\C/8
+ \x{110000}
+**Failed: character value greater than 0x10ffff cannot be converted to UTF-32
+
+/\C{2,3}/
+ \x{400000}\x{400001}\x{400002}\x{400003}
+ 0: \x{400000}\x{400001}\x{400002}
+
+/\x{400000}\x{800000}/iDZ
+------------------------------------------------------------------
+ Bra
+ /i \x{400000}\x{800000}
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: caseless
+First char = \x{400000}
+Need char = \x{800000}
+
+/-- End of testinput25 --/
diff --git a/testdata/testoutput26 b/testdata/testoutput26
new file mode 100644
index 0000000..c492659
--- /dev/null
+++ b/testdata/testoutput26
@@ -0,0 +1,3 @@
+/-- Tests for the 32-bit library with UTF-32 support only */
+
+/-- End of testinput26 --/