Load pcre-3.0 into code/trunk.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@43 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:39:21 +0000
committer: nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:39:21 +0000
commit: 09f9da9675b33a31c605d9d1f913bc2b05522be2 (patch)
tree: e4e2b0bbc47b23f497e3f1b2208a9ac9a9d4ebea
parent: 1622a3e7058dec7de74889c69595693ac0c64187 (diff)
download: pcre-09f9da9675b33a31c605d9d1f913bc2b05522be2.tar.gz
34 files changed, 2588 insertions, 683 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..f305033
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,32 @@
+PCRE LICENCE
+------------
+
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+Written by: Philip Hazel <ph10@cam.ac.uk>
+
+University of Cambridge Computing Service,
+Cambridge, England. Phone: +44 1223 334714.
+
+Copyright (c) 1997-2000 University of Cambridge
+
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+   explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+   General Public Licence (GPL), then the terms of that licence shall
+   supersede any condition above with which it is incompatible.
+
+End
diff --git a/ChangeLog b/ChangeLog
index cd02638..6da0bbd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,8 +2,8 @@ ChangeLog for PCRE
 ------------------
 
 
-Version 2.09 14-Sep-99
-----------------------
+Version 3.0 01-Feb-00
+---------------------
 
 1. Add support for the /+ modifier to perltest (to output $` like it does in
 pcretest).
@@ -23,6 +23,30 @@ captured string vector to pcre_exec(), but (since release 2.00) PCRE has
 required a bigger vector, with some working space on the end. This means that
 the POSIX wrapper now has to get and free some memory, and copy the results.
 
+6. Added some simple autoconf support, placing the test data and the
+documentation in separate directories, re-organizing some of the
+information files, and making it build pcre-config (a GNU standard). Also added
+libtool support for building PCRE as a shared library, which is now the
+default.
+
+7. Got rid of the leading zero in the definition of PCRE_MINOR because 08 and
+09 are not valid octal constants. Single digits will be used for minor values
+less than 10.
+
+8. Defined REG_EXTENDED and REG_NOSUB as zero in the POSIX header, so that
+existing programs that set these in the POSIX interface can use PCRE without
+modification.
+
+9. Added a new function, pcre_fullinfo() with an extensible interface. It can
+return all that pcre_info() returns, plus additional data. The pcre_info()
+function is retained for compatibility, but is considered to be obsolete.
+
+10. Added experimental recursion feature (?R) to handle one common case that
+Perl 5.6 will be able to do with (?p{...}).
+
+11. Added support for POSIX character classes like [:alpha:], which Perl is
+adopting.
+
 
 Version 2.08 31-Aug-99
 ----------------------
diff --git a/LICENCE b/LICENCE
index 246515a..f305033 100644
--- a/LICENCE
+++ b/LICENCE
@@ -9,7 +9,7 @@ Written by: Philip Hazel <ph10@cam.ac.uk>
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.
 
-Copyright (c) 1997-1999 University of Cambridge
+Copyright (c) 1997-2000 University of Cambridge
 
 Permission is granted to anyone to use this software for any purpose on any
 computer system, and to redistribute it freely, subject to the following
diff --git a/Makefile.in b/Makefile.in
index ff677c6..958332c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -27,10 +27,10 @@
 prefix = @prefix@
 exec_prefix = @exec_prefix@
 
-BINDIR = $(exec_prefix)/bin
-LIBDIR = $(exec_prefix)/lib
-INCDIR = $(prefix)/include
-MANDIR = $(prefix)/man
+BINDIR = @bindir@
+LIBDIR = @libdir@
+INCDIR = @includedir@
+MANDIR = @mandir@
 
 CC = @CC@
 CFLAGS = @CFLAGS@
@@ -45,6 +45,11 @@ RANLIB = @RANLIB@
 LIBTOOL = @LIBTOOL@
 LIBSUFFIX = @LIBSUFFIX@
 
+# These are the version numbers for the shared libraries
+
+PCRELIBVERSION = @PCRE_LIB_VERSION@
+PCREPOSIXLIBVERSION = @PCRE_POSIXLIB_VERSION@
+
 
 #---------------------------------------------------------------------------#
 # A copy of install-sh is in this distribution and is used by default.      #
@@ -105,7 +110,7 @@ libpcre.la:     $(OBJ)
 		@echo '--- Building shared library: libpcre'
 		@echo ' '
 		-rm -f libpcre.la
-		libtool $(CC) -o libpcre.la -rpath $(LIBDIR) $(LOBJ)
+		libtool $(CC) -version-info '$(PCRELIBVERSION)' -o libpcre.la -rpath $(LIBDIR) $(LOBJ)
 
 libpcreposix.a: pcreposix.o
 		@echo ' '
@@ -120,7 +125,7 @@ libpcreposix.la: pcreposix.o
 		@echo '--- Building shared library: libpcreposix'
 		@echo ' '
 		-rm -f libpcreposix.la
-		libtool $(CC) -o libpcreposix.la -rpath $(LIBDIR) pcreposix.lo
+		libtool $(CC) -version-info '$(PCREPOSIXLIBVERSION)' -o libpcreposix.la -rpath $(LIBDIR) pcreposix.lo
 
 pcre.o:         chartables.c pcre.c pcre.h internal.h config.h Makefile
 		$(LIBTOOL) $(CC) -c $(CFLAGS) pcre.c
@@ -159,11 +164,16 @@ install:        all
 		$(INSTALL_DATA) doc/pcreposix.3 $(MANDIR)/man3/pcre.3
 		$(INSTALL_DATA) doc/pgrep.1 $(MANDIR)/man1/pgrep.1
 		@if test "$(LIBTOOL)" = "libtool"; then \
+		  echo ' '; \
 		  echo '--- Rebuilding pgrep to use installed shared library ---'; \
 		  echo $(CC) $(CFLAGS) -o pgrep pgrep.o -L$(LIBDIR) -lpcre; \
 		  $(CC) $(CFLAGS) -o pgrep pgrep.o -L$(LIBDIR) -lpcre; \
+		  echo '--- Rebuilding pcretest to use installed shared library ---'; \
+		  echo $(CC) $(CFLAGS) -o pcretest pcretest.o -L$(LIBDIR) -lpcre -lpcreposix; \
+		  $(CC) $(CFLAGS) -o pcretest pcretest.o -L$(LIBDIR) -lpcre -lpcreposix; \
 		fi
-		$(INSTALL)	pgrep $(BINDIR)/pgrep \
+		$(INSTALL)	pgrep $(BINDIR)/pgrep
+		$(INSTALL)	pcre-config $(BINDIR)/pcre-config
 
 # We deliberately omit dftables and chartables.c from 'make clean'; once made
 # chartables.c shouldn't change, and if people have edited the tables by hand,
@@ -174,7 +184,7 @@ clean:;         -rm -rf *.o *.lo *.a *.la .libs pcretest pgrep testtry
 # But "make distclean" should get back to a virgin distribution
 
 distclean:      clean
-		-rm -f chartables.c libtool \
+		-rm -f chartables.c libtool pcre-config pcre.h \
 		Makefile config.h config.status config.log config.cache
 
 check:          runtest
diff --git a/NEWS b/NEWS
index 4c0c62e..4c80bd6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,17 @@
 News about PCRE releases
 ------------------------
 
-A "configure" script is now used to configure PCRE for Unix systems. It builds
-a Makefile and a config.h file.
+Release 3.0 01-Feb-00
+---------------------
+
+1. A "configure" script is now used to configure PCRE for Unix systems. It
+builds a Makefile, a config.h file, and the pcre-config script.
+
+2. PCRE is built as a shared library by default.
+
+3. There is support for POSIX classes such as [:alpha:].
+
+4. There is an experimental recursion feature.
 
 ----------------------------------------------------------------------------
           IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
diff --git a/NON-UNIX-USE b/NON-UNIX-USE
index fee5db1..09a7432 100644
--- a/NON-UNIX-USE
+++ b/NON-UNIX-USE
@@ -6,24 +6,30 @@ entirely of code written in Standard C, and so should compile successfully
 on any machine with a Standard C compiler and library, using normal compiling
 commands to do the following:
 
-(1) Copy or rename the file config.in as config.h. You should not have to
-change any settings inside it for a Standard C environment.
+(1) Copy or rename the file config.in as config.h, and change the macros that
+define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0.
+Unfortunately, because of the way Unix autoconf works, the default setting has
+to be 0.
 
-(2) Compile dftables.c as a stand-alone program, and then run it with
+(2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions
+for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in
+configure.in.
+
+(3) Compile dftables.c as a stand-alone program, and then run it with
 the standard output sent to chartables.c. This generates a set of standard
 character tables.
 
-(3) Compile maketables.c, get.c, study.c and pcre.c and link them all
+(4) Compile maketables.c, get.c, study.c and pcre.c and link them all
 together into an object library in whichever form your system keeps such
 libraries. This is the pcre library (chartables.c gets included by means of an
 #include directive).
 
-(4) Similarly, compile pcreposix.c and link it as the pcreposix library.
+(5) Similarly, compile pcreposix.c and link it as the pcreposix library.
 
-(5) Compile the test program pcretest.c. This needs the functions in the
+(6) Compile the test program pcretest.c. This needs the functions in the
 pcre and pcreposix libraries when linking.
 
-(6) Run pcretest on the testinput files in the testdata directory, and check
+(7) Run pcretest on the testinput files in the testdata directory, and check
 that the output matches the corresponding testoutput files. You must use the
 -i option when checking testinput2.
 
diff --git a/README b/README
index aa49877..90aaf4d 100644
--- a/README
+++ b/README
@@ -1,6 +1,10 @@
 README file for PCRE (Perl-compatible regular expression library)
 -----------------------------------------------------------------
 
+The latest release of PCRE is always available from
+
+  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
+
 Please read the NEWS file if you are upgrading from a previous release.
 
 
@@ -17,34 +21,51 @@ CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
 
 specifies that the C compiler should be run with the flags '-O2 -Wall' instead
 of the default, and that "make install" should install PCRE under /opt/local
-instead of the default /usr/local. The "configure" script builds two files:
+instead of the default /usr/local. The "configure" script builds three files:
 
-. Makefile is built by copying Makefile.in and making certain substitutions.
-. config.h is built by copying config.in and making certain substitutions.
+. Makefile is built by copying Makefile.in and making substitutions.
+. config.h is built by copying config.in and making substitutions.
+. pcre-config is built by copying pcre-config.in and making substitutions.
 
 Once "configure" has run, you can run "make". It builds two libraries called
-libpcre.a and libpcreposix.a, a test program called pcretest, and the pgrep
+libpcre and libpcreposix, a test program called pcretest, and the pgrep
 command. You can use "make install" to copy these, and the public header file
 pcre.h, to appropriate live directories on your system, in the normal way.
 
+Running "make install" also installs the command pcre-config, which can be used
+to recall information about the PCRE configuration and installation. For
+example,
+
+  pcre-config --version
+
+prints the version number, and
+
+  pcre-config --libs
+
+outputs information about where the library is installed. This command can be
+included in makefiles for programs that use PCRE, saving the programmer from
+having to remember too many details.
+
 
 Shared libraries on Unix systems
 --------------------------------
 
-The default distribution builds static libraries. It is also possible to build
-PCRE as two shared libraries. This support is new and experimental and may not
-work on all systems. It relies on the "libtool" scripts - these are distributed
-with PCRE. To build PCRE using shared libraries you must use --enable-shared
-when configuring it. For example
+The default distribution builds PCRE as two shared libraries. This support is
+new and experimental and may not work on all systems. It relies on the
+"libtool" scripts - these are distributed with PCRE. It should build a
+"libtool" script and use this to compile and link shared libraries, which are
+placed in a subdirectory called .libs. The programs pcretest and pgrep are
+built to use these uninstalled libraries by means of wrapper scripts. When you
+use "make install" to install shared libraries, pgrep and pcretest are
+automatically re-built to use the newly installed libraries. However, only
+pgrep is installed, as pcretest is really just a test program.
+
+To build PCRE using static libraries you must use --disable-shared when
+configuring it. For example
 
-./configure --prefix=/usr/gnu --enable-shared
+./configure --prefix=/usr/gnu --disable-shared
 
-Then run "make" in the usual way. It should build a "libtool" script and use
-this to compile and link shared libraries, which are placed in a subdirectory
-called .libs. The programs pcretest and pgrep are built to use these
-uninstalled libraries by means of wrapper scripts. When you use "make install"
-to install shared libraries, pgrep is automatically re-built to use the newly
-installed library before it itself is installed.
+Then run "make" in the usual way.
 
 
 Building on non-Unix systems
@@ -159,7 +180,8 @@ The distribution should contain the following files:
   study.c               ) source of
   pcre.c                )   the functions
   pcreposix.c           )
-  pcre.h                header for the external API
+  pcre.in               "source" for the header for the external API; pcre.h
+                          is built from this by "configure"
   pcreposix.h           header for the external POSIX wrapper API
   internal.h            header for internal use
   config.in             template for config.h, which is built by configure
@@ -170,6 +192,7 @@ The distribution should contain the following files:
   ChangeLog             log of changes to the code
   INSTALL               generic installation instructions
   LICENCE               conditions for the use of PCRE
+  COPYING               the same, using GNU's standard name
   Makefile.in           template for Unix Makefile, which is built by configure
   NEWS                  important changes in this release
   NON-UNIX-USE          notes on building PCRE on non-Unix systems
@@ -197,6 +220,7 @@ The distribution should contain the following files:
   pcretest.c            test program
   perltest              Perl test program
   pgrep.c               source of a grep utility that uses PCRE
+  pcre-config.in        source of script which retains PCRE information
   testdata/testinput1   test data, compatible with Perl 5.004 and 5.005
   testdata/testinput2   test data for error messages and non-Perl things
   testdata/testinput3   test data, compatible with Perl 5.005
@@ -212,4 +236,4 @@ The distribution should contain the following files:
   pcre.def
 
 Philip Hazel <ph10@cam.ac.uk>
-January 2000
+February 2000
diff --git a/config.in b/config.in
index c0f4537..7631d46 100644
--- a/config.in
+++ b/config.in
@@ -1,20 +1,28 @@
 
-/* config.in is converted by configure into config.h. PCRE is written in
-Standard C, but there are a few non-standard things it can cope with, allowing
-it to run on SunOS4 and other "close to standard" systems. The defaults below
-are the correct ones on a Standard C system. On a non-Unix system you can just
-copy this file into config.h. */
+/* On Unix systems config.in is converted by configure into config.h. PCRE is
+written in Standard C, but there are a few non-standard things it can cope
+with, allowing it to run on SunOS4 and other "close to standard" systems.
+
+On a non-Unix system you should just copy this file into config.h and change
+the definitions of HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because
+of the way autoconf works, these cannot be made the defaults. */
+
+/* Define to empty if the keyword does not work. */
 
-/* Define to empty if the keyword does not work.  */
 #undef const
 
-/* Define to `unsigned' if <stddef.h> doesn't define size_t.  */
+/* Define to `unsigned' if <stddef.h> doesn't define size_t. */
+
 #undef size_t
 
-/* Undefine if you don't have the strerror function.  */
-#define HAVE_STRERROR
+/* The following two definitions are mainly for the benefit of SunOS4, which
+doesn't have the strerror() or memmove() functions that should be present in
+all Standard C libraries. The macros should normally be defined with the value
+1 for other systems, but unfortunately we can't make this the default because
+"configure" files generated by autoconf will only change 0 to 1; they won't
+change 1 to 0 if the functions are not found. */
 
-/* Undefine if you don't have the memmove function. */
-#define HAVE_MEMMOVE
+#define HAVE_STRERROR 0
+#define HAVE_MEMMOVE  0
 
 /* End */
diff --git a/configure b/configure
index 91bba35..738230a 100755
--- a/configure
+++ b/configure
@@ -12,7 +12,7 @@ ac_help=
 ac_default_prefix=/usr/local
 # Any additions from configure.in:
 ac_help="$ac_help
-  --enable-shared         build PCRE as a shared library (using libtool)"
+  --disable-shared        build PCRE as a static library"
 
 # Initialize some variables set by options.
 # The variables have the same names as the options, with
@@ -138,9 +138,6 @@ do
     # The list generated by autoconf has been trimmed to remove many
     # options that are totally irrelevant to PCRE (e.g. relating to X),
     # or are not supported by its Makefile.
-    # The list generated by autoconf has been trimmed to remove many
-    # options that are totally irrelevant to PCRE (e.g. relating to X),
-    # or are not supported by its Makefile.
     # This message is too long to be a string in the A/UX 3.1 sh.
     cat << EOF
 Usage: ./configure [options]
@@ -504,10 +501,23 @@ fi
 
 
 
+
+
+
+PCRE_MAJOR=3
+PCRE_MINOR=0
+PCRE_DATE=01-Feb-2000
+PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
+
+
+PCRE_LIB_VERSION=0:0:0
+PCRE_POSIXLIB_VERSION=0:0:0
+
+
 # Extract the first word of "gcc", so it can be a program name with args.
 set dummy gcc; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:531: checking for $ac_word" >&5
+echo "configure:544: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -537,7 +547,7 @@ if test -z "$CC"; then
   # Extract the first word of "cc", so it can be a program name with args.
 set dummy cc; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:561: checking for $ac_word" >&5
+echo "configure:574: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -588,7 +598,7 @@ fi
       # Extract the first word of "cl", so it can be a program name with args.
 set dummy cl; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:612: checking for $ac_word" >&5
+echo "configure:625: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -620,7 +630,7 @@ fi
 fi
 
 echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6
-echo "configure:644: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
+echo "configure:657: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
 
 ac_ext=c
 # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
@@ -631,12 +641,12 @@ cross_compiling=$ac_cv_prog_cc_cross
 
 cat > conftest.$ac_ext << EOF
 
-#line 655 "configure"
+#line 668 "configure"
 #include "confdefs.h"
 
 main(){return(0);}
 EOF
-if { (eval echo configure:660: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+if { (eval echo configure:673: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
   ac_cv_prog_cc_works=yes
   # If we can't run a trivial program, we are probably using a cross compiler.
   if (./conftest; exit) 2>/dev/null; then
@@ -662,12 +672,12 @@ if test $ac_cv_prog_cc_works = no; then
   { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; }
 fi
 echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6
-echo "configure:686: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
+echo "configure:699: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6
 cross_compiling=$ac_cv_prog_cc_cross
 
 echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6
-echo "configure:691: checking whether we are using GNU C" >&5
+echo "configure:704: checking whether we are using GNU C" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -676,7 +686,7 @@ else
   yes;
 #endif
 EOF
-if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:700: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
+if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:713: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
   ac_cv_prog_gcc=yes
 else
   ac_cv_prog_gcc=no
@@ -695,7 +705,7 @@ ac_test_CFLAGS="${CFLAGS+set}"
 ac_save_CFLAGS="$CFLAGS"
 CFLAGS=
 echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6
-echo "configure:719: checking whether ${CC-cc} accepts -g" >&5
+echo "configure:732: checking whether ${CC-cc} accepts -g" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -729,7 +739,7 @@ fi
 # Extract the first word of "ranlib", so it can be a program name with args.
 set dummy ranlib; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:753: checking for $ac_word" >&5
+echo "configure:766: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -757,8 +767,9 @@ else
 fi
 
 
+
 echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6
-echo "configure:782: checking how to run the C preprocessor" >&5
+echo "configure:796: checking how to run the C preprocessor" >&5
 # On Suns, sometimes $CPP names a directory.
 if test -n "$CPP" && test -d "$CPP"; then
   CPP=
@@ -773,13 +784,13 @@ else
   # On the NeXT, cc -E runs the code through the compiler's parser,
   # not just through cpp.
   cat > conftest.$ac_ext <<EOF
-#line 797 "configure"
+#line 811 "configure"
 #include "confdefs.h"
 #include <assert.h>
 Syntax Error
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:803: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+{ (eval echo configure:817: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
 ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   :
@@ -790,13 +801,13 @@ else
   rm -rf conftest*
   CPP="${CC-cc} -E -traditional-cpp"
   cat > conftest.$ac_ext <<EOF
-#line 814 "configure"
+#line 828 "configure"
 #include "confdefs.h"
 #include <assert.h>
 Syntax Error
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:820: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+{ (eval echo configure:834: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
 ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   :
@@ -807,13 +818,13 @@ else
   rm -rf conftest*
   CPP="${CC-cc} -nologo -E"
   cat > conftest.$ac_ext <<EOF
-#line 831 "configure"
+#line 845 "configure"
 #include "confdefs.h"
 #include <assert.h>
 Syntax Error
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:837: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+{ (eval echo configure:851: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
 ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   :
@@ -838,12 +849,12 @@ fi
 echo "$ac_t""$CPP" 1>&6
 
 echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6
-echo "configure:862: checking for ANSI C header files" >&5
+echo "configure:876: checking for ANSI C header files" >&5
 if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 867 "configure"
+#line 881 "configure"
 #include "confdefs.h"
 #include <stdlib.h>
 #include <stdarg.h>
@@ -851,7 +862,7 @@ else
 #include <float.h>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:875: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+{ (eval echo configure:889: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
 ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
@@ -868,7 +879,7 @@ rm -f conftest*
 if test $ac_cv_header_stdc = yes; then
   # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
 cat > conftest.$ac_ext <<EOF
-#line 892 "configure"
+#line 906 "configure"
 #include "confdefs.h"
 #include <string.h>
 EOF
@@ -886,7 +897,7 @@ fi
 if test $ac_cv_header_stdc = yes; then
   # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
 cat > conftest.$ac_ext <<EOF
-#line 910 "configure"
+#line 924 "configure"
 #include "confdefs.h"
 #include <stdlib.h>
 EOF
@@ -907,7 +918,7 @@ if test "$cross_compiling" = yes; then
   :
 else
   cat > conftest.$ac_ext <<EOF
-#line 931 "configure"
+#line 945 "configure"
 #include "confdefs.h"
 #include <ctype.h>
 #define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
@@ -918,7 +929,7 @@ if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2);
 exit (0); }
 
 EOF
-if { (eval echo configure:942: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:956: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
 then
   :
 else
@@ -945,17 +956,17 @@ for ac_hdr in limits.h
 do
 ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'`
 echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
-echo "configure:969: checking for $ac_hdr" >&5
+echo "configure:983: checking for $ac_hdr" >&5
 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 974 "configure"
+#line 988 "configure"
 #include "confdefs.h"
 #include <$ac_hdr>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:979: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+{ (eval echo configure:993: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
 ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
@@ -982,13 +993,14 @@ fi
 done
 
 
+
 echo $ac_n "checking for working const""... $ac_c" 1>&6
-echo "configure:1007: checking for working const" >&5
+echo "configure:1022: checking for working const" >&5
 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1012 "configure"
+#line 1027 "configure"
 #include "confdefs.h"
 
 int main() {
@@ -1037,7 +1049,7 @@ ccp = (char const *const *) p;
 
 ; return 0; }
 EOF
-if { (eval echo configure:1061: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1076: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ac_cv_c_const=yes
 else
@@ -1058,12 +1070,12 @@ EOF
 fi
 
 echo $ac_n "checking for size_t""... $ac_c" 1>&6
-echo "configure:1082: checking for size_t" >&5
+echo "configure:1097: checking for size_t" >&5
 if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1087 "configure"
+#line 1102 "configure"
 #include "confdefs.h"
 #include <sys/types.h>
 #if STDC_HEADERS
@@ -1091,15 +1103,16 @@ EOF
 fi
 
 
+
 for ac_func in memmove strerror
 do
 echo $ac_n "checking for $ac_func""... $ac_c" 1>&6
-echo "configure:1118: checking for $ac_func" >&5
+echo "configure:1134: checking for $ac_func" >&5
 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1123 "configure"
+#line 1139 "configure"
 #include "confdefs.h"
 /* System header to define __stub macros and hopefully few prototypes,
     which can conflict with char $ac_func(); below.  */
@@ -1122,7 +1135,7 @@ $ac_func();
 
 ; return 0; }
 EOF
-if { (eval echo configure:1146: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+if { (eval echo configure:1162: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
   rm -rf conftest*
   eval "ac_cv_func_$ac_func=yes"
 else
@@ -1148,22 +1161,31 @@ done
 
 
 
-
-LIBTOOL=
-LIBSUFFIX=a
+LIBTOOL=libtool
+LIBSUFFIX=la
 # Check whether --enable-shared or --disable-shared was given.
 if test "${enable_shared+set}" = set; then
   enableval="$enable_shared"
-  if test "$enableval" = "yes"; then
-  LIBTOOL=libtool
-  LIBSUFFIX=la 
-fi   
+  if test "$enableval" = "no"; then
+  LIBTOOL=
+  LIBSUFFIX=a
+fi
 
 fi
 
 
 
 
+
+
+
+
+
+
+
+
+
+
 trap '' 1 2 15
 cat > confcache <<\EOF
 # This file is a shell script that caches the results of configure
@@ -1264,7 +1286,7 @@ done
 
 ac_given_srcdir=$srcdir
 
-trap 'rm -fr `echo "Makefile config.h:config.in" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
+trap 'rm -fr `echo "Makefile pcre.h:pcre.in pcre-config config.h:config.in" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
 EOF
 cat >> $CONFIG_STATUS <<EOF
 
@@ -1299,8 +1321,16 @@ s%@mandir@%$mandir%g
 s%@CC@%$CC%g
 s%@RANLIB@%$RANLIB%g
 s%@CPP@%$CPP%g
+s%@HAVE_MEMMOVE@%$HAVE_MEMMOVE%g
+s%@HAVE_STRERROR@%$HAVE_STRERROR%g
 s%@LIBTOOL@%$LIBTOOL%g
 s%@LIBSUFFIX@%$LIBSUFFIX%g
+s%@PCRE_MAJOR@%$PCRE_MAJOR%g
+s%@PCRE_MINOR@%$PCRE_MINOR%g
+s%@PCRE_DATE@%$PCRE_DATE%g
+s%@PCRE_VERSION@%$PCRE_VERSION%g
+s%@PCRE_LIB_VERSION@%$PCRE_LIB_VERSION%g
+s%@PCRE_POSIXLIB_VERSION@%$PCRE_POSIXLIB_VERSION%g
 
 CEOF
 EOF
@@ -1342,7 +1372,7 @@ EOF
 
 cat >> $CONFIG_STATUS <<EOF
 
-CONFIG_FILES=\${CONFIG_FILES-"Makefile"}
+CONFIG_FILES=\${CONFIG_FILES-"Makefile pcre.h:pcre.in pcre-config"}
 EOF
 cat >> $CONFIG_STATUS <<\EOF
 for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then
@@ -1508,7 +1538,7 @@ cat >> $CONFIG_STATUS <<EOF
 
 EOF
 cat >> $CONFIG_STATUS <<\EOF
-
+chmod a+x pcre-config
 exit 0
 EOF
 chmod +x $CONFIG_STATUS
diff --git a/configure.in b/configure.in
index ef94892..507888b 100644
--- a/configure.in
+++ b/configure.in
@@ -5,36 +5,71 @@ dnl it should be seeing, to verify it is in the same directory.
 
 AC_INIT(dftables.c)
 
+dnl Arrange to build config.h from config.in. Note that pcre.h is
+dnl built differently, as it is just a "substitution" file.
+dnl Manual says this macro should come right after AC_INIT.
+AC_CONFIG_HEADER(config.h:config.in)
+
+dnl Provide the current PCRE version information. Do not use numbers
+dnl with leading zeros for the minor version, as they end up in a C
+dnl macro, and may be treated as octal constants. Stick to single
+dnl digits for minor numbers less than 10. There are unlikely to be
+dnl that many releases anyway.
+
+PCRE_MAJOR=3
+PCRE_MINOR=0
+PCRE_DATE=01-Feb-2000
+PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
+
+dnl Provide versioning information for libtool shared libraries that
+dnl are built by default on Unix systems.
+
+PCRE_LIB_VERSION=0:0:0
+PCRE_POSIXLIB_VERSION=0:0:0
+
 dnl Checks for programs.
+
 AC_PROG_CC
 AC_PROG_RANLIB
 
 dnl Checks for header files.
+
 AC_HEADER_STDC
 AC_CHECK_HEADERS(limits.h)
 
 dnl Checks for typedefs, structures, and compiler characteristics.
+
 AC_C_CONST
 AC_TYPE_SIZE_T
 
 dnl Checks for library functions.
-AC_CHECK_FUNCS(memmove strerror)
 
-dnl Arrange to build config.h from config.in
-AC_CONFIG_HEADER(config.h:config.in)
+AC_CHECK_FUNCS(memmove strerror)
 
 dnl Handle --enable-shared-libraries
-LIBTOOL=
-LIBSUFFIX=a
+
+LIBTOOL=libtool
+LIBSUFFIX=la
 AC_ARG_ENABLE(shared,
-[  --enable-shared         build PCRE as a shared library (using libtool)],
-if test "$enableval" = "yes"; then
-  LIBTOOL=libtool
-  LIBSUFFIX=la
+[  --disable-shared        build PCRE as a static library],
+if test "$enableval" = "no"; then
+  LIBTOOL=
+  LIBSUFFIX=a
 fi
 )
+
+dnl "Export" these variables
+
+AC_SUBST(HAVE_MEMMOVE)
+AC_SUBST(HAVE_STRERROR)
 AC_SUBST(LIBTOOL)
 AC_SUBST(LIBSUFFIX)
+AC_SUBST(PCRE_MAJOR)
+AC_SUBST(PCRE_MINOR)
+AC_SUBST(PCRE_DATE)
+AC_SUBST(PCRE_VERSION)
+AC_SUBST(PCRE_LIB_VERSION)
+AC_SUBST(PCRE_POSIXLIB_VERSION)
 
 dnl This must be last; it determines what files are written
-AC_OUTPUT(Makefile)
+AC_OUTPUT(Makefile pcre.h:pcre.in pcre-config,[chmod a+x pcre-config])
diff --git a/dftables.c b/dftables.c
index 7b336e6..d572dfd 100644
--- a/dftables.c
+++ b/dftables.c
@@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -89,9 +89,11 @@ for (i = 0; i < 256; i++)
 printf(",\n\n");
 
 printf(
-  "/* This table contains bit maps for digits, 'word' chars, and white\n"
-  "space. Each map is 32 bytes long and the bits run from the least\n"
-  "significant end of each byte. */\n\n");
+  "/* This table contains bit maps for various character classes.\n"
+  "Each map is 32 bytes long and the bits run from the least\n"
+  "significant end of each byte. The classes that have their own\n"
+  "maps are: space, xdigit, digit, upper, lower, word, graph\n"
+  "print, punct, and cntrl. Other classes are built from combinations. */\n\n");
 
 printf("  ");
 for (i = 0; i < cbit_length; i++)
@@ -104,7 +106,7 @@ for (i = 0; i < cbit_length; i++)
   printf("0x%02x", *tables++);
   if (i != cbit_length - 1) printf(",");
   }
-printf(" ,\n\n");
+printf(",\n\n");
 
 printf(
   "/* This table identifies various classes of character by individual bits:\n"
diff --git a/doc/Tech.Notes b/doc/Tech.Notes
index d485a4e..03904db 100644
--- a/doc/Tech.Notes
+++ b/doc/Tech.Notes
@@ -23,18 +23,19 @@ optionally, minimizing in Perl) the amount of the subject that matches
 individual wild portions of the pattern. This is an "NFA algorithm" in Friedl's
 terminology.
 
-For this set of functions that forms PCRE, I tried at first to invent an
-algorithm that used an amount of store bounded by a multiple of the number of
-characters in the pattern, to save on compiling time. However, because of the
-greater complexity in Perl regular expressions, I couldn't do this. In any
-case, a first pass through the pattern is needed, in order to find internal
-flag settings like (?i) at top level. So it works by running a very degenerate
-first pass to calculate a maximum store size, and then a second pass to do the
-real compile - which may use a bit less than the predicted amount of store. The
-idea is that this is going to turn out faster because the first pass is
-degenerate and the second can just store stuff straight into the vector. It
-does make the compiling functions bigger, of course, but they have got quite
-big anyway to handle all the Perl stuff.
+For the set of functions that forms PCRE (which are unrelated to those
+mentioned above), I tried at first to invent an algorithm that used an amount
+of store bounded by a multiple of the number of characters in the pattern, to
+save on compiling time. However, because of the greater complexity in Perl
+regular expressions, I couldn't do this. In any case, a first pass through the
+pattern is needed, in order to find internal flag settings like (?i) at top
+level. So PCRE works by running a very degenerate first pass to calculate a
+maximum store size, and then a second pass to do the real compile - which may
+use a bit less than the predicted amount of store. The idea is that this is
+going to turn out faster because the first pass is degenerate and the second
+pass can just store stuff straight into the vector. It does make the compiling
+functions bigger, of course, but they have got quite big anyway to handle all
+the Perl stuff.
 
 The compiled form of a pattern is a vector of bytes, containing items of
 variable length. The first byte in an item is an opcode, and the length of the
@@ -61,6 +62,7 @@ These items are all just one byte long
   OP_EODN                match end of data or \n at end: \Z
   OP_EOD                 match end of data: \z
   OP_DOLL                $ (end of data, or before \n in multiline)
+  OP_RECURSE             match the pattern recursively
 
 
 Repeating single characters
@@ -125,9 +127,9 @@ positive class, and OP_NOT for a negative one (that is, for something like
 repeated, negated, single-character class. The normal ones (OP_STAR etc.) are
 used for a repeated positive single-character class.
 
-OP_CLASS is followed by a 32-byte bit map containing a 1
-bit for every character that is acceptable. The bits are counted from the least
-significant end of each byte.
+OP_CLASS is followed by a 32-byte bit map containing a 1 bit for every
+character that is acceptable. The bits are counted from the least significant
+end of each byte.
 
 
 Back references
@@ -159,11 +161,12 @@ four bytes of data, comprising the minimum and maximum repeat counts.
 Brackets and alternation
 ------------------------
 
-A pair of non-identifying (round) brackets is wrapped round each expression at
+A pair of non-capturing (round) brackets is wrapped round each expression at
 compile time, so alternation always happens in the context of brackets.
-Non-identifying brackets use the opcode OP_BRA, while identifying brackets use
+Non-capturing brackets use the opcode OP_BRA, while capturing brackets use
 OP_BRA+1, OP_BRA+2, etc. [Note for North Americans: "bracket" to some English
-speakers, including myself, can be round, square, or curly. Hence this usage.]
+speakers, including myself, can be round, square, curly, or pointy. Hence this
+usage.]
 
 A bracket opcode is followed by two bytes which give the offset to the next
 alternative OP_ALT or, if there aren't any branches, to the matching KET
@@ -236,4 +239,4 @@ the compiled data.
 
 
 Philip Hazel
-January 1999
+February 2000
diff --git a/doc/pcre.3 b/doc/pcre.3
index 47971b9..bd435e9 100644
--- a/doc/pcre.3
+++ b/doc/pcre.3
@@ -47,6 +47,11 @@ pcre - Perl-compatible regular expressions.
 .B const unsigned char *pcre_maketables(void);
 .PP
 .br
+.B int pcre_fullinfo(const pcre *\fIcode\fR, "const pcre_extra *\fIextra\fR,"
+.ti +5n
+.B int \fIwhat\fR, void *\fIwhere\fR);
+.PP
+.br
 .B int pcre_info(const pcre *\fIcode\fR, int *\fIoptptr\fR, int
 .B *\fIfirstcharptr\fR);
 .PP
@@ -64,16 +69,19 @@ pcre - Perl-compatible regular expressions.
 .SH DESCRIPTION
 The PCRE library is a set of functions that implement regular expression
 pattern matching using the same syntax and semantics as Perl 5, with just a few
-differences (see below). The current implementation corresponds to Perl 5.005.
+differences (see below). The current implementation corresponds to Perl 5.005,
+with some additional features from the Perl development release.
 
 PCRE has its own native API, which is described in this document. There is also
-a set of wrapper functions that correspond to the POSIX API. These are
-described in the \fBpcreposix\fR documentation.
+a set of wrapper functions that correspond to the POSIX regular expression API.
+These are described in the \fBpcreposix\fR documentation.
 
 The native API function prototypes are defined in the header file \fBpcre.h\fR,
 and on Unix systems the library itself is called \fBlibpcre.a\fR, so can be
 accessed by adding \fB-lpcre\fR to the command for linking an application which
-calls it.
+calls it. The header file defines the macros PCRE_MAJOR and PCRE_MINOR to
+contain the major and minor release numbers for the library. Applications can
+use these to include support for different releases.
 
 The functions \fBpcre_compile()\fR, \fBpcre_study()\fR, and \fBpcre_exec()\fR
 are used for compiling and matching regular expressions, while
@@ -83,9 +91,11 @@ captured substrings from a matched subject string. The function
 \fBpcre_maketables()\fR is used (optionally) to build a set of character tables
 in the current locale for passing to \fBpcre_compile()\fR.
 
-The function \fBpcre_info()\fR is used to find out information about a compiled
-pattern, while the function \fBpcre_version()\fR returns a pointer to a string
-containing the version of PCRE and its date of release.
+The function \fBpcre_fullinfo()\fR is used to find out information about a
+compiled pattern; \fBpcre_info()\fR is an obsolete version which returns only
+some of the available information, but is retained for backwards compatibility.
+The function \fBpcre_version()\fR returns a pointer to a string containing the
+version of PCRE and its date of release.
 
 The global variables \fBpcre_malloc\fR and \fBpcre_free\fR initially contain
 the entry points of the standard \fBmalloc()\fR and \fBfree()\fR functions
@@ -182,12 +192,14 @@ sequence (?( which introduces a conditional subpattern.
 
   PCRE_EXTRA
 
-This option turns on additional functionality of PCRE that is incompatible with
-Perl. Any backslash in a pattern that is followed by a letter that has no
+This option was invented in order to turn on additional functionality of PCRE
+that is incompatible with Perl, but it is currently of very little use. When
+set, any backslash in a pattern that is followed by a letter that has no
 special meaning causes an error, thus reserving these combinations for future
 expansion. By default, as in Perl, a backslash followed by a letter with no
 special meaning is treated as a literal. There are at present no other features
-controlled by this option.
+controlled by this option. It can also be set by a (?X) option setting within a
+pattern.
 
   PCRE_MULTILINE
 
@@ -261,25 +273,58 @@ memory containing the tables remains available for as long as it is needed.
 
 
 .SH INFORMATION ABOUT A PATTERN
-The \fBpcre_info()\fR function returns information about a compiled pattern.
-Its yield is the number of capturing subpatterns, or one of the following
-negative numbers:
+The \fBpcre_fullinfo()\fR function returns information about a compiled
+pattern. It replaces the obsolete \fBpcre_info()\fR function, which is
+nevertheless retained for backwards compatibility (and is documented below).
+
+The first argument for \fBpcre_fullinfo()\fR is a pointer to the compiled
+pattern. The second argument is the result of \fBpcre_study()\fR, or NULL if
+the pattern was not studied. The third argument specifies which piece of
+information is required, while the fourth argument is a pointer to a variable
+to receive the data. The yield of the function is zero for success, or one of
+the following negative numbers:
 
   PCRE_ERROR_NULL       the argument \fIcode\fR was NULL
+                        the argument \fIwhere\fR was NULL
   PCRE_ERROR_BADMAGIC   the "magic number" was not found
+  PCRE_ERROR_BADOPTION  the value of \fIwhat\fR was invalid
 
-If the \fIoptptr\fR argument is not NULL, a copy of the options with which the
-pattern was compiled is placed in the integer it points to. These option bits
+The possible values for the third argument are defined in \fBpcre.h\fR, and are
+as follows:
+
+  PCRE_INFO_OPTIONS
+
+Return a copy of the options with which the pattern was compiled. The fourth
+argument should point to an \fBunsigned long int\fR variable. These option bits
 are those specified in the call to \fBpcre_compile()\fR, modified by any
 top-level option settings within the pattern itself, and with the PCRE_ANCHORED
-bit set if the form of the pattern implies that it can match only at the start
-of a subject string.
+bit forcibly set if the form of the pattern implies that it can match only at
+the start of a subject string.
 
-If the pattern is not anchored and the \fIfirstcharptr\fR argument is not NULL,
-it is used to pass back information about the first character of any matched
-string. If there is a fixed first character, e.g. from a pattern such as
-(cat|cow|coyote), then it is returned in the integer pointed to by
-\fIfirstcharptr\fR. Otherwise, if either
+  PCRE_INFO_SIZE
+
+Return the size of the compiled pattern, that is, the value that was passed as
+the argument to \fBpcre_malloc()\fR when PCRE was getting memory in which to
+place the compiled data. The fourth argument should point to a \fBsize_t\fR
+variable.
+
+  PCRE_INFO_CAPTURECOUNT
+
+Return the number of capturing subpatterns in the pattern. The fourth argument
+should point to an \fBint\fR variable.
+
+  PCRE_INFO_BACKREFMAX
+
+Return the number of the highest back reference in the pattern. The fourth
+argument should point to an \fBint\fR variable. Zero is returned if there are
+no back references.
+
+  PCRE_INFO_FIRSTCHAR
+
+Return information about the first character of any matched string, for a
+non-anchored pattern. If there is a fixed first character, e.g. from a pattern
+such as (cat|cow|coyote), then it is returned in the integer pointed to by
+\fIwhere\fR. Otherwise, if either
 
 (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
 starts with "^", or
@@ -289,7 +334,40 @@ starts with "^", or
 
 then -1 is returned, indicating that the pattern matches only at the
 start of a subject string or after any "\\n" within the string. Otherwise -2 is
-returned.
+returned. For anchored patterns, -2 is returned.
+
+  PCRE_INFO_FIRSTTABLE
+
+If the pattern was studied, and this resulted in the construction of a 256-bit
+table indicating a fixed set of characters for the first character in any
+matching string, a pointer to the table is returned. Otherwise NULL is
+returned. The fourth argument should point to an \fBunsigned char *\fR
+variable.
+
+  PCRE_INFO_LASTLITERAL
+
+For a non-anchored pattern, return the value of the rightmost literal character
+which must exist in any matched string, other than at its start. The fourth
+argument should point to an \fBint\fR variable. If there is no such character,
+or if the pattern is anchored, -1 is returned. For example, for the pattern
+/a\\d+z\\d+/ the returned value is 'z'.
+
+The \fBpcre_info()\fR function is now obsolete because its interface is too
+restrictive to return all the available data about a compiled pattern. New
+programs should use \fBpcre_fullinfo()\fR instead. The yield of
+\fBpcre_info()\fR is the number of capturing subpatterns, or one of the
+following negative numbers:
+
+  PCRE_ERROR_NULL       the argument \fIcode\fR was NULL
+  PCRE_ERROR_BADMAGIC   the "magic number" was not found
+
+If the \fIoptptr\fR argument is not NULL, a copy of the options with which the
+pattern was compiled is placed in the integer it points to (see
+PCRE_INFO_OPTIONS above).
+
+If the pattern is not anchored and the \fIfirstcharptr\fR argument is not NULL,
+it is used to pass back information about the first character of any matched
+string (see PCRE_INFO_FIRSTCHAR above).
 
 
 .SH MATCHING A PATTERN
@@ -564,7 +642,9 @@ are not part of its pattern matching engine.
 6. The Perl \\G assertion is not supported as it is not relevant to single
 pattern matches.
 
-7. Fairly obviously, PCRE does not support the (?{code}) construction.
+7. Fairly obviously, PCRE does not support the (?{code}) and (?p{code})
+constructions. However, there is some experimental support for recursive
+patterns using the non-Perl item (?R).
 
 8. There are at the time of writing some oddities in Perl 5.005_02 concerned
 with the settings of captured strings when part of a pattern is repeated. For
@@ -602,13 +682,16 @@ of the subject.
 (f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY options for
 \fBpcre_exec()\fR have no Perl equivalents.
 
+(g) The (?R) construct allows for recursive pattern matching (Perl 5.6 can do
+this using the (?p{code}) construct, which PCRE cannot of course support.)
+
 
 .SH REGULAR EXPRESSION DETAILS
 The syntax and semantics of the regular expressions supported by PCRE are
 described below. Regular expressions are also described in the Perl
 documentation and in a number of other books, some of which have copious
 examples. Jeffrey Friedl's "Mastering Regular Expressions", published by
-O'Reilly (ISBN 1-56592-257-3), covers them in great detail. The description
+O'Reilly (ISBN 1-56592-257-3), covers them in great detail. The description
 here is intended as reference documentation.
 
 A regular expression is a pattern that is matched against a subject string from
@@ -906,6 +989,40 @@ terminating ] are non-special in character classes, but it does no harm if they
 are escaped.
 
 
+.SH POSIX CHARACTER CLASSES
+Perl 5.6 (not yet released at the time of writing) is going to support the
+POSIX notation for character classes, which uses names enclosed by [: and :]
+within the enclosing square brackets. PCRE supports this notation. For example,
+
+  [01[:alpha:]%]
+
+matches "0", "1", any alphabetic character, or "%". The supported class names
+are
+
+  alnum    letters and digits
+  alpha    letters
+  ascii    character codes 0 - 127
+  cntrl    control characters
+  digit    decimal digits (same as \\d)
+  graph    printing characters, excluding space
+  lower    lower case letters
+  print    printing characters, including space
+  punct    printing characters, excluding letters and digits
+  space    white space (same as \\s)
+  upper    upper case letters
+  word     "word" characters (same as \\w)
+  xdigit   hexadecimal digits
+
+The names "ascii" and "word" are Perl extensions. Another Perl extension is
+negation, which is indicated by a ^ character after the colon. For example,
+
+  [12[:^digit:]]
+
+matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
+syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
+supported, and an error is given if they are encountered.
+
+
 .SH VERTICAL BAR
 Vertical bar characters are used to separate alternative patterns. For example,
 the pattern
@@ -1352,18 +1469,17 @@ pattern such as
 
   abcd$
 
-when applied to a long string which does not match it. Because matching
-proceeds from left to right, PCRE will look for each "a" in the subject and
-then see if what follows matches the rest of the pattern. If the pattern is
-specified as
+when applied to a long string which does not match. Because matching proceeds
+from left to right, PCRE will look for each "a" in the subject and then see if
+what follows matches the rest of the pattern. If the pattern is specified as
 
   ^.*abcd$
 
-then the initial .* matches the entire string at first, but when this fails, it
-backtracks to match all but the last character, then all but the last two
-characters, and so on. Once again the search for "a" covers the entire string,
-from right to left, so we are no better off. However, if the pattern is written
-as
+then the initial .* matches the entire string at first, but when this fails
+(because there is no following "a"), it backtracks to match all but the last
+character, then all but the last two characters, and so on. Once again the
+search for "a" covers the entire string, from right to left, so we are no
+better off. However, if the pattern is written as
 
   ^(?>.*)(?<=abcd)
 
@@ -1372,6 +1488,31 @@ string. The subsequent lookbehind assertion does a single test on the last four
 characters. If it fails, the match fails immediately. For long strings, this
 approach makes a significant difference to the processing time.
 
+When a pattern contains an unlimited repeat inside a subpattern that can itself
+be repeated an unlimited number of times, the use of a once-only subpattern is
+the only way to avoid some failing matches taking a very long time indeed.
+The pattern
+
+  (\\D+|<\\d+>)*[!?]
+
+matches an unlimited number of substrings that either consist of non-digits, or
+digits enclosed in <>, followed by either ! or ?. When it matches, it runs
+quickly. However, if it is applied to
+
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+it takes a long time before reporting failure. This is because the string can
+be divided between the two repeats in a large number of ways, and all have to
+be tried. (The example used [!?] rather than a single character at the end,
+because both PCRE and Perl have an optimization that allows for fast failure
+when a single character is used. They remember the last single character that
+is required for a match, and fail early if it is not present in the string.)
+If the pattern is changed to
+
+  ((?>\\D+)|<\\d+>)*[!?]
+
+sequences of non-digits cannot be broken, and failure happens quickly.
+
 
 .SH CONDITIONAL SUBPATTERNS
 It is possible to cause the matching process to obey a subpattern
@@ -1431,6 +1572,65 @@ character class introduces a comment that continues up to the next newline
 character in the pattern.
 
 
+.SH RECURSIVE PATTERNS
+Consider the problem of matching a string in parentheses, allowing for
+unlimited nested parentheses. Without the use of recursion, the best that can
+be done is to use a pattern that matches up to some fixed depth of nesting. It
+is not possible to handle an arbitrary nesting depth. Perl 5.6 has provided an
+experimental facility that allows regular expressions to recurse (amongst other
+things). It does this by interpolating Perl code in the expression at run time,
+and the code can refer to the expression itself. A Perl pattern to solve the
+parentheses problem can be created like this:
+
+  $re = qr{\\( (?: (?>[^()]+) | (?p{$re}) )* \\)}x;
+
+The (?p{...}) item interpolates Perl code at run time, and in this case refers
+recursively to the pattern in which it appears. Obviously, PCRE cannot support
+the interpolation of Perl code. Instead, the special item (?R) is provided for
+the specific case of recursion. This PCRE pattern solves the parentheses
+problem (assume the PCRE_EXTENDED option is set so that white space is
+ignored):
+
+  \\( ( (?>[^()]+) | (?R) )* \\)
+
+First it matches an opening parenthesis. Then it matches any number of
+substrings which can either be a sequence of non-parentheses, or a recursive
+match of the pattern itself (i.e. a correctly parenthesized substring). Finally
+there is a closing parenthesis.
+
+This particular example pattern contains nested unlimited repeats, and so the
+use of a once-only subpattern for matching strings of non-parentheses is
+important when applying the pattern to strings that do not match. For example,
+when it is applied to
+
+  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+
+it yields "no match" quickly. However, if a once-only subpattern is not used,
+the match runs for a very long time indeed because there are so many different
+ways the + and * repeats can carve up the subject, and all have to be tested
+before failure can be reported.
+
+The values set for any capturing subpatterns are those from the outermost level
+of the recursion at which the subpattern value is set. If the pattern above is
+matched against
+
+  (ab(cd)ef)
+
+the value for the capturing parentheses is "ef", which is the last value taken
+on at the top level. If additional parentheses are added, giving
+
+  \\( ( ( (?>[^()]+) | (?R) )* ) \\)
+     ^                        ^
+     ^                        ^
+then the string they capture is "ab(cd)ef", the contents of the top level
+parentheses. If there are more than 15 capturing parentheses in a pattern, PCRE
+has to obtain extra memory to store data during a recursion, which it does by
+using \fBpcre_malloc\fR, freeing it via \fBpcre_free\fR afterwards. If no
+memory can be obtained, it saves data for the first 15 capturing parentheses
+only, as there is no way to give an out-of-memory error from within a
+recursion.
+
+
 .SH PERFORMANCE
 Certain items that may appear in patterns are more efficient than others. It is
 more efficient to use a character class like [aeiou] than a set of alternatives
@@ -1497,6 +1697,6 @@ Cambridge CB2 3QG, England.
 .br
 Phone: +44 1223 334714
 
-Last updated: 29 July 1999
+Last updated: 27 January 2000
 .br
-Copyright (c) 1997-1999 University of Cambridge.
+Copyright (c) 1997-2000 University of Cambridge.
diff --git a/doc/pcre.html b/doc/pcre.html
index 6d91a5c..2ce2890 100644
--- a/doc/pcre.html
+++ b/doc/pcre.html
@@ -25,17 +25,19 @@ conversion went wrong.
 <LI><A NAME="TOC15" HREF="#SEC15">CIRCUMFLEX AND DOLLAR</A>
 <LI><A NAME="TOC16" HREF="#SEC16">FULL STOP (PERIOD, DOT)</A>
 <LI><A NAME="TOC17" HREF="#SEC17">SQUARE BRACKETS</A>
-<LI><A NAME="TOC18" HREF="#SEC18">VERTICAL BAR</A>
-<LI><A NAME="TOC19" HREF="#SEC19">INTERNAL OPTION SETTING</A>
-<LI><A NAME="TOC20" HREF="#SEC20">SUBPATTERNS</A>
-<LI><A NAME="TOC21" HREF="#SEC21">REPETITION</A>
-<LI><A NAME="TOC22" HREF="#SEC22">BACK REFERENCES</A>
-<LI><A NAME="TOC23" HREF="#SEC23">ASSERTIONS</A>
-<LI><A NAME="TOC24" HREF="#SEC24">ONCE-ONLY SUBPATTERNS</A>
-<LI><A NAME="TOC25" HREF="#SEC25">CONDITIONAL SUBPATTERNS</A>
-<LI><A NAME="TOC26" HREF="#SEC26">COMMENTS</A>
-<LI><A NAME="TOC27" HREF="#SEC27">PERFORMANCE</A>
-<LI><A NAME="TOC28" HREF="#SEC28">AUTHOR</A>
+<LI><A NAME="TOC18" HREF="#SEC18">POSIX CHARACTER CLASSES</A>
+<LI><A NAME="TOC19" HREF="#SEC19">VERTICAL BAR</A>
+<LI><A NAME="TOC20" HREF="#SEC20">INTERNAL OPTION SETTING</A>
+<LI><A NAME="TOC21" HREF="#SEC21">SUBPATTERNS</A>
+<LI><A NAME="TOC22" HREF="#SEC22">REPETITION</A>
+<LI><A NAME="TOC23" HREF="#SEC23">BACK REFERENCES</A>
+<LI><A NAME="TOC24" HREF="#SEC24">ASSERTIONS</A>
+<LI><A NAME="TOC25" HREF="#SEC25">ONCE-ONLY SUBPATTERNS</A>
+<LI><A NAME="TOC26" HREF="#SEC26">CONDITIONAL SUBPATTERNS</A>
+<LI><A NAME="TOC27" HREF="#SEC27">COMMENTS</A>
+<LI><A NAME="TOC28" HREF="#SEC28">RECURSIVE PATTERNS</A>
+<LI><A NAME="TOC29" HREF="#SEC29">PERFORMANCE</A>
+<LI><A NAME="TOC30" HREF="#SEC30">AUTHOR</A>
 </UL>
 <LI><A NAME="SEC1" HREF="#TOC1">NAME</A>
 <P>
@@ -77,6 +79,10 @@ pcre - Perl-compatible regular expressions.
 <B>const unsigned char *pcre_maketables(void);</B>
 </P>
 <P>
+<B>int pcre_fullinfo(const pcre *<I>code</I>, const pcre_extra *<I>extra</I>,</B>
+<B>int <I>what</I>, void *<I>where</I>);</B>
+</P>
+<P>
 <B>int pcre_info(const pcre *<I>code</I>, int *<I>optptr</I>, int</B>
 <B>*<I>firstcharptr</I>);</B>
 </P>
@@ -93,18 +99,21 @@ pcre - Perl-compatible regular expressions.
 <P>
 The PCRE library is a set of functions that implement regular expression
 pattern matching using the same syntax and semantics as Perl 5, with just a few
-differences (see below). The current implementation corresponds to Perl 5.005.
+differences (see below). The current implementation corresponds to Perl 5.005,
+with some additional features from the Perl development release.
 </P>
 <P>
 PCRE has its own native API, which is described in this document. There is also
-a set of wrapper functions that correspond to the POSIX API. These are
-described in the <B>pcreposix</B> documentation.
+a set of wrapper functions that correspond to the POSIX regular expression API.
+These are described in the <B>pcreposix</B> documentation.
 </P>
 <P>
 The native API function prototypes are defined in the header file <B>pcre.h</B>,
 and on Unix systems the library itself is called <B>libpcre.a</B>, so can be
 accessed by adding <B>-lpcre</B> to the command for linking an application which
-calls it.
+calls it. The header file defines the macros PCRE_MAJOR and PCRE_MINOR to
+contain the major and minor release numbers for the library. Applications can
+use these to include support for different releases.
 </P>
 <P>
 The functions <B>pcre_compile()</B>, <B>pcre_study()</B>, and <B>pcre_exec()</B>
@@ -116,9 +125,11 @@ captured substrings from a matched subject string. The function
 in the current locale for passing to <B>pcre_compile()</B>.
 </P>
 <P>
-The function <B>pcre_info()</B> is used to find out information about a compiled
-pattern, while the function <B>pcre_version()</B> returns a pointer to a string
-containing the version of PCRE and its date of release.
+The function <B>pcre_fullinfo()</B> is used to find out information about a
+compiled pattern; <B>pcre_info()</B> is an obsolete version which returns only
+some of the available information, but is retained for backwards compatibility.
+The function <B>pcre_version()</B> returns a pointer to a string containing the
+version of PCRE and its date of release.
 </P>
 <P>
 The global variables <B>pcre_malloc</B> and <B>pcre_free</B> initially contain
@@ -246,12 +257,14 @@ sequence (?( which introduces a conditional subpattern.
 </PRE>
 </P>
 <P>
-This option turns on additional functionality of PCRE that is incompatible with
-Perl. Any backslash in a pattern that is followed by a letter that has no
+This option was invented in order to turn on additional functionality of PCRE
+that is incompatible with Perl, but it is currently of very little use. When
+set, any backslash in a pattern that is followed by a letter that has no
 special meaning causes an error, thus reserving these combinations for future
 expansion. By default, as in Perl, a backslash followed by a letter with no
 special meaning is treated as a literal. There are at present no other features
-controlled by this option.
+controlled by this option. It can also be set by a (?X) option setting within a
+pattern.
 </P>
 <P>
 <PRE>
@@ -342,30 +355,83 @@ memory containing the tables remains available for as long as it is needed.
 </P>
 <LI><A NAME="SEC8" HREF="#TOC1">INFORMATION ABOUT A PATTERN</A>
 <P>
-The <B>pcre_info()</B> function returns information about a compiled pattern.
-Its yield is the number of capturing subpatterns, or one of the following
-negative numbers:
+The <B>pcre_fullinfo()</B> function returns information about a compiled
+pattern. It replaces the obsolete <B>pcre_info()</B> function, which is
+nevertheless retained for backwards compatibility (and is documented below).
+</P>
+<P>
+The first argument for <B>pcre_fullinfo()</B> is a pointer to the compiled
+pattern. The second argument is the result of <B>pcre_study()</B>, or NULL if
+the pattern was not studied. The third argument specifies which piece of
+information is required, while the fourth argument is a pointer to a variable
+to receive the data. The yield of the function is zero for success, or one of
+the following negative numbers:
 </P>
 <P>
 <PRE>
   PCRE_ERROR_NULL       the argument <I>code</I> was NULL
+                        the argument <I>where</I> was NULL
   PCRE_ERROR_BADMAGIC   the "magic number" was not found
+  PCRE_ERROR_BADOPTION  the value of <I>what</I> was invalid
 </PRE>
 </P>
 <P>
-If the <I>optptr</I> argument is not NULL, a copy of the options with which the
-pattern was compiled is placed in the integer it points to. These option bits
+The possible values for the third argument are defined in <B>pcre.h</B>, and are
+as follows:
+</P>
+<P>
+<PRE>
+  PCRE_INFO_OPTIONS
+</PRE>
+</P>
+<P>
+Return a copy of the options with which the pattern was compiled. The fourth
+argument should point to an <B>unsigned long int</B> variable. These option bits
 are those specified in the call to <B>pcre_compile()</B>, modified by any
 top-level option settings within the pattern itself, and with the PCRE_ANCHORED
-bit set if the form of the pattern implies that it can match only at the start
-of a subject string.
+bit forcibly set if the form of the pattern implies that it can match only at
+the start of a subject string.
 </P>
 <P>
-If the pattern is not anchored and the <I>firstcharptr</I> argument is not NULL,
-it is used to pass back information about the first character of any matched
-string. If there is a fixed first character, e.g. from a pattern such as
-(cat|cow|coyote), then it is returned in the integer pointed to by
-<I>firstcharptr</I>. Otherwise, if either
+<PRE>
+  PCRE_INFO_SIZE
+</PRE>
+</P>
+<P>
+Return the size of the compiled pattern, that is, the value that was passed as
+the argument to <B>pcre_malloc()</B> when PCRE was getting memory in which to
+place the compiled data. The fourth argument should point to a <B>size_t</B>
+variable.
+</P>
+<P>
+<PRE>
+  PCRE_INFO_CAPTURECOUNT
+</PRE>
+</P>
+<P>
+Return the number of capturing subpatterns in the pattern. The fourth argument
+should point to an <B>int</B> variable.
+</P>
+<P>
+<PRE>
+  PCRE_INFO_BACKREFMAX
+</PRE>
+</P>
+<P>
+Return the number of the highest back reference in the pattern. The fourth
+argument should point to an <B>int</B> variable. Zero is returned if there are
+no back references.
+</P>
+<P>
+<PRE>
+  PCRE_INFO_FIRSTCHAR
+</PRE>
+</P>
+<P>
+Return information about the first character of any matched string, for a
+non-anchored pattern. If there is a fixed first character, e.g. from a pattern
+such as (cat|cow|coyote), then it is returned in the integer pointed to by
+<I>where</I>. Otherwise, if either
 </P>
 <P>
 (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
@@ -378,7 +444,54 @@ starts with "^", or
 <P>
 then -1 is returned, indicating that the pattern matches only at the
 start of a subject string or after any "\n" within the string. Otherwise -2 is
-returned.
+returned. For anchored patterns, -2 is returned.
+</P>
+<P>
+<PRE>
+  PCRE_INFO_FIRSTTABLE
+</PRE>
+</P>
+<P>
+If the pattern was studied, and this resulted in the construction of a 256-bit
+table indicating a fixed set of characters for the first character in any
+matching string, a pointer to the table is returned. Otherwise NULL is
+returned. The fourth argument should point to an <B>unsigned char *</B>
+variable.
+</P>
+<P>
+<PRE>
+  PCRE_INFO_LASTLITERAL
+</PRE>
+</P>
+<P>
+For a non-anchored pattern, return the value of the rightmost literal character
+which must exist in any matched string, other than at its start. The fourth
+argument should point to an <B>int</B> variable. If there is no such character,
+or if the pattern is anchored, -1 is returned. For example, for the pattern
+/a\d+z\d+/ the returned value is 'z'.
+</P>
+<P>
+The <B>pcre_info()</B> function is now obsolete because its interface is too
+restrictive to return all the available data about a compiled pattern. New
+programs should use <B>pcre_fullinfo()</B> instead. The yield of
+<B>pcre_info()</B> is the number of capturing subpatterns, or one of the
+following negative numbers:
+</P>
+<P>
+<PRE>
+  PCRE_ERROR_NULL       the argument <I>code</I> was NULL
+  PCRE_ERROR_BADMAGIC   the "magic number" was not found
+</PRE>
+</P>
+<P>
+If the <I>optptr</I> argument is not NULL, a copy of the options with which the
+pattern was compiled is placed in the integer it points to (see
+PCRE_INFO_OPTIONS above).
+</P>
+<P>
+If the pattern is not anchored and the <I>firstcharptr</I> argument is not NULL,
+it is used to pass back information about the first character of any matched
+string (see PCRE_INFO_FIRSTCHAR above).
 </P>
 <LI><A NAME="SEC9" HREF="#TOC1">MATCHING A PATTERN</A>
 <P>
@@ -735,7 +848,9 @@ are not part of its pattern matching engine.
 pattern matches.
 </P>
 <P>
-7. Fairly obviously, PCRE does not support the (?{code}) construction.
+7. Fairly obviously, PCRE does not support the (?{code}) and (?p{code})
+constructions. However, there is some experimental support for recursive
+patterns using the non-Perl item (?R).
 </P>
 <P>
 8. There are at the time of writing some oddities in Perl 5.005_02 concerned
@@ -783,13 +898,17 @@ of the subject.
 (f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY options for
 <B>pcre_exec()</B> have no Perl equivalents.
 </P>
+<P>
+(g) The (?R) construct allows for recursive pattern matching (Perl 5.6 can do
+this using the (?p{code}) construct, which PCRE cannot of course support.)
+</P>
 <LI><A NAME="SEC13" HREF="#TOC1">REGULAR EXPRESSION DETAILS</A>
 <P>
 The syntax and semantics of the regular expressions supported by PCRE are
 described below. Regular expressions are also described in the Perl
 documentation and in a number of other books, some of which have copious
 examples. Jeffrey Friedl's "Mastering Regular Expressions", published by
-O'Reilly (ISBN 1-56592-257-3), covers them in great detail. The description
+O'Reilly (ISBN 1-56592-257-3), covers them in great detail. The description
 here is intended as reference documentation.
 </P>
 <P>
@@ -1144,7 +1263,53 @@ All non-alphameric characters other than \, -, ^ (at the start) and the
 terminating ] are non-special in character classes, but it does no harm if they
 are escaped.
 </P>
-<LI><A NAME="SEC18" HREF="#TOC1">VERTICAL BAR</A>
+<LI><A NAME="SEC18" HREF="#TOC1">POSIX CHARACTER CLASSES</A>
+<P>
+Perl 5.6 (not yet released at the time of writing) is going to support the
+POSIX notation for character classes, which uses names enclosed by [: and :]
+within the enclosing square brackets. PCRE supports this notation. For example,
+</P>
+<P>
+<PRE>
+  [01[:alpha:]%]
+</PRE>
+</P>
+<P>
+matches "0", "1", any alphabetic character, or "%". The supported class names
+are
+</P>
+<P>
+<PRE>
+  alnum    letters and digits
+  alpha    letters
+  ascii    character codes 0 - 127
+  cntrl    control characters
+  digit    decimal digits (same as \d)
+  graph    printing characters, excluding space
+  lower    lower case letters
+  print    printing characters, including space
+  punct    printing characters, excluding letters and digits
+  space    white space (same as \s)
+  upper    upper case letters
+  word     "word" characters (same as \w)
+  xdigit   hexadecimal digits
+</PRE>
+</P>
+<P>
+The names "ascii" and "word" are Perl extensions. Another Perl extension is
+negation, which is indicated by a ^ character after the colon. For example,
+</P>
+<P>
+<PRE>
+  [12[:^digit:]]
+</PRE>
+</P>
+<P>
+matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
+syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
+supported, and an error is given if they are encountered.
+</P>
+<LI><A NAME="SEC19" HREF="#TOC1">VERTICAL BAR</A>
 <P>
 Vertical bar characters are used to separate alternative patterns. For example,
 the pattern
@@ -1162,7 +1327,7 @@ and the first one that succeeds is used. If the alternatives are within a
 subpattern (defined below), "succeeds" means matching the rest of the main
 pattern as well as the alternative in the subpattern.
 </P>
-<LI><A NAME="SEC19" HREF="#TOC1">INTERNAL OPTION SETTING</A>
+<LI><A NAME="SEC20" HREF="#TOC1">INTERNAL OPTION SETTING</A>
 <P>
 The settings of PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and PCRE_EXTENDED
 can be changed from within the pattern by a sequence of Perl option letters
@@ -1238,7 +1403,7 @@ respectively. The (?X) flag setting is special in that it must always occur
 earlier in the pattern than any of the additional features it turns on, even
 when it is at top level. It is best put at the start.
 </P>
-<LI><A NAME="SEC20" HREF="#TOC1">SUBPATTERNS</A>
+<LI><A NAME="SEC21" HREF="#TOC1">SUBPATTERNS</A>
 <P>
 Subpatterns are delimited by parentheses (round brackets), which can be nested.
 Marking part of a pattern as a subpattern does two things:
@@ -1309,7 +1474,7 @@ from left to right, and options are not reset until the end of the subpattern
 is reached, an option setting in one branch does affect subsequent branches, so
 the above patterns match "SUNDAY" as well as "Saturday".
 </P>
-<LI><A NAME="SEC21" HREF="#TOC1">REPETITION</A>
+<LI><A NAME="SEC22" HREF="#TOC1">REPETITION</A>
 <P>
 Repetition is specified by quantifiers, which can follow any of the following
 items:
@@ -1484,7 +1649,7 @@ example, after
 <P>
 matches "aba" the value of the second captured substring is "b".
 </P>
-<LI><A NAME="SEC22" HREF="#TOC1">BACK REFERENCES</A>
+<LI><A NAME="SEC23" HREF="#TOC1">BACK REFERENCES</A>
 <P>
 Outside a character class, a backslash followed by a digit greater than 0 (and
 possibly further digits) is a back reference to a capturing subpattern earlier
@@ -1560,7 +1725,7 @@ that the first iteration does not need to match the back reference. This can be
 done using alternation, as in the example above, or by a quantifier with a
 minimum of zero.
 </P>
-<LI><A NAME="SEC23" HREF="#TOC1">ASSERTIONS</A>
+<LI><A NAME="SEC24" HREF="#TOC1">ASSERTIONS</A>
 <P>
 An assertion is a test on the characters following or preceding the current
 matching point that does not actually consume any characters. The simple
@@ -1718,7 +1883,7 @@ because it does not make sense for negative assertions.
 <P>
 Assertions count towards the maximum of 200 parenthesized subpatterns.
 </P>
-<LI><A NAME="SEC24" HREF="#TOC1">ONCE-ONLY SUBPATTERNS</A>
+<LI><A NAME="SEC25" HREF="#TOC1">ONCE-ONLY SUBPATTERNS</A>
 <P>
 With both maximizing and minimizing repetition, failure of what follows
 normally causes the repeated item to be re-evaluated to see if a different
@@ -1782,10 +1947,9 @@ pattern such as
 </PRE>
 </P>
 <P>
-when applied to a long string which does not match it. Because matching
-proceeds from left to right, PCRE will look for each "a" in the subject and
-then see if what follows matches the rest of the pattern. If the pattern is
-specified as
+when applied to a long string which does not match. Because matching proceeds
+from left to right, PCRE will look for each "a" in the subject and then see if
+what follows matches the rest of the pattern. If the pattern is specified as
 </P>
 <P>
 <PRE>
@@ -1793,11 +1957,11 @@ specified as
 </PRE>
 </P>
 <P>
-then the initial .* matches the entire string at first, but when this fails, it
-backtracks to match all but the last character, then all but the last two
-characters, and so on. Once again the search for "a" covers the entire string,
-from right to left, so we are no better off. However, if the pattern is written
-as
+then the initial .* matches the entire string at first, but when this fails
+(because there is no following "a"), it backtracks to match all but the last
+character, then all but the last two characters, and so on. Once again the
+search for "a" covers the entire string, from right to left, so we are no
+better off. However, if the pattern is written as
 </P>
 <P>
 <PRE>
@@ -1810,7 +1974,45 @@ string. The subsequent lookbehind assertion does a single test on the last four
 characters. If it fails, the match fails immediately. For long strings, this
 approach makes a significant difference to the processing time.
 </P>
-<LI><A NAME="SEC25" HREF="#TOC1">CONDITIONAL SUBPATTERNS</A>
+<P>
+When a pattern contains an unlimited repeat inside a subpattern that can itself
+be repeated an unlimited number of times, the use of a once-only subpattern is
+the only way to avoid some failing matches taking a very long time indeed.
+The pattern
+</P>
+<P>
+<PRE>
+  (\D+|&#60;\d+&#62;)*[!?]
+</PRE>
+</P>
+<P>
+matches an unlimited number of substrings that either consist of non-digits, or
+digits enclosed in &#60;&#62;, followed by either ! or ?. When it matches, it runs
+quickly. However, if it is applied to
+</P>
+<P>
+<PRE>
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+</PRE>
+</P>
+<P>
+it takes a long time before reporting failure. This is because the string can
+be divided between the two repeats in a large number of ways, and all have to
+be tried. (The example used [!?] rather than a single character at the end,
+because both PCRE and Perl have an optimization that allows for fast failure
+when a single character is used. They remember the last single character that
+is required for a match, and fail early if it is not present in the string.)
+If the pattern is changed to
+</P>
+<P>
+<PRE>
+  ((?&#62;\D+)|&#60;\d+&#62;)*[!?]
+</PRE>
+</P>
+<P>
+sequences of non-digits cannot be broken, and failure happens quickly.
+</P>
+<LI><A NAME="SEC26" HREF="#TOC1">CONDITIONAL SUBPATTERNS</A>
 <P>
 It is possible to cause the matching process to obey a subpattern
 conditionally or to choose between two alternative subpatterns, depending on
@@ -1872,7 +2074,7 @@ subject is matched against the first alternative; otherwise it is matched
 against the second. This pattern matches strings in one of the two forms
 dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
 </P>
-<LI><A NAME="SEC26" HREF="#TOC1">COMMENTS</A>
+<LI><A NAME="SEC27" HREF="#TOC1">COMMENTS</A>
 <P>
 The sequence (?# marks the start of a comment which continues up to the next
 closing parenthesis. Nested parentheses are not permitted. The characters
@@ -1883,7 +2085,87 @@ If the PCRE_EXTENDED option is set, an unescaped # character outside a
 character class introduces a comment that continues up to the next newline
 character in the pattern.
 </P>
-<LI><A NAME="SEC27" HREF="#TOC1">PERFORMANCE</A>
+<LI><A NAME="SEC28" HREF="#TOC1">RECURSIVE PATTERNS</A>
+<P>
+Consider the problem of matching a string in parentheses, allowing for
+unlimited nested parentheses. Without the use of recursion, the best that can
+be done is to use a pattern that matches up to some fixed depth of nesting. It
+is not possible to handle an arbitrary nesting depth. Perl 5.6 has provided an
+experimental facility that allows regular expressions to recurse (amongst other
+things). It does this by interpolating Perl code in the expression at run time,
+and the code can refer to the expression itself. A Perl pattern to solve the
+parentheses problem can be created like this:
+</P>
+<P>
+<PRE>
+  $re = qr{\( (?: (?&#62;[^()]+) | (?p{$re}) )* \)}x;
+</PRE>
+</P>
+<P>
+The (?p{...}) item interpolates Perl code at run time, and in this case refers
+recursively to the pattern in which it appears. Obviously, PCRE cannot support
+the interpolation of Perl code. Instead, the special item (?R) is provided for
+the specific case of recursion. This PCRE pattern solves the parentheses
+problem (assume the PCRE_EXTENDED option is set so that white space is
+ignored):
+</P>
+<P>
+<PRE>
+  \( ( (?&#62;[^()]+) | (?R) )* \)
+</PRE>
+</P>
+<P>
+First it matches an opening parenthesis. Then it matches any number of
+substrings which can either be a sequence of non-parentheses, or a recursive
+match of the pattern itself (i.e. a correctly parenthesized substring). Finally
+there is a closing parenthesis.
+</P>
+<P>
+This particular example pattern contains nested unlimited repeats, and so the
+use of a once-only subpattern for matching strings of non-parentheses is
+important when applying the pattern to strings that do not match. For example,
+when it is applied to
+</P>
+<P>
+<PRE>
+  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+</PRE>
+</P>
+<P>
+it yields "no match" quickly. However, if a once-only subpattern is not used,
+the match runs for a very long time indeed because there are so many different
+ways the + and * repeats can carve up the subject, and all have to be tested
+before failure can be reported.
+</P>
+<P>
+The values set for any capturing subpatterns are those from the outermost level
+of the recursion at which the subpattern value is set. If the pattern above is
+matched against
+</P>
+<P>
+<PRE>
+  (ab(cd)ef)
+</PRE>
+</P>
+<P>
+the value for the capturing parentheses is "ef", which is the last value taken
+on at the top level. If additional parentheses are added, giving
+</P>
+<P>
+<PRE>
+  \( ( ( (?&#62;[^()]+) | (?R) )* ) \)
+     ^                        ^
+     ^                        ^
+</PRE>
+then the string they capture is "ab(cd)ef", the contents of the top level
+parentheses. If there are more than 15 capturing parentheses in a pattern, PCRE
+has to obtain extra memory to store data during a recursion, which it does by
+using <B>pcre_malloc</B>, freeing it via <B>pcre_free</B> afterwards. If no
+memory can be obtained, it saves data for the first 15 capturing parentheses
+only, as there is no way to give an out-of-memory error from within a
+recursion.
+</P>
+<LI><A NAME="SEC29" HREF="#TOC1">PERFORMANCE</A>
 <P>
 Certain items that may appear in patterns are more efficient than others. It is
 more efficient to use a character class like [aeiou] than a set of alternatives
@@ -1959,7 +2241,7 @@ with the pattern above. The former gives a failure almost instantly when
 applied to a whole line of "a" characters, whereas the latter takes an
 appreciable time with strings longer than about 20 characters.
 </P>
-<LI><A NAME="SEC28" HREF="#TOC1">AUTHOR</A>
+<LI><A NAME="SEC30" HREF="#TOC1">AUTHOR</A>
 <P>
 Philip Hazel &#60;ph10@cam.ac.uk&#62;
 <BR>
@@ -1972,6 +2254,6 @@ Cambridge CB2 3QG, England.
 Phone: +44 1223 334714
 </P>
 <P>
-Last updated: 29 July 1999
+Last updated: 27 January 2000
 <BR>
-Copyright (c) 1997-1999 University of Cambridge.
+Copyright (c) 1997-2000 University of Cambridge.
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 2374f7c..f28ee99 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -30,6 +30,9 @@ SYNOPSIS
 
      const unsigned char *pcre_maketables(void);
 
+     int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
+          int what, void *where);
+
      int pcre_info(const pcre *code, int *optptr, *firstcharptr);
 
      char *pcre_version(void);
@@ -46,16 +49,22 @@ DESCRIPTION
      lar  expression  pattern  matching using the same syntax and
      semantics as Perl  5,  with  just  a  few  differences  (see
      below).  The  current  implementation  corresponds  to  Perl
-     5.005.
+     5.005, with some additional features from the Perl  develop-
+     ment release.
 
      PCRE has its own native API,  which  is  described  in  this
      document.  There  is  also  a  set of wrapper functions that
-     correspond to the POSIX API.  These  are  described  in  the
-     pcreposix documentation.
+     correspond to the POSIX regular expression API.   These  are
+     described in the pcreposix documentation.
+
      The native API function prototypes are defined in the header
      file  pcre.h,  and  on  Unix  systems  the library itself is
      called libpcre.a, so can be accessed by adding -lpcre to the
-     command for linking an application which calls it.
+     command  for  linking  an  application  which  calls it. The
+     header file defines the macros PCRE_MAJOR and PCRE_MINOR  to
+     contain the major and minor release numbers for the library.
+     Applications can use these to include support for  different
+     releases.
 
      The functions pcre_compile(), pcre_study(), and  pcre_exec()
      are  used  for  compiling  and matching regular expressions,
@@ -66,10 +75,12 @@ DESCRIPTION
      to build a set of character tables in the current locale for
      passing to pcre_compile().
 
-     The function pcre_info() is used  to  find  out  information
-     about  a compiled pattern, while the function pcre_version()
-     returns a pointer to a string containing the version of PCRE
-     and its date of release.
+     The function pcre_fullinfo() is used to find out information
+     about a compiled pattern; pcre_info() is an obsolete version
+     which returns only some of the available information, but is
+     retained   for   backwards   compatibility.    The  function
+     pcre_version() returns a pointer to a string containing  the
+     version of PCRE and its date of release.
 
      The global variables  pcre_malloc  and  pcre_free  initially
      contain the entry points of the standard malloc() and free()
@@ -92,6 +103,7 @@ MULTI-THREADING
 
 
 
+
 COMPILING A PATTERN
      The function pcre_compile() is called to compile  a  pattern
      into  an internal form. The pattern is a C string terminated
@@ -187,14 +199,16 @@ COMPILING A PATTERN
 
        PCRE_EXTRA
 
-     This option turns on additional functionality of  PCRE  that
-     is  incompatible  with Perl. Any backslash in a pattern that
-     is followed by a letter that has no special  meaning  causes
-     an  error,  thus  reserving  these  combinations  for future
-     expansion. By default, as in Perl, a backslash followed by a
-     letter  with  no  special  meaning  is treated as a literal.
-     There are at present no other features  controlled  by  this
-     option.
+     This option was invented in  order  to  turn  on  additional
+     functionality of PCRE that is incompatible with Perl, but it
+     is currently of very little use. When set, any backslash  in
+     a  pattern  that is followed by a letter that has no special
+     meaning causes an error, thus reserving  these  combinations
+     for  future  expansion.  By default, as in Perl, a backslash
+     followed by a letter with no special meaning is treated as a
+     literal.  There  are at present no other features controlled
+     by this option. It can also be set by a (?X) option  setting
+     within a pattern.
 
        PCRE_MULTILINE
 
@@ -207,9 +221,9 @@ COMPILING A PATTERN
      PCRE_DOLLAR_ENDONLY is set). This is the same as Perl.
 
      When PCRE_MULTILINE it is set, the "start of line" and  "end
-     of   line"   constructs   match   immediately  following  or
-     immediately  before  any  newline  in  the  subject  string,
-     respectively,  as well as at the very start and end. This is
+     of  line"  constructs match immediately following or immedi-
+     ately before any newline  in  the  subject  string,  respec-
+     tively,  as  well  as  at  the  very  start and end. This is
      equivalent to Perl's /m option. If there are no "\n" charac-
      ters  in  a subject string, or no occurrences of ^ or $ in a
      pattern, setting PCRE_MULTILINE has no effect.
@@ -284,27 +298,63 @@ LOCALE SUPPORT
 
 
 INFORMATION ABOUT A PATTERN
-     The pcre_info() function returns information  about  a  com-
-     piled pattern.  Its yield is the number of capturing subpat-
-     terns, or one of the following negative numbers:
+     The pcre_fullinfo() function  returns  information  about  a
+     compiled pattern. It replaces the obsolete pcre_info() func-
+     tion, which is nevertheless retained for backwards compati-
+     bility (and is documented below).
+
+     The first argument for pcre_fullinfo() is a pointer  to  the
+     compiled  pattern.  The  second  argument  is  the result of
+     pcre_study(), or NULL if the pattern was  not  studied.  The
+     third  argument  specifies  which  piece  of  information is
+     required, while the fourth argument is a pointer to a  vari-
+     able  to receive the data. The yield of the function is zero
+     for success, or one of the following negative numbers:
 
        PCRE_ERROR_NULL       the argument code was NULL
+                             the argument where was NULL
        PCRE_ERROR_BADMAGIC   the "magic number" was not found
+       PCRE_ERROR_BADOPTION  the value of what was invalid
 
-     If the optptr argument is not NULL, a copy  of  the  options
-     with which the pattern was compiled is placed in the integer
-     it points to. These option bits are those specified  in  the
+     The possible values for the third argument  are  defined  in
+     pcre.h, and are as follows:
+
+       PCRE_INFO_OPTIONS
+
+     Return a copy of the options with which the pattern was com-
+     piled.  The fourth argument should point to an unsigned long
+     int variable. These option bits are those specified  in  the
      call  to  pcre_compile(),  modified  by any top-level option
      settings  within  the   pattern   itself,   and   with   the
-     PCRE_ANCHORED  bit  set  if  the form of the pattern implies
-     that it can match only at the start of a subject string.
+     PCRE_ANCHORED  bit  forcibly  set if the form of the pattern
+     implies that it can match only at the  start  of  a  subject
+     string.
 
-     If the pattern is not anchored and the firstcharptr argument
-     is  not  NULL, it is used to pass back information about the
-     first character of any matched string. If there is  a  fixed
-     first    character,    e.g.   from   a   pattern   such   as
+       PCRE_INFO_SIZE
+
+     Return the size of the compiled pattern, that is, the  value
+     that  was  passed as the argument to pcre_malloc() when PCRE
+     was getting memory in which to place the compiled data.  The
+     fourth argument should point to a size_t variable.
+
+       PCRE_INFO_CAPTURECOUNT
+
+     Return the number of capturing subpatterns in  the  pattern.
+     The fourth argument should point to an int variable.
+
+       PCRE_INFO_BACKREFMAX
+
+     Return the number of the highest back reference in the  pat-
+     tern.  The  fourth argument should point to an int variable.
+     Zero is returned if there are no back references.
+
+       PCRE_INFO_FIRSTCHAR
+
+     Return information about the first character of any  matched
+     string,  for  a  non-anchored  pattern.  If there is a fixed
+     first   character,   e.g.   from   a   pattern    such    as
      (cat|cow|coyote), then it is returned in the integer pointed
-     to by firstcharptr. Otherwise, if either
+     to by where. Otherwise, if either
 
      (a) the pattern was compiled with the PCRE_MULTILINE option,
      and every branch starts with "^", or
@@ -312,9 +362,48 @@ INFORMATION ABOUT A PATTERN
      (b) every  branch  of  the  pattern  starts  with  ".*"  and
      PCRE_DOTALL is not set (if it were set, the pattern would be
      anchored),
+
      then -1 is returned, indicating  that  the  pattern  matches
      only  at  the  start  of  a subject string or after any "\n"
-     within the string. Otherwise -2 is returned.
+     within the string. Otherwise -2 is  returned.  For  anchored
+     patterns, -2 is returned.
+
+       PCRE_INFO_FIRSTTABLE
+
+     If the pattern was studied, and this resulted  in  the  con-
+     struction of a 256-bit table indicating a fixed set of char-
+     acters for the first character in  any  matching  string,  a
+     pointer   to  the  table  is  returned.  Otherwise  NULL  is
+     returned. The fourth argument should point  to  an  unsigned
+     char * variable.
+
+       PCRE_INFO_LASTLITERAL
+
+     For a non-anchored pattern, return the value of  the  right-
+     most  literal  character  which  must  exist  in any matched
+     string, other than at its start. The fourth argument  should
+     point  to an int variable. If there is no such character, or
+     if the pattern is anchored, -1 is returned. For example, for
+     the pattern /a\d+z\d+/ the returned value is 'z'.
+
+     The pcre_info() function is now obsolete because its  inter-
+     face  is  too  restrictive  to return all the available data
+     about  a  compiled  pattern.   New   programs   should   use
+     pcre_fullinfo()  instead.  The  yield  of pcre_info() is the
+     number of capturing subpatterns, or  one  of  the  following
+     negative numbers:
+
+       PCRE_ERROR_NULL       the argument code was NULL
+       PCRE_ERROR_BADMAGIC   the "magic number" was not found
+
+     If the optptr argument is not NULL, a copy  of  the  options
+     with which the pattern was compiled is placed in the integer
+     it points to (see PCRE_INFO_OPTIONS above).
+
+     If the pattern is not anchored and the firstcharptr argument
+     is  not  NULL, it is used to pass back information about the
+     first    character    of    any    matched    string    (see
+     PCRE_INFO_FIRSTCHAR above).
 
 
 
@@ -640,9 +729,10 @@ DIFFERENCES FROM PERL
      6. The Perl \G assertion is  not  supported  as  it  is  not
      relevant to single pattern matches.
 
-     7. Fairly obviously, PCRE does  not  support  the  (?{code})
-     construction.
-
+     7. Fairly obviously, PCRE does not support the (?{code}) and
+     (?p{code})  constructions. However, there is some experimen-
+     tal support for recursive patterns using the  non-Perl  item
+     (?R).
      8. There are at the time of writing some  oddities  in  Perl
      5.005_02  concerned  with  the  settings of captured strings
      when part of a pattern is repeated.  For  example,  matching
@@ -675,9 +765,9 @@ DIFFERENCES FROM PERL
      (c) If PCRE_EXTRA is set, a backslash followed by  a  letter
      with no special meaning is faulted.
 
-     (d)  If  PCRE_UNGREEDY  is  set,  the  greediness   of   the
-     repetition quantifiers is inverted, that is, by default they
-     are not greedy, but if followed by a question mark they are.
+     (d) If PCRE_UNGREEDY is set, the greediness of  the  repeti-
+     tion  quantifiers  is inverted, that is, by default they are
+     not greedy, but if followed by a question mark they are.
 
      (e) PCRE_ANCHORED can be used to force a pattern to be tried
      only at the start of the subject.
@@ -685,15 +775,20 @@ DIFFERENCES FROM PERL
      (f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY  options
      for pcre_exec() have no Perl equivalents.
 
+     (g) The (?R) construct allows for recursive pattern matching
+     (Perl  5.6 can do this using the (?p{code}) construct, which
+     PCRE cannot of course support.)
+
 
 
 REGULAR EXPRESSION DETAILS
      The syntax and semantics of  the  regular  expressions  sup-
      ported  by PCRE are described below. Regular expressions are
      also described in the Perl documentation and in a number  of
+
      other  books,  some  of which have copious examples. Jeffrey
      Friedl's  "Mastering  Regular  Expressions",  published   by
-     O'Reilly  (ISBN 1-56592-257-3), covers them in great detail.
+     O'Reilly  (ISBN 1-56592-257-3), covers them in great detail.
      The description here is intended as reference documentation.
 
      A regular expression is a pattern that is matched against  a
@@ -780,8 +875,7 @@ BACKSLASH
        \f     formfeed (hex 0C)
        \n     newline (hex 0A)
        \r     carriage return (hex 0D)
-
-            tab (hex 09)
+       \t     tab (hex 09)
        \xhh   character with hex code hh
        \ddd   character with octal code ddd, or backreference
 
@@ -833,6 +927,7 @@ BACKSLASH
      Note that octal values of 100 or greater must not be  intro-
      duced  by  a  leading zero, because no more than three octal
      digits are ever read.
+
      All the sequences that define a single  byte  value  can  be
      used both inside and outside character classes. In addition,
      inside a character class, the sequence "\b"  is  interpreted
@@ -885,6 +980,7 @@ BACKSLASH
      These assertions may not appear in  character  classes  (but
      note that "\b" has a different meaning, namely the backspace
      character, inside a character class).
+
      A word boundary is a position in the  subject  string  where
      the current character and the previous character do not both
      match \w or \W (i.e. one matches \w and  the  other  matches
@@ -1046,6 +1142,44 @@ SQUARE BRACKETS
 
 
 
+POSIX CHARACTER CLASSES
+     Perl 5.6 (not yet released at the time of writing) is  going
+     to  support  the POSIX notation for character classes, which
+     uses names enclosed by  [:  and  :]   within  the  enclosing
+     square brackets. PCRE supports this notation. For example,
+
+       [01[:alpha:]%]
+
+     matches "0", "1", any alphabetic character, or "%". The sup-
+     ported class names are
+
+       alnum    letters and digits
+       alpha    letters
+       ascii    character codes 0 - 127
+       cntrl    control characters
+       digit    decimal digits (same as \d)
+       graph    printing characters, excluding space
+       lower    lower case letters
+       print    printing characters, including space
+       punct    printing characters, excluding letters and digits
+       space    white space (same as \s)
+       upper    upper case letters
+       word     "word" characters (same as \w)
+       xdigit   hexadecimal digits
+
+     The names "ascii" and "word" are  Perl  extensions.  Another
+     Perl  extension is negation, which is indicated by a ^ char-
+     acter after the colon. For example,
+
+       [12[:^digit:]]
+
+     matches "1", "2", or any non-digit.  PCRE  (and  Perl)  also
+     recognize the POSIX syntax [.ch.] and [=ch=] where "ch" is a
+     "collating element", but these are  not  supported,  and  an
+     error is given if they are encountered.
+
+
+
 VERTICAL BAR
      Vertical bar characters are  used  to  separate  alternative
      patterns. For example, the pattern
@@ -1197,7 +1331,6 @@ REPETITION
      Repetition is specified by quantifiers, which can follow any
      of the following items:
 
-
        a single character, possibly escaped
        the . metacharacter
        a character class
@@ -1384,8 +1517,8 @@ BACK REFERENCES
      A back reference that occurs inside the parentheses to which
      it  refers  fails when the subpattern is first used, so, for
      example, (a\1) never matches.  However, such references  can
-     be useful inside repeated subpatterns. For example, the pat-
-     tern
+     be  useful  inside  repeated  subpatterns.  For example, the
+     pattern
 
        (a|b\1)+
 
@@ -1407,6 +1540,7 @@ ASSERTIONS
      cated assertions are coded as  subpatterns.  There  are  two
      kinds:  those that look ahead of the current position in the
      subject string, and those that look behind it.
+
      An assertion subpattern is matched in the normal way, except
      that  it  does not cause the current matching position to be
      changed. Lookahead assertions start with  (?=  for  positive
@@ -1572,20 +1706,19 @@ ONCE-ONLY SUBPATTERNS
 
        abcd$
 
-     when applied to a long  string  which  does  not  match  it.
-     Because matching proceeds from left to right, PCRE will look
-     for each "a" in the subject and then  see  if  what  follows
-     matches the rest of the pattern. If the pattern is specified
-     as
+     when applied to a long string which does not match.  Because
+     matching  proceeds  from  left  to right, PCRE will look for
+     each "a" in the subject and then see if what follows matches
+     the rest of the pattern. If the pattern is specified as
 
        ^.*abcd$
 
      then the initial .* matches the entire string at first,  but
-     when  this  fails,  it  backtracks to match all but the last
-     character, then all but the last two characters, and so  on.
-     Once again the search for "a" covers the entire string, from
-     right to left, so we are no better off. However, if the pat-
-     tern is written as
+     when  this  fails  (because  there  is no following "a"), it
+     backtracks to match all but the last character, then all but
+     the  last  two  characters, and so on. Once again the search
+     for "a" covers the entire string, from right to left, so  we
+     are no better off. However, if the pattern is written as
 
        ^(?>.*)(?<=abcd)
 
@@ -1596,6 +1729,36 @@ ONCE-ONLY SUBPATTERNS
      this approach makes a significant difference to the process-
      ing time.
 
+     When a pattern contains an unlimited repeat inside a subpat-
+     tern  that  can  itself  be  repeated an unlimited number of
+     times, the use of a once-only subpattern is the only way  to
+     avoid  some  failing matches taking a very long time indeed.
+     The pattern
+
+       (\D+|<\d+>)*[!?]
+
+     matches an unlimited number of substrings that  either  con-
+     sist  of  non-digits,  or digits enclosed in <>, followed by
+     either ! or ?. When it matches, it runs quickly. However, if
+     it is applied to
+
+       aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+     it takes a long  time  before  reporting  failure.  This  is
+     because the string can be divided between the two repeats in
+     a large number of ways, and all have to be tried. (The exam-
+     ple  used  [!?]  rather  than a single character at the end,
+     because both PCRE and Perl have an optimization that  allows
+     for  fast  failure  when  a  single  character is used. They
+     remember the last single character that is  required  for  a
+     match,  and  fail early if it is not present in the string.)
+     If the pattern is changed to
+
+       ((?>\D+)|<\d+>)*[!?]
+
+     sequences of non-digits cannot be broken, and  failure  hap-
+     pens quickly.
+
 
 
 CONDITIONAL SUBPATTERNS
@@ -1668,6 +1831,75 @@ COMMENTS
 
 
 
+RECURSIVE PATTERNS
+     Consider the problem of matching a  string  in  parentheses,
+     allowing  for  unlimited nested parentheses. Without the use
+     of recursion, the best that can be done is to use a  pattern
+     that  matches  up  to some fixed depth of nesting. It is not
+     possible to handle an arbitrary nesting depth. Perl 5.6  has
+     provided   an  experimental  facility  that  allows  regular
+     expressions to recurse (amongst other things). It does  this
+     by  interpolating  Perl  code in the expression at run time,
+     and the code can refer to the expression itself. A Perl pat-
+     tern  to  solve  the parentheses problem can be created like
+     this:
+
+       $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;
+
+     The (?p{...}) item interpolates Perl code at run  time,  and
+     in  this  case refers recursively to the pattern in which it
+     appears. Obviously, PCRE cannot support the interpolation of
+     Perl  code.  Instead,  the special item (?R) is provided for
+     the specific case of recursion. This PCRE pattern solves the
+     parentheses  problem (assume the PCRE_EXTENDED option is set
+     so that white space is ignored):
+
+       \( ( (?>[^()]+) | (?R) )* \)
+
+     First it matches an opening parenthesis. Then it matches any
+     number  of substrings which can either be a sequence of non-
+     parentheses, or a recursive  match  of  the  pattern  itself
+     (i.e. a correctly parenthesized substring). Finally there is
+     a closing parenthesis.
+
+     This particular example pattern  contains  nested  unlimited
+     repeats, and so the use of a once-only subpattern for match-
+     ing strings of non-parentheses is  important  when  applying
+     the  pattern to strings that do not match. For example, when
+     it is applied to
+
+       (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+
+     it yields "no match" quickly. However, if a  once-only  sub-
+     pattern  is  not  used,  the match runs for a very long time
+     indeed because there are so many different ways the + and  *
+     repeats  can carve up the subject, and all have to be tested
+     before failure can be reported.
+
+     The values set for any capturing subpatterns are those  from
+     the outermost level of the recursion at which the subpattern
+     value is set. If the pattern above is matched against
+
+       (ab(cd)ef)
+
+     the value for the capturing parentheses is  "ef",  which  is
+     the  last  value  taken  on  at the top level. If additional
+     parentheses are added, giving
+
+       \( ( ( (?>[^()]+) | (?R) )* ) \)
+          ^                        ^
+          ^                        ^
+     then the string they capture is "ab(cd)ef", the contents of
+     the top level parentheses. If there are more than 15
+     capturing parentheses in a pattern, PCRE has to obtain extra
+     memory to store data during a recursion, which it does by
+     using pcre_malloc, freeing it via pcre_free afterwards. If
+     no memory can be obtained, it saves data for the first 15
+     capturing parentheses only, as there is no way to give an
+     out-of-memory error from within a recursion.
+
+
+
 PERFORMANCE
      Certain items that may appear in patterns are more efficient
      than  others.  It is more efficient to use a character class
@@ -1742,5 +1974,5 @@ AUTHOR
      Cambridge CB2 3QG, England.
      Phone: +44 1223 334714
 
-     Last updated: 29 July 1999
-     Copyright (c) 1997-1999 University of Cambridge.
+     Last updated: 27 January 2000
+     Copyright (c) 1997-2000 University of Cambridge.
diff --git a/doc/pcreposix.3 b/doc/pcreposix.3
index 0a40369..1be5d9a 100644
--- a/doc/pcreposix.3
+++ b/doc/pcreposix.3
@@ -36,11 +36,11 @@ can be accessed by adding \fB-lpcreposix\fR to the command for linking an
 application which uses them. Because the POSIX functions call the native ones,
 it is also necessary to add \fR-lpcre\fR.
 
-As I am pretty ignorant about POSIX, these functions must be considered as
-experimental. I have implemented only those option bits that can be reasonably
-mapped to PCRE native options. Other POSIX options are not even defined. It may
-be that it is useful to define, but ignore, other options. Feedback from more
-knowledgeable folk may cause this kind of detail to change.
+I have implemented only those option bits that can be reasonably mapped to PCRE
+native options. In addition, the options REG_EXTENDED and REG_NOSUB are defined
+with the value zero. They have no effect, but since programs that are written
+to the POSIX interface often use them, this makes it easier to slot in PCRE as
+a replacement library. Other POSIX options are not even defined.
 
 When PCRE is called via these functions, it is only the API that is POSIX-like
 in style. The syntax and semantics of the regular expressions themselves are
diff --git a/doc/pcreposix.html b/doc/pcreposix.html
index 2c764b6..121d90f 100644
--- a/doc/pcreposix.html
+++ b/doc/pcreposix.html
@@ -55,11 +55,11 @@ application which uses them. Because the POSIX functions call the native ones,
 it is also necessary to add \fR-lpcre\fR.
 </P>
 <P>
-As I am pretty ignorant about POSIX, these functions must be considered as
-experimental. I have implemented only those option bits that can be reasonably
-mapped to PCRE native options. Other POSIX options are not even defined. It may
-be that it is useful to define, but ignore, other options. Feedback from more
-knowledgeable folk may cause this kind of detail to change.
+I have implemented only those option bits that can be reasonably mapped to PCRE
+native options. In addition, the options REG_EXTENDED and REG_NOSUB are defined
+with the value zero. They have no effect, but since programs that are written
+to the POSIX interface often use them, this makes it easier to slot in PCRE as
+a replacement library. Other POSIX options are not even defined.
 </P>
 <P>
 When PCRE is called via these functions, it is only the API that is POSIX-like
diff --git a/doc/pcreposix.txt b/doc/pcreposix.txt
index c85fb84..4a7036f 100644
--- a/doc/pcreposix.txt
+++ b/doc/pcreposix.txt
@@ -34,13 +34,13 @@ DESCRIPTION
      which uses them. Because the POSIX functions call the native
      ones, it is also necessary to add -lpcre.
 
-     As I am pretty ignorant about POSIX, these functions must be
-     considered  as  experimental.  I have implemented only those
-     option bits that can be reasonably  mapped  to  PCRE  native
-     options. Other POSIX options are not even defined. It may be
-     that it is useful to  define,  but  ignore,  other  options.
-     Feedback from more knowledgeable folk may cause this kind of
-     detail to change.
+     I have implemented only those option bits that can  be  rea-
+     sonably  mapped  to  PCRE  native  options. In addition, the
+     options REG_EXTENDED and  REG_NOSUB  are  defined  with  the
+     value zero. They have no effect, but since programs that are
+     written to the POSIX interface often use them, this makes it
+     easier to slot in PCRE as a replacement library. Other POSIX
+     options are not even defined.
 
      When PCRE is called via these functions, it is only the  API
      that is POSIX-like in style. The syntax and semantics of the
diff --git a/doc/pcretest.txt b/doc/pcretest.txt
index 29e2f5c..831fdac 100644
--- a/doc/pcretest.txt
+++ b/doc/pcretest.txt
@@ -7,20 +7,23 @@ experimenting with regular expressions.
 If it is given two filename arguments, it reads from the first and writes to
 the second. If it is given only one filename argument, it reads from that file
 and writes to stdout. Otherwise, it reads from stdin and writes to stdout, and
-prompts for each line of input.
+prompts for each line of input, using "re>" to prompt for regular expressions,
+and "data>" to prompt for data lines.
 
 The program handles any number of sets of input on a single input file. Each
 set starts with a regular expression, and continues with any number of data
 lines to be matched against the pattern. An empty line signals the end of the
-set. The regular expressions are given enclosed in any non-alphameric
-delimiters other than backslash, for example
+data lines, at which point a new regular expression is read. The regular
+expressions are given enclosed in any non-alphameric delimiters other than
+backslash, for example
 
   /(a|bc)x+yz/
 
 White space before the initial delimiter is ignored. A regular expression may
 be continued over several input lines, in which case the newline characters are
-included within it. See the testinput files for many examples. It is possible
-to include the delimiter within the pattern by escaping it, for example
+included within it. See the test input files in the testdata directory for many
+examples. It is possible to include the delimiter within the pattern by
+escaping it, for example
 
   /abc\/def/
 
@@ -85,9 +88,9 @@ is, /L applies only to the expression on which it appears.
 
 The /I modifier requests that pcretest output information about the compiled
 expression (whether it is anchored, has a fixed first character, and so on). It
-does this by calling pcre_info() after compiling an expression, and outputting
-the information it gets back. If the pattern is studied, the results of that
-are also output.
+does this by calling pcre_fullinfo() after compiling an expression, and
+outputting the information it gets back. If the pattern is studied, the results
+of that are also output.
 
 The /D modifier is a PCRE debugging feature, which also assumes /I. It causes
 the internal form of compiled regular expressions to be output after
diff --git a/internal.h b/internal.h
index 5c782ac..91ff301 100644
--- a/internal.h
+++ b/internal.h
@@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -40,9 +40,9 @@ modules, but which are not relevant to the outside. */
 #include "config.h"
 
 /* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
-define a macro for memmove() if HAVE_MEMMOVE is not defined. */
+define a macro for memmove() if HAVE_MEMMOVE is false. */
 
-#ifndef HAVE_MEMMOVE
+#if ! HAVE_MEMMOVE
 #undef  memmove        /* some systems may have a macro */
 #define memmove(a, b, c) bcopy(b, a, c)
 #endif
@@ -188,6 +188,7 @@ enum {
 
   OP_CLASS,          /* Match a character class */
   OP_REF,            /* Match a back reference */
+  OP_RECURSE,        /* Match this pattern recursively */
 
   OP_ALT,            /* Start of alternation */
   OP_KET,            /* End of group that doesn't have an unbounded repeat */
@@ -254,6 +255,9 @@ just to accommodate the POSIX wrapper. */
 #define ERR26 "malformed number after (?("
 #define ERR27 "conditional group contains more than two branches"
 #define ERR28 "assertion expected after (?("
+#define ERR29 "(?p must be followed by )"
+#define ERR30 "unknown POSIX class name"
+#define ERR31 "POSIX collating elements are not supported"
 
 /* All character handling must be done as unsigned characters. Otherwise there
 are problems with top-bit-set characters and functions such as isspace().
@@ -269,6 +273,7 @@ runs on as long as necessary after the end. */
 
 typedef struct real_pcre {
   unsigned long int magic_number;
+  size_t size;
   const unsigned char *tables;
   unsigned long int options;
   uschar top_bracket;
@@ -311,11 +316,12 @@ typedef struct match_data {
   BOOL   noteol;                /* NOTEOL flag */
   BOOL   endonly;               /* Dollar not before final \n */
   BOOL   notempty;              /* Empty string match not wanted */
+  const uschar *start_pattern;  /* For use when recursing */
   const uschar *start_subject;  /* Start of the subject string */
   const uschar *end_subject;    /* End of the subject string */
   const uschar *start_match;    /* Start of this match attempt */
   const uschar *end_match_ptr;  /* Subject position at end match */
-  int     end_offset_top;       /* Highwater mark at end of match */
+  int    end_offset_top;        /* Highwater mark at end of match */
 } match_data;
 
 /* Bit definitions for entries in the pcre_ctypes table. */
@@ -328,12 +334,19 @@ typedef struct match_data {
 #define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
 
 /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
-of bits for a class map. */
-
-#define cbit_digit    0      /* for \d */
-#define cbit_word    32      /* for \w */
-#define cbit_space   64      /* for \s */
-#define cbit_length  96      /* Length of the cbits table */
+of bits for a class map. Some classes are built by combining these tables. */
+
+#define cbit_space     0      /* [:space:] or \s */
+#define cbit_xdigit   32      /* [:xdigit:] */
+#define cbit_digit    64      /* [:digit:] or \d */
+#define cbit_upper    96      /* [:upper:] */
+#define cbit_lower   128      /* [:lower:] */
+#define cbit_word    160      /* [:word:] or \w */
+#define cbit_graph   192      /* [:graph:] */
+#define cbit_print   224      /* [:print:] */
+#define cbit_punct   256      /* [:punct:] */
+#define cbit_cntrl   288      /* [:cntrl:] */
+#define cbit_length  320      /* Length of the cbits table */
 
 /* Offsets of the various tables from the base tables pointer, and
 total length. */
diff --git a/maketables.c b/maketables.c
index eb5fcd1..c0f06c0 100644
--- a/maketables.c
+++ b/maketables.c
@@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -81,15 +81,34 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
 
 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
 
-/* Then the character class tables */
+/* Then the character class tables. Don't try to be clever and save effort
+on exclusive ones - in some locales things may be different. */
 
 memset(p, 0, cbit_length);
 for (i = 0; i < 256; i++)
   {
-  if (isdigit(i)) p[cbit_digit  + i/8] |= 1 << (i&7);
-  if (isalnum(i) || i == '_')
-                  p[cbit_word   + i/8] |= 1 << (i&7);
+  if (isdigit(i))
+    {
+    p[cbit_digit  + i/8] |= 1 << (i&7);
+    p[cbit_word   + i/8] |= 1 << (i&7);
+    }
+  if (isupper(i))
+    {
+    p[cbit_upper  + i/8] |= 1 << (i&7);
+    p[cbit_word   + i/8] |= 1 << (i&7);
+    }
+  if (islower(i))
+    {
+    p[cbit_lower  + i/8] |= 1 << (i&7);
+    p[cbit_word   + i/8] |= 1 << (i&7);
+    }
+  if (i == '_')   p[cbit_word   + i/8] |= 1 << (i&7);
   if (isspace(i)) p[cbit_space  + i/8] |= 1 << (i&7);
+  if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
+  if (isgraph(i)) p[cbit_graph  + i/8] |= 1 << (i&7);
+  if (isprint(i)) p[cbit_print  + i/8] |= 1 << (i&7);
+  if (ispunct(i)) p[cbit_punct  + i/8] |= 1 << (i&7);
+  if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1 << (i&7);
   }
 p += cbit_length;
 
diff --git a/pcre-config.in b/pcre-config.in
new file mode 100644
index 0000000..8daded9
--- /dev/null
+++ b/pcre-config.in
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+exec_prefix_set=no
+
+usage="\
+Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]"
+
+if test $# -eq 0; then
+      echo "${usage}" 1>&2
+      exit 1
+fi
+
+while test $# -gt 0; do
+  case "$1" in
+  -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+  *) optarg= ;;
+  esac
+
+  case $1 in
+    --prefix=*)
+      prefix=$optarg
+      if test $exec_prefix_set = no ; then
+        exec_prefix=$optarg
+      fi
+      ;;
+    --prefix)
+      echo $prefix
+      ;;
+    --exec-prefix=*)
+      exec_prefix=$optarg
+      exec_prefix_set=yes
+      ;;
+    --exec-prefix)
+      echo $exec_prefix
+      ;;
+    --version)
+      echo @PCRE_VERSION@
+      ;;
+    --cflags | --cflags-posix)
+      if test @includedir@ != /usr/include ; then
+        includes=-I@includedir@
+      fi
+      echo $includes
+      ;;
+    --libs-posix)
+      echo -L@libdir@ -lpcreposix -lpcre
+      ;;
+    --libs)
+      echo -L@libdir@ -lpcre
+      ;;
+    *)
+      echo "${usage}" 1>&2
+      exit 1
+      ;;
+  esac
+  shift
+done
diff --git a/pcre.c b/pcre.c
index 6735b82..e45dee8 100644
--- a/pcre.c
+++ b/pcre.c
@@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -82,7 +82,7 @@ static const char *OP_names[] = {
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
   "*", "*?", "+", "+?", "?", "??", "{", "{",
-  "class", "Ref",
+  "class", "Ref", "Recurse",
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
   "Brazero", "Braminzero", "Bra"
@@ -107,6 +107,38 @@ static const short int escapes[] = {
     0,      0, -ESC_z                                            /* x - z */
 };
 
+/* Tables of names of POSIX character classes and their lengths. The list is
+terminated by a zero length entry. The first three must be alpha, lower, upper,
+as this is assumed for handling case independence. */
+
+static const char *posix_names[] = {
+  "alpha", "lower", "upper",
+  "alnum", "ascii", "cntrl", "digit", "graph",
+  "print", "punct", "space", "word",  "xdigit" };
+
+static const uschar posix_name_lengths[] = {
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
+
+/* Table of class bit maps for each POSIX class; up to three may be combined
+to form the class. */
+
+static const int posix_class_maps[] = {
+  cbit_lower, cbit_upper, -1,             /* alpha */
+  cbit_lower, -1,         -1,             /* lower */
+  cbit_upper, -1,         -1,             /* upper */
+  cbit_digit, cbit_lower, cbit_upper,     /* alnum */
+  cbit_print, cbit_cntrl, -1,             /* ascii */
+  cbit_cntrl, -1,         -1,             /* cntrl */
+  cbit_digit, -1,         -1,             /* digit */
+  cbit_graph, -1,         -1,             /* graph */
+  cbit_print, -1,         -1,             /* print */
+  cbit_punct, -1,         -1,             /* punct */
+  cbit_space, -1,         -1,             /* space */
+  cbit_word,  -1,         -1,             /* word */
+  cbit_xdigit,-1,         -1              /* xdigit */
+};
+
+
 /* Definition to allow mutual recursion */
 
 static BOOL
@@ -161,12 +193,13 @@ return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
 
 
 /*************************************************
-*       Return info about a compiled pattern     *
+* (Obsolete) Return info about compiled pattern  *
 *************************************************/
 
-/* This function picks potentially useful data out of the private
-structure. The public options are passed back in an int - though the
-re->options field has been expanded to a long int, all the public options
+/* This is the original "info" function. It picks potentially useful data out
+of the private structure, but its interface was too rigid. It remains for
+backwards compatibility. The public options are passed back in an int - though
+the re->options field has been expanded to a long int, all the public options
 at the low end of it, and so even on 16-bit systems this will still be OK.
 Therefore, I haven't changed the API for pcre_info().
 
@@ -177,7 +210,7 @@ Arguments:
                 or -1 if multiline and all branches start ^,
                 or -2 otherwise
 
-Returns:        number of identifying extraction brackets
+Returns:        number of capturing subpatterns
                 or negative values on error
 */
 
@@ -196,6 +229,74 @@ return re->top_bracket;
 
 
 
+/*************************************************
+*        Return info about compiled pattern      *
+*************************************************/
+
+/* This is a newer "info" function which has an extensible interface so
+that additional items can be added compatibly.
+
+Arguments:
+  external_re      points to compiled code
+  external_study   points to study data, or NULL
+  what             what information is required
+  where            where to put the information
+
+Returns:           0 if data returned, negative on error
+*/
+
+int
+pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
+  void *where)
+{
+const real_pcre *re = (const real_pcre *)external_re;
+const real_pcre_extra *study = (const real_pcre_extra *)study_data;
+
+if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
+if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
+
+switch (what)
+  {
+  case PCRE_INFO_OPTIONS:
+  *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
+  break;
+
+  case PCRE_INFO_SIZE:
+  *((size_t *)where) = re->size;
+  break;
+
+  case PCRE_INFO_CAPTURECOUNT:
+  *((int *)where) = re->top_bracket;
+  break;
+
+  case PCRE_INFO_BACKREFMAX:
+  *((int *)where) = re->top_backref;
+  break;
+
+  case PCRE_INFO_FIRSTCHAR:
+  *((int *)where) =
+    ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
+    ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
+  break;
+
+  case PCRE_INFO_FIRSTTABLE:
+  *((const uschar **)where) =
+    (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
+      study->start_bits : NULL;
+  break;
+
+  case PCRE_INFO_LASTLITERAL:
+  *((int *)where) =
+    ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
+  break;
+
+  default: return PCRE_ERROR_BADOPTION;
+  }
+
+return 0;
+}
+
+
 
 #ifdef DEBUG
 /*************************************************
@@ -255,9 +356,9 @@ check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
   int options, BOOL isclass, compile_data *cd)
 {
 const uschar *ptr = *ptrptr;
-int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
-int i;
+int c, i;
 
+c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
 if (c == 0) *errorptr = ERR1;
 
 /* Digits or letters may have special meaning; all others are literals. */
@@ -622,6 +723,71 @@ for (;;)
 
 
 /*************************************************
+*           Check for POSIX class syntax         *
+*************************************************/
+
+/* This function is called when the sequence "[:" or "[." or "[=" is
+encountered in a character class. It checks whether this is followed by an
+optional ^ and then a sequence of letters, terminated by a matching ":]" or
+".]" or "=]".
+
+Argument:
+  ptr      pointer to the initial [
+  endptr   where to return the end pointer
+  cd       pointer to compile data
+
+Returns:   TRUE or FALSE
+*/
+
+static BOOL
+check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
+{
+int terminator;          /* Don't combine these lines; the Solaris cc */
+terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
+if (*(++ptr) == '^') ptr++;
+while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
+if (*ptr == terminator && ptr[1] == ']')
+  {
+  *endptr = ptr;
+  return TRUE;
+  }
+return FALSE;
+}
+
+
+
+
+/*************************************************
+*          Check POSIX class name                *
+*************************************************/
+
+/* This function is called to check the name given in a POSIX-style class entry
+such as [:alnum:].
+
+Arguments:
+  ptr        points to the first letter
+  len        the length of the name
+
+Returns:     a value representing the name, or -1 if unknown
+*/
+
+static int
+check_posix_name(const uschar *ptr, int len)
+{
+register int yield = 0;
+while (posix_name_lengths[yield] != 0)
+  {
+  if (len == posix_name_lengths[yield] &&
+    strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
+  yield++;
+  }
+return -1;
+}
+
+
+
+
+/*************************************************
 *           Compile one branch                   *
 *************************************************/
 
@@ -764,6 +930,66 @@ for (;; ptr++)
         goto FAILED;
         }
 
+      /* Handle POSIX class names. Perl allows a negation extension of the
+      form [:^name:]. A square bracket that doesn't match the syntax is
+      treated as a literal. We also recognize the POSIX constructions
+      [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
+      5.6 does. */
+
+      if (c == '[' &&
+          (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
+          check_posix_syntax(ptr, &tempptr, cd))
+        {
+        BOOL local_negate = FALSE;
+        int posix_class, i;
+        register const uschar *cbits = cd->cbits;
+
+        if (ptr[1] != ':')
+          {
+          *errorptr = ERR31;
+          goto FAILED;
+          }
+
+        ptr += 2;
+        if (*ptr == '^')
+          {
+          local_negate = TRUE;
+          ptr++;
+          }
+
+        posix_class = check_posix_name(ptr, tempptr - ptr);
+        if (posix_class < 0)
+          {
+          *errorptr = ERR30;
+          goto FAILED;
+          }
+
+        /* If matching is caseless, upper and lower are converted to
+        alpha. This relies on the fact that the class table starts with
+        alpha, lower, upper as the first 3 entries. */
+
+        if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
+          posix_class = 0;
+
+        /* Or into the map we are building up to 3 of the static class
+        tables, or their negations. */
+
+        posix_class *= 3;
+        for (i = 0; i < 3; i++)
+          {
+          int taboffset = posix_class_maps[posix_class + i];
+          if (taboffset < 0) break;
+          if (local_negate)
+            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
+          else
+            for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
+          }
+
+        ptr = tempptr + 1;
+        class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
+        continue;
+        }
+
       /* Backslash may introduce a single character, or it may introduce one
       of the specials, which just set a flag. Escaped items are checked for
       validity in the pre-compiling pass. The sequence \b is a special case.
@@ -791,13 +1017,11 @@ for (;; ptr++)
             continue;
 
             case ESC_w:
-            for (c = 0; c < 32; c++)
-              class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
+            for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
             continue;
 
             case ESC_W:
-            for (c = 0; c < 32; c++)
-              class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
+            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
             continue;
 
             case ESC_s:
@@ -1360,6 +1584,11 @@ for (;; ptr++)
         ptr++;
         break;
 
+        case 'R':                 /* Pattern recursion */
+        *code++ = OP_RECURSE;
+        ptr++;
+        continue;
+
         default:                  /* Option setting */
         set = unset = 0;
         optset = &set;
@@ -2015,12 +2244,13 @@ pcre_compile(const char *pattern, int options, const char **errorptr,
 real_pcre *re;
 int length = 3;      /* For initial BRA plus length */
 int runlength;
-int c, size, reqchar, countlits;
+int c, reqchar, countlits;
 int bracount = 0;
 int top_backref = 0;
 int branch_extra = 0;
 int branch_newextra;
 unsigned int brastackptr = 0;
+size_t size;
 uschar *code;
 const uschar *ptr;
 compile_data compile_block;
@@ -2248,6 +2478,19 @@ while ((c = *(++ptr)) != 0)
         ptr += 2;
         break;
 
+        /* A recursive call to the regex is an extension, to provide the
+        facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
+
+        case 'R':
+        if (ptr[3] != ')')
+          {
+          *errorptr = ERR29;
+          goto PCRE_ERROR_RETURN;
+          }
+        ptr += 3;
+        length += 1;
+        break;
+
         /* Lookbehinds are in Perl from version 5.005 */
 
         case '<':
@@ -2550,9 +2793,10 @@ if (re == NULL)
   return NULL;
   }
 
-/* Put in the magic number and the options. */
+/* Put in the magic number, and save the size, options, and table pointer */
 
 re->magic_number = MAGIC_NUMBER;
+re->size = size;
 re->options = options;
 re->tables = tables;
 
@@ -3147,6 +3391,53 @@ for (;;)
     ecode += 3;
     break;
 
+    /* Recursion matches the current regex, nested. If there are any capturing
+    brackets started but not finished, we have to save their starting points
+    and reinstate them after the recursion. However, we don't know how many
+    such there are (offset_top records the completed total) so we just have
+    to save all the potential data. There may be up to 99 such values, which
+    is a bit large to put on the stack, but using malloc for small numbers
+    seems expensive. As a compromise, the stack is used when there are fewer
+    than 16 values to store; otherwise malloc is used. A problem is what to do
+    if the malloc fails ... there is no way of returning to the top level with
+    an error. Save the top 15 values on the stack, and accept that the rest
+    may be wrong. */
+
+    case OP_RECURSE:
+      {
+      BOOL rc;
+      int *save;
+      int stacksave[16];   /* slots 1..15 are used (slot 0 is not), so 16 */
+
+      c = md->offset_max;
+
+      if (c < 16) save = stacksave; else
+        {
+        save = (int *)(pcre_malloc)((c+1) * sizeof(int));  /* 1-based too */
+        if (save == NULL)
+          {
+          save = stacksave;     /* fall back: keep only the top 15 values */
+          c = 15;
+          }
+        }
+
+      for (i = 1; i <= c; i++)
+        save[i] = md->offset_vector[md->offset_end - i];
+      rc = match(eptr, md->start_pattern, offset_top, md, ims, FALSE, eptrb);
+      for (i = 1; i <= c; i++)
+        md->offset_vector[md->offset_end - i] = save[i];
+      if (save != stacksave) (pcre_free)(save);
+      if (!rc) return FALSE;
+
+      /* In case the recursion has set more capturing values, save the final
+      number, then move along the subject till after the recursive match,
+      and advance one byte in the pattern code. */
+
+      offset_top = md->end_offset_top;
+      eptr = md->end_match_ptr;
+      ecode++;
+      }
+    break;
 
     /* "Once" brackets are like assertion brackets except that after a match,
     the point in the subject string is not moved back. Thus there can never be
@@ -4216,6 +4507,7 @@ if (re == NULL || subject == NULL ||
    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
 
+match_block.start_pattern = re->code;
 match_block.start_subject = (const uschar *)subject;
 match_block.end_subject = match_block.start_subject + length;
 end_subject = match_block.end_subject;
diff --git a/pcre.h b/pcre.in
index 4888b45..74b0cfc 100644
--- a/pcre.h
+++ b/pcre.in
@@ -2,14 +2,14 @@
 *       Perl-Compatible Regular Expressions      *
 *************************************************/
 
-/* Copyright (c) 1997-1999 University of Cambridge */
+/* Copyright (c) 1997-2000 University of Cambridge */
 
 #ifndef _PCRE_H
 #define _PCRE_H
 
-#define PCRE_MAJOR 2
-#define PCRE_MINOR 08
-#define PCRE_DATE  31-Aug-1999
+#define PCRE_MAJOR @PCRE_MAJOR@
+#define PCRE_MINOR @PCRE_MINOR@
+#define PCRE_DATE  @PCRE_DATE@
 
 /* Win32 uses DLL by default */
 
@@ -59,6 +59,16 @@ extern "C" {
 #define PCRE_ERROR_NOMEMORY       (-6)
 #define PCRE_ERROR_NOSUBSTRING    (-7)
 
+/* Request types for pcre_fullinfo() */
+
+#define PCRE_INFO_OPTIONS         0
+#define PCRE_INFO_SIZE            1
+#define PCRE_INFO_CAPTURECOUNT    2
+#define PCRE_INFO_BACKREFMAX      3
+#define PCRE_INFO_FIRSTCHAR       4
+#define PCRE_INFO_FIRSTTABLE      5
+#define PCRE_INFO_LASTLITERAL     6
+
 /* Types */
 
 typedef void pcre;
@@ -83,6 +93,7 @@ extern int pcre_exec(const pcre *, const pcre_extra *, const char *,
 extern int pcre_get_substring(const char *, int *, int, int, const char **);
 extern int pcre_get_substring_list(const char *, int *, int, const char ***);
 extern int pcre_info(const pcre *, int *, int *);
+extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *);
 extern unsigned const char *pcre_maketables(void);
 extern pcre_extra *pcre_study(const pcre *, int, const char **);
 extern const char *pcre_version(void);
diff --git a/pcreposix.c b/pcreposix.c
index 12606af..7c66cce 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -12,7 +12,7 @@ functions.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -46,7 +46,8 @@ restrictions:
 static const char *estring[] = {
   ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
   ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
-  ERR21, ERR22, ERR23, ERR24, ERR25 };
+  ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
+  ERR31 };  /* one entry per ERRn, in numeric order, none repeated */
 
 static int eint[] = {
   REG_EESCAPE, /* "\\ at end of pattern" */
@@ -76,7 +77,10 @@ static int eint[] = {
   REG_BADPAT,  /* "lookbehind assertion is not fixed length" */
   REG_BADPAT,  /* "malformed number after (?(" */
   REG_BADPAT,  /* "conditional group containe more than two branches" */
-  REG_BADPAT   /* "assertion expected after (?(" */
+  REG_BADPAT,  /* "assertion expected after (?(" */
+  REG_BADPAT,  /* "(?p must be followed by )" */
+  REG_ECTYPE,  /* "unknown POSIX class name" */
+  REG_BADPAT   /* "POSIX collating elements are not supported" */
 };
 
 /* Table of texts corresponding to POSIX error codes */
@@ -231,7 +235,7 @@ preg->re_erroffset = (size_t)(-1);   /* Only has meaning after compile */
 
 if (nmatch > 0)
   {
-  ovector = malloc(sizeof(int) * nmatch * 3);
+  ovector = (int *)malloc(sizeof(int) * nmatch * 3);
   if (ovector == NULL) return REG_ESPACE;
   }
 
diff --git a/pcreposix.h b/pcreposix.h
index 208db35..7660acb 100644
--- a/pcreposix.h
+++ b/pcreposix.h
@@ -2,7 +2,7 @@
 *       Perl-Compatible Regular Expressions      *
 *************************************************/
 
-/* Copyright (c) 1997-1999 University of Cambridge */
+/* Copyright (c) 1997-2000 University of Cambridge */
 
 #ifndef _PCREPOSIX_H
 #define _PCREPOSIX_H
@@ -28,6 +28,12 @@ extern "C" {
 #define REG_NOTBOL    0x04
 #define REG_NOTEOL    0x08
 
+/* These are not used by PCRE, but by defining them we make it easier
+to slot PCRE into existing programs that make POSIX calls. */
+
+#define REG_EXTENDED  0
+#define REG_NOSUB     0
+
 /* Error values. Not all these are relevant or used by the wrapper. */
 
 enum {
diff --git a/pcretest.c b/pcretest.c
index 8e2fe4f..b9e36e2 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -34,6 +34,7 @@ Makefile. */
 
 static FILE *outfile;
 static int log_store = 0;
+static size_t gotten_store;
 
 
 
@@ -48,7 +49,7 @@ static const char *OP_names[] = {
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
   "*", "*?", "+", "+?", "?", "??", "{", "{",
-  "class", "Ref",
+  "class", "Ref", "Recurse",
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
   "Brazero", "Braminzero", "Bra"
@@ -281,6 +282,7 @@ compiled re. */
 
 static void *new_malloc(size_t size)
 {
+gotten_store = size;
 if (log_store)
   fprintf(outfile, "Memory allocation (code space): %d\n",
     (int)((int)size - offsetof(real_pcre, code[0])));
@@ -289,6 +291,19 @@ return malloc(size);
 
 
 
+
+/* Get one piece of information from the pcre_fullinfo() function */
+
+static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
+{
+int rc;                  /* return code from pcre_fullinfo() */
+if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  /* negative = error */
+  fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
+}                        /* the error is only logged; nothing is returned */
+
+
+
+
 /* Read lines from named file or stdin and write to named file or stdout; lines
 consist of a regular expression, in delimiters and optionally followed by
 options, followed by a set of test data, terminated by an empty line. */
@@ -573,59 +588,90 @@ while (!done)
       goto CONTINUE;
       }
 
-    /* Compilation succeeded; print data if required */
+    /* Compilation succeeded; print data if required. There are now two
+    info-returning functions. The old one has a limited interface and
+    returns only limited data. Check that it agrees with the newer one. */
 
     if (do_showinfo)
       {
-      int first_char, count;
+      int old_first_char, old_options, old_count;
+      int count, backrefmax, first_char, need_char;
+      size_t size;
 
       if (do_debug) print_internals(re);
 
-      count = pcre_info(re, &options, &first_char);
+      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
+      new_info(re, NULL, PCRE_INFO_SIZE, &size);
+      new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
+      new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
+      new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
+      new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
+
+      old_count = pcre_info(re, &old_options, &old_first_char);
       if (count < 0) fprintf(outfile,
-        "Error %d while reading info\n", count);
+        "Error %d from pcre_info()\n", count);
       else
         {
-        fprintf(outfile, "Identifying subpattern count = %d\n", count);
-        if (options == 0) fprintf(outfile, "No options\n");
-          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
-            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
-            ((options & PCRE_CASELESS) != 0)? " caseless" : "",
-            ((options & PCRE_EXTENDED) != 0)? " extended" : "",
-            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
-            ((options & PCRE_DOTALL) != 0)? " dotall" : "",
-            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
-            ((options & PCRE_EXTRA) != 0)? " extra" : "",
-            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
-
-        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
-          fprintf(outfile, "Case state changes\n");
-
-        if (first_char == -1)
-          {
-          fprintf(outfile, "First char at start or follows \\n\n");
-          }
-        else if (first_char < 0)
-          {
-          fprintf(outfile, "No first char\n");
-          }
+        if (old_count != count) fprintf(outfile,
+          "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
+            old_count);
+
+        if (old_first_char != first_char) fprintf(outfile,
+          "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
+            first_char, old_first_char);
+
+        if (old_options != options) fprintf(outfile,
+          "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
+            old_options);
+        }
+
+      if (size != gotten_store) fprintf(outfile,
+        "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
+        (int)size, (int)gotten_store);   /* size_t must not be passed to %d */
+
+      fprintf(outfile, "Capturing subpattern count = %d\n", count);
+      if (backrefmax > 0)
+        fprintf(outfile, "Max back reference = %d\n", backrefmax);
+      if (options == 0) fprintf(outfile, "No options\n");
+        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
+          ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
+          ((options & PCRE_CASELESS) != 0)? " caseless" : "",
+          ((options & PCRE_EXTENDED) != 0)? " extended" : "",
+          ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
+          ((options & PCRE_DOTALL) != 0)? " dotall" : "",
+          ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
+          ((options & PCRE_EXTRA) != 0)? " extra" : "",
+          ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
+
+      if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
+        fprintf(outfile, "Case state changes\n");
+
+      if (first_char == -1)
+        {
+        fprintf(outfile, "First char at start or follows \\n\n");
+        }
+      else if (first_char < 0)
+        {
+        fprintf(outfile, "No first char\n");
+        }
+      else
+        {
+        if (isprint(first_char))
+          fprintf(outfile, "First char = \'%c\'\n", first_char);
         else
-          {
-          if (isprint(first_char))
-            fprintf(outfile, "First char = \'%c\'\n", first_char);
-          else
-            fprintf(outfile, "First char = %d\n", first_char);
-          }
+          fprintf(outfile, "First char = %d\n", first_char);
+        }
 
-        if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
-          {
-          int req_char = ((real_pcre *)re)->req_char;
-          if (isprint(req_char))
-            fprintf(outfile, "Req char = \'%c\'\n", req_char);
-          else
-            fprintf(outfile, "Req char = %d\n", req_char);
-          }
-        else fprintf(outfile, "No req char\n");
+      if (need_char < 0)
+        {
+        fprintf(outfile, "No need char\n");
+        }
+      else
+        {
+        if (isprint(need_char))
+          fprintf(outfile, "Need char = \'%c\'\n", need_char);
+        else
+          fprintf(outfile, "Need char = %d\n", need_char);
         }
       }
 
@@ -654,13 +700,11 @@ while (!done)
       else if (extra == NULL)
         fprintf(outfile, "Study returned NULL\n");
 
-      /* This looks at internal information. A bit kludgy to do it this
-      way, but it is useful for testing. */
-
       else if (do_showinfo)
         {
-        real_pcre_extra *xx = (real_pcre_extra *)extra;
-        if ((xx->options & PCRE_STUDY_MAPPED) == 0)
+        uschar *start_bits = NULL;
+        new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
+        if (start_bits == NULL)
           fprintf(outfile, "No starting character set\n");
         else
           {
@@ -669,7 +713,7 @@ while (!done)
           fprintf(outfile, "Starting character set: ");
           for (i = 0; i < 256; i++)
             {
-            if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
+            if ((start_bits[i/8] & (1<<(i%8))) != 0)
               {
               if (c > 75)
                 {
diff --git a/pgrep.c b/pgrep.c
index 3e63058..ad1b87e 100644
--- a/pgrep.c
+++ b/pgrep.c
@@ -32,7 +32,7 @@ static BOOL whole_lines = FALSE;
 
 
 
-#ifndef HAVE_STRERROR
+#if ! HAVE_STRERROR
 /*************************************************
 *     Provide strerror() for non-ANSI libraries  *
 *************************************************/
diff --git a/study.c b/study.c
index 284833b..676db94 100644
--- a/study.c
+++ b/study.c
@@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
 
 Written by: Philip Hazel <ph10@cam.ac.uk>
 
-           Copyright (c) 1997-1999 University of Cambridge
+           Copyright (c) 1997-2000 University of Cambridge
 
 -----------------------------------------------------------------------------
 Permission is granted to anyone to use this software for any purpose on any
@@ -207,12 +207,12 @@ do
 
       case OP_NOT_WORDCHAR:
       for (c = 0; c < 32; c++)
-        start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
+        start_bits[c] |= ~cd->cbits[c+cbit_word];
       break;
 
       case OP_WORDCHAR:
       for (c = 0; c < 32; c++)
-        start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
+        start_bits[c] |= cd->cbits[c+cbit_word];
       break;
 
       /* One or more character type fudges the pointer and restarts, knowing
@@ -264,12 +264,12 @@ do
 
         case OP_NOT_WORDCHAR:
         for (c = 0; c < 32; c++)
-          start_bits[c] |= ~(cd->cbits[c] | cd->cbits[c+cbit_word]);
+          start_bits[c] |= ~cd->cbits[c+cbit_word];
         break;
 
         case OP_WORDCHAR:
         for (c = 0; c < 32; c++)
-          start_bits[c] |= (cd->cbits[c] | cd->cbits[c+cbit_word]);
+          start_bits[c] |= cd->cbits[c+cbit_word];
         break;
         }
 
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5e641b2..1d9504c 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -591,5 +591,120 @@
     aaaabbbbzzzz\O3
     aaaabbbbzzzz\O4
     aaaabbbbzzzz\O5
+    
+/^.?abcd/S 
+
+/\(             # ( at start
+  (?:           # Non-capturing bracket
+  (?>[^()]+)    # Either a sequence of non-brackets (no backtracking)
+  |             # Or
+  (?R)          # Recurse - i.e. nested bracketed string
+  )*            # Zero or more contents
+  \)            # Closing )
+  /x
+    (abcd)
+    (abcd)xyz
+    xyz(abcd)
+    (ab(xy)cd)pqr 
+    (ab(xycd)pqr 
+    () abc () 
+    12(abcde(fsh)xyz(foo(bar))lmno)89
+    *** Failers
+    abcd 
+    abcd)
+    (abcd  
+
+/\(  ( (?>[^()]+) | (?R) )* \) /xg
+    (ab(xy)cd)pqr 
+    1(abcd)(x(y)z)pqr
+
+/\(  (?: (?>[^()]+) | (?R) ) \) /x
+    (abcd)
+    (ab(xy)cd)
+    (a(b(c)d)e) 
+    ((ab)) 
+    *** Failers
+    ()   
+
+/\(  (?: (?>[^()]+) | (?R) )? \) /x
+    ()
+    12(abcde(fsh)xyz(foo(bar))lmno)89
+
+/\(  ( (?>[^()]+) | (?R) )* \) /x
+    (ab(xy)cd)
+
+/\( ( ( (?>[^()]+) | (?R) )* ) \) /x
+    (ab(xy)cd)
+
+/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /x
+    (ab(xy)cd)
+    (123ab(xy)cd)
+
+/\( ( (123)? ( (?>[^()]+) | (?R) )* ) \) /x
+    (ab(xy)cd)
+    (123ab(xy)cd)
+
+/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /x
+    (ab(xy)cd)
+
+/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /x
+    (abcd(xyz<p>qrs)123)
+
+/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /x
+    (ab(cd)ef)
+    (ab(cd(ef)gh)ij)
+
+/^[[:alnum:]]/D
+
+/^[[:alpha:]]/D
+             
+/^[[:ascii:]]/D
+
+/^[[:cntrl:]]/D
+
+/^[[:digit:]]/D
+
+/^[[:graph:]]/D
+
+/^[[:lower:]]/D
+
+/^[[:print:]]/D
+
+/^[[:punct:]]/D
+
+/^[[:space:]]/D
+
+/^[[:upper:]]/D
+
+/^[[:xdigit:]]/D
+
+/^[[:word:]]/D
+
+/^[[:^cntrl:]]/D
+
+/^[12[:^digit:]]/D
+
+/[01[:alpha:]%]/D
+
+/[[.ch.]]/
+
+/[[=ch=]]/
+
+/[[:rhubarb:]]/
+
+/[[:upper:]]/i
+    A
+    a 
+    
+/[[:lower:]]/i
+    A
+    a 
+
+/((?-i)[[:lower:]])[[:lower:]]/i
+    ab
+    aB
+    *** Failers
+    Ab
+    AB        
 
 / End of test input /
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index b53e183..1a10a74 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -1,4 +1,4 @@
-PCRE version 2.08 31-Aug-1999
+PCRE version 3.0 01-Feb-2000
 
 /the quick brown fox/
     the quick brown fox
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index aaea4b7..493f460 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -1,16 +1,16 @@
-PCRE version 2.08 31-Aug-1999
+PCRE version 3.0 01-Feb-2000
 
 /(a)b|/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
 
 /abc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
     abc
  0: abc
     defabc
@@ -25,10 +25,10 @@ No match
 No match
 
 /^abc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 'c'
+Need char = 'c'
     abc
  0: abc
     \Aabc
@@ -41,34 +41,34 @@ No match
 No match
 
 /a+bc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
 
 /a*bc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /a{3}bc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
 
 /(abc|a+z)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'a'
-No req char
+No need char
 
 /^abc$/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 'c'
+Need char = 'c'
     abc
  0: abc
     *** Failers
@@ -113,32 +113,32 @@ Failed: missing ) after comment at offset 7
 Failed: unrecognized character after (? at offset 2
 
 /.*b/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char at start or follows \n
-Req char = 'b'
+Need char = 'b'
 
 /.*?b/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char at start or follows \n
-Req char = 'b'
+Need char = 'b'
 
 /cat|dog|elephant/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
     this sentence eventually mentions a cat
  0: cat
     this sentences rambles on and on for a while and then reaches elephant
  0: elephant
 
 /cat|dog|elephant/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: c d e 
     this sentence eventually mentions a cat
  0: cat
@@ -146,10 +146,10 @@ Starting character set: c d e
  0: elephant
 
 /cat|dog|elephant/iS
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: caseless
 No first char
-No req char
+No need char
 Starting character set: C D E c d e 
     this sentence eventually mentions a CAT cat
  0: CAT
@@ -157,17 +157,17 @@ Starting character set: C D E c d e
  0: elephant
 
 /a|[bcd]/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: a b c d 
 
 /(a|[^\dZ])/S
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
 Starting character set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 
@@ -184,10 +184,10 @@ Starting character set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
   \xfc \xfd \xfe \xff 
 
 /(a|b)*[\s]/S
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
 Starting character set: \x09 \x0a \x0b \x0c \x0d \x20 a b 
 
 /(ab\2)/
@@ -197,10 +197,11 @@ Failed: back reference to non-existent subpattern at offset 6
 Failed: nothing to repeat at offset 4
 
 /(a)(b)(c)\2/
-Identifying subpattern count = 3
+Capturing subpattern count = 3
+Max back reference = 2
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
     abcb
  0: abcb
  1: a
@@ -227,10 +228,11 @@ Matched, but too many substrings
  3: c
 
 /(a)bc|(a)(b)\2/
-Identifying subpattern count = 3
+Capturing subpattern count = 3
+Max back reference = 2
 No options
 First char = 'a'
-No req char
+No need char
     abc
  0: abc
  1: a
@@ -268,10 +270,10 @@ Matched, but too many substrings
  3: b
 
 /abc$/E
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: dollar_endonly
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
     abc
  0: abc
     *** Failers
@@ -285,20 +287,20 @@ No match
 Failed: back reference to non-existent subpattern at offset 17
 
 /the quick brown fox/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 't'
-Req char = 'x'
+Need char = 'x'
     the quick brown fox
  0: the quick brown fox
     this is a line with the quick brown fox
  0: the quick brown fox
 
 /the quick brown fox/A
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 'x'
+Need char = 'x'
     the quick brown fox
  0: the quick brown fox
     *** Failers
@@ -310,20 +312,20 @@ No match
 Failed: unrecognized character after (? at offset 4
 
 /^abc|def/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
     abcdef
  0: abc
     abcdef\B
  0: def
 
 /.*((abc)$|(def))/
-Identifying subpattern count = 3
+Capturing subpattern count = 3
 No options
 First char at start or follows \n
-No req char
+No need char
     defabc
  0: defabc
  1: abc
@@ -396,74 +398,74 @@ Failed: unmatched parentheses at offset 0
 Failed: missing terminating ] for character class at offset 4
 
 /[^aeiou ]{3,}/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
     co-processors, and for 
  0: -pr
     
 /<.*>/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = '<'
-Req char = '>'
+Need char = '>'
     abc<def>ghi<klm>nop
  0: <def>ghi<klm>
 
 /<.*?>/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = '<'
-Req char = '>'
+Need char = '>'
     abc<def>ghi<klm>nop
  0: <def>
 
 /<.*>/U
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: ungreedy
 First char = '<'
-Req char = '>'
+Need char = '>'
     abc<def>ghi<klm>nop
  0: <def>
     
 /<.*>(?U)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: ungreedy
 First char = '<'
-Req char = '>'
+Need char = '>'
     abc<def>ghi<klm>nop
  0: <def>
 
 /<.*?>/U
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: ungreedy
 First char = '<'
-Req char = '>'
+Need char = '>'
     abc<def>ghi<klm>nop
  0: <def>ghi<klm>
     
 /={3,}/U
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: ungreedy
 First char = '='
-Req char = '='
+Need char = '='
     abc========def
  0: ===
     
 /(?U)={3,}?/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: ungreedy
 First char = '='
-Req char = '='
+Need char = '='
     abc========def
  0: ========
     
 /(?<!bar|cattle)foo/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'f'
-Req char = 'o'
+Need char = 'o'
     foo
  0: foo
     catfoo 
@@ -485,68 +487,68 @@ Failed: lookbehind assertion is not fixed length at offset 14
 Failed: lookbehind assertion is not fixed length at offset 12
 
 /(?i)abc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: caseless
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
 
 /(a|(?m)a)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'a'
-No req char
+No need char
 
 /(?i)^1234/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored caseless
 No first char
-Req char = '4'
+Need char = '4'
 
 /(^b|(?i)^d)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: anchored
 Case state changes
 No first char
-No req char
+No need char
 
 /(?s).*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored dotall
 No first char
-No req char
+No need char
 
 /[abcd]/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: a b c d 
 
 /(?i)[abcd]/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: caseless
 No first char
-No req char
+No need char
 Starting character set: A B C D a b c d 
 
 /(?m)[xy]|(b|c)/S
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: multiline
 No first char
-No req char
+No need char
 Starting character set: b c x y 
 
 /(^a|^b)/m
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: multiline
 First char at start or follows \n
-No req char
+No need char
 
 /(?i)(^a|^b)/m
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: caseless multiline
 First char at start or follows \n
-No req char
+No need char
 
 /(a)(?(1)a|b|c)/
 Failed: conditional group contains more than two branches at offset 13
@@ -567,17 +569,19 @@ Failed: assertion expected after (?( at offset 3
 Failed: unrecognized character after (?< at offset 2
 
 /((?s)blah)\s+\1/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
+Max back reference = 1
 No options
 First char = 'b'
-Req char = 'h'
+Need char = 'h'
 
 /((?i)blah)\s+\1/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
+Max back reference = 1
 No options
 Case state changes
 No first char
-Req char = 'h'
+Need char = 'h'
 
 /((?i)b)/DS
 ------------------------------------------------------------------
@@ -590,26 +594,26 @@ Req char = 'h'
  16  16 Ket
  19     End
 ------------------------------------------------------------------
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 Case state changes
 No first char
-Req char = 'b'
+Need char = 'b'
 Starting character set: B b 
 
 /(a*b|(?i:c*(?-i)d))/S
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 Case state changes
 No first char
-No req char
+No need char
 Starting character set: C a b c d 
 
 /a$/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
     a
  0: a
     a\n
@@ -622,10 +626,10 @@ No match
 No match
 
 /a$/m
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: multiline
 First char = 'a'
-No req char
+No need char
     a
  0: a
     a\n
@@ -638,22 +642,22 @@ No match
 No match
     
 /\Aabc/m
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored multiline
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /^abc/m 
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: multiline
 First char at start or follows \n
-Req char = 'c'
+Need char = 'c'
 
 /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
-Identifying subpattern count = 5
+Capturing subpattern count = 5
 Options: anchored
 No first char
-Req char = 'a'
+Need char = 'a'
   aaaaabbbbbcccccdef
  0: aaaaabbbbbcccccdef
  1: aaaaabbbbbcccccdef
@@ -663,37 +667,37 @@ Req char = 'a'
  5: def
 
 /(?<=foo)[ab]/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: a b 
 
 /(?<!foo)(alpha|omega)/S
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-Req char = 'a'
+Need char = 'a'
 Starting character set: a o 
 
 /(?!alphabet)[ab]/S
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: a b 
 
 /(?<=foo\n)^bar/m
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: multiline
 First char at start or follows \n
-Req char = 'r'
+Need char = 'r'
 
 /(?>^abc)/m
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: multiline
 First char at start or follows \n
-Req char = 'c'
+Need char = 'c'
     abc
  0: abc
     def\nabc
@@ -713,16 +717,16 @@ Failed: lookbehind assertion is not fixed length at offset 12
 Failed: lookbehind assertion is not fixed length at offset 13
 
 /The next three are in testinput2 because they have variable length branches/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'T'
-Req char = 's'
+Need char = 's'
 
 /(?<=bullock|donkey)-cart/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = '-'
-Req char = 't'
+Need char = 't'
     the bullock-cart
  0: -cart
     a donkey-cart race
@@ -735,17 +739,17 @@ No match
 No match
       
 /(?<=ab(?i)x|y|z)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 Case state changes
 No first char
-No req char
+No need char
 
 /(?>.*)(?<=(abcd)|(xyz))/
-Identifying subpattern count = 2
+Capturing subpattern count = 2
 No options
 First char at start or follows \n
-No req char
+No need char
     alphabetabcd
  0: alphabetabcd
  1: abcd
@@ -755,11 +759,11 @@ No req char
  2: xyz
 
 /(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 Case state changes
 First char = 'Z'
-Req char = 'Z'
+Need char = 'Z'
     abxyZZ
  0: ZZ
     abXyZZ
@@ -784,10 +788,10 @@ No match
 No match
 
 /(?<!(foo)a)bar/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'b'
-Req char = 'r'
+Need char = 'r'
     bar
  0: bar
     foobbar 
@@ -798,41 +802,42 @@ No match
 No match
 
 /This one is here because Perl 5.005_02 doesn't fail it/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'T'
-Req char = 't'
+Need char = 't'
 
 /^(a)?(?(1)a|b)+$/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: anchored
 No first char
-No req char
+No need char
     *** Failers
 No match
     a 
 No match
 
 /This one is here because I think Perl 5.005_02 gets the setting of $1 wrong/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'T'
-Req char = 'g'
+Need char = 'g'
 
 /^(a\1?){4}$/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
+Max back reference = 1
 Options: anchored
 No first char
-Req char = 'a'
+Need char = 'a'
     aaaaaa
  0: aaaaaa
  1: aa
     
 /These are syntax tests from Perl 5.005/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'T'
-Req char = '5'
+Need char = '5'
 
 /a[b-a]/
 Failed: range out of order in character class at offset 4
@@ -943,10 +948,10 @@ Failed: POSIX code 9: bad escape sequence at offset 4
 Failed: \ at end of pattern at offset 4
 
 /(a)bc(d)/
-Identifying subpattern count = 2
+Capturing subpattern count = 2
 No options
 First char = 'a'
-Req char = 'd'
+Need char = 'd'
     abcd
  0: abcd
  1: a
@@ -963,10 +968,10 @@ Req char = 'd'
 copy substring 5 failed -7
      
 /(.{20})/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
     abcdefghijklmnopqrstuvwxyz
  0: abcdefghijklmnopqrst
  1: abcdefghijklmnopqrst
@@ -980,10 +985,10 @@ copy substring 1 failed -6
  1G abcdefghijklmnopqrst (20)
      
 /(.{15})/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
     abcdefghijklmnopqrstuvwxyz
  0: abcdefghijklmno
  1: abcdefghijklmno
@@ -994,10 +999,10 @@ No req char
  1G abcdefghijklmno (15)
 
 /(.{16})/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
     abcdefghijklmnopqrstuvwxyz
  0: abcdefghijklmnop
  1: abcdefghijklmnop
@@ -1010,10 +1015,10 @@ copy substring 1 failed -6
  1L abcdefghijklmnop
     
 /^(a|(bc))de(f)/
-Identifying subpattern count = 3
+Capturing subpattern count = 3
 Options: anchored
 No first char
-Req char = 'f'
+Need char = 'f'
     adef\G1\G2\G3\G4\L 
  0: adef
  1: a
@@ -1048,10 +1053,10 @@ get substring 4 failed -7
  0C adef (4)
     
 /^abc\00def/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 'f'
+Need char = 'f'
     abc\00def\L\C0 
  0: abc\x00def
  0C abc (7)
@@ -1061,10 +1066,10 @@ Req char = 'f'
 )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ 
 )?)?)?)?)?)?)?)?)?otherword/M
 Memory allocation (code space): 428
-Identifying subpattern count = 8
+Capturing subpattern count = 8
 No options
 First char = 'w'
-Req char = 'd'
+Need char = 'd'
 
 /.*X/D
 ------------------------------------------------------------------
@@ -1074,10 +1079,10 @@ Req char = 'd'
   8   8 Ket
  11     End
 ------------------------------------------------------------------
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char at start or follows \n
-Req char = 'X'
+Need char = 'X'
 
 /.*X/Ds
 ------------------------------------------------------------------
@@ -1087,10 +1092,10 @@ Req char = 'X'
   8   8 Ket
  11     End
 ------------------------------------------------------------------
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored dotall
 No first char
-Req char = 'X'
+Need char = 'X'
 
 /(.*X|^B)/D
 ------------------------------------------------------------------
@@ -1105,10 +1110,10 @@ Req char = 'X'
  21  21 Ket
  24     End
 ------------------------------------------------------------------
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char at start or follows \n
-No req char
+No need char
 
 /(.*X|^B)/Ds
 ------------------------------------------------------------------
@@ -1123,10 +1128,10 @@ No req char
  21  21 Ket
  24     End
 ------------------------------------------------------------------
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: anchored dotall
 No first char
-No req char
+No need char
     
 /(?s)(.*X|^B)/D
 ------------------------------------------------------------------
@@ -1141,10 +1146,10 @@ No req char
  21  21 Ket
  24     End
 ------------------------------------------------------------------
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 Options: anchored dotall
 No first char
-No req char
+No need char
 
 /(?s:.*X|^B)/D
 ------------------------------------------------------------------
@@ -1162,16 +1167,16 @@ No req char
  27  27 Ket
  30     End
 ------------------------------------------------------------------
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char at start or follows \n
-No req char
+No need char
 
 /\Biss\B/+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
@@ -1182,10 +1187,10 @@ Req char = 's'
  0+ issippi
 
 /iss/G+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
@@ -1193,19 +1198,19 @@ Req char = 's'
  0+ ippi
 
 /\Biss\B/G+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
 
 /\Biss\B/g+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
@@ -1217,10 +1222,10 @@ No match
 No match
 
 /(?<=[Ms])iss/g+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
@@ -1228,28 +1233,28 @@ Req char = 's'
  0+ ippi
 
 /(?<=[Ms])iss/G+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'i'
-Req char = 's'
+Need char = 's'
     Mississippi
  0: iss
  0+ issippi
 
 /^iss/g+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 's'
+Need char = 's'
     ississippi
  0: iss
  0+ issippi
     
 /.*iss/g+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char at start or follows \n
-Req char = 's'
+Need char = 's'
     abciss\nxyzisspqr 
  0: abciss
  0+ \x0axyzisspqr
@@ -1257,10 +1262,10 @@ Req char = 's'
  0+ pqr
 
 /.i./+g
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-Req char = 'i'
+Need char = 'i'
     Mississippi
  0: Mis
  0+ sissippi
@@ -1287,28 +1292,28 @@ Req char = 'i'
  0+ souri river
 
 /^.is/+g
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 's'
+Need char = 's'
     Mississippi
  0: Mis
  0+ sissippi
 
 /^ab\n/g+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: anchored
 No first char
-Req char = 10
+Need char = 10
     ab\nab\ncd
  0: ab\x0a
  0+ ab\x0acd
 
 /^ab\n/mg+
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 Options: multiline
 First char at start or follows \n
-Req char = 10
+Need char = 10
     ab\nab\ncd
  0: ab\x0a
  0+ ab\x0acd
@@ -1316,256 +1321,256 @@ Req char = 10
  0+ cd
 
 /abc/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
 
 /abc|bac/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /(abc|bac)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /(abc|(c|dc))/
-Identifying subpattern count = 2
+Capturing subpattern count = 2
 No options
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /(abc|(d|de)c)/
-Identifying subpattern count = 2
+Capturing subpattern count = 2
 No options
 No first char
-Req char = 'c'
+Need char = 'c'
 
 /a*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 
 /a+/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /(baa|a+)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-Req char = 'a'
+Need char = 'a'
 
 /a{0,3}/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 
 /baa{3,}/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'b'
-Req char = 'a'
+Need char = 'a'
 
 /"([^\\"]+|\\.)*"/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = '"'
-Req char = '"'
+Need char = '"'
 
 /(abc|ab[cd])/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'a'
-No req char
+No need char
 
 /(a|.)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
 
 /a|ba|\w/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 
 /abc(?=pqr)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'r'
+Need char = 'r'
 
 /...(?<=abc)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 
 /abc(?!pqr)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'c'
+Need char = 'c'
 
 /ab./
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /ab[xyz]/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /abc*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /ab.c*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /a.c*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /.c*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 
 /ac*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /(a.c*|b.c*)/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 No first char
-No req char
+No need char
 
 /a.c*|aba/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /.+a/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-Req char = 'a'
+Need char = 'a'
 
 /(?=abcda)a.*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /(?=a)a.*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /a(b)*/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'a'
-No req char
+No need char
 
 /a\d*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /ab\d*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /a(\d)*/
-Identifying subpattern count = 1
+Capturing subpattern count = 1
 No options
 First char = 'a'
-No req char
+No need char
 
 /abcde{0,0}/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'd'
+Need char = 'd'
 
 /ab\d+/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /a(?(1)b)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /a(?(1)bag|big)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'g'
+Need char = 'g'
 
 /a(?(1)bag|big)*/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-No req char
+No need char
 
 /a(?(1)bag|big)+/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'g'
+Need char = 'g'
 
 /a(?(1)b..|b..)/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'b'
+Need char = 'b'
 
 /ab\d{0}e/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = 'a'
-Req char = 'e'
+Need char = 'e'
 
 /a?b?/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
     a
  0: a
     b
@@ -1580,10 +1585,10 @@ No req char
 No match
     
 /|-/
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
     abcd
  0: 
     -abc
@@ -1621,10 +1626,447 @@ No match
  1: bbbb
  2: z
  3: z
+    
+/^.?abcd/S 
+Capturing subpattern count = 0
+Options: anchored
+No first char
+Need char = 'd'
+Study returned NULL
+
+/\(             # ( at start
+  (?:           # Non-capturing bracket
+  (?>[^()]+)    # Either a sequence of non-brackets (no backtracking)
+  |             # Or
+  (?R)          # Recurse - i.e. nested bracketed string
+  )*            # Zero or more contents
+  \)            # Closing )
+  /x
+Capturing subpattern count = 0
+Options: extended
+First char = '('
+Need char = ')'
+    (abcd)
+ 0: (abcd)
+    (abcd)xyz
+ 0: (abcd)
+    xyz(abcd)
+ 0: (abcd)
+    (ab(xy)cd)pqr 
+ 0: (ab(xy)cd)
+    (ab(xycd)pqr 
+ 0: (xycd)
+    () abc () 
+ 0: ()
+    12(abcde(fsh)xyz(foo(bar))lmno)89
+ 0: (abcde(fsh)xyz(foo(bar))lmno)
+    *** Failers
+No match
+    abcd 
+No match
+    abcd)
+No match
+    (abcd  
+No match
+
+/\(  ( (?>[^()]+) | (?R) )* \) /xg
+Capturing subpattern count = 1
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)pqr 
+ 0: (ab(xy)cd)
+ 1: cd
+    1(abcd)(x(y)z)pqr
+ 0: (abcd)
+ 1: abcd
+ 0: (x(y)z)
+ 1: z
+
+/\(  (?: (?>[^()]+) | (?R) ) \) /x
+Capturing subpattern count = 0
+Options: extended
+First char = '('
+Need char = ')'
+    (abcd)
+ 0: (abcd)
+    (ab(xy)cd)
+ 0: (xy)
+    (a(b(c)d)e) 
+ 0: (c)
+    ((ab)) 
+ 0: ((ab))
+    *** Failers
+No match
+    ()   
+No match
+
+/\(  (?: (?>[^()]+) | (?R) )? \) /x
+Capturing subpattern count = 0
+Options: extended
+First char = '('
+Need char = ')'
+    ()
+ 0: ()
+    12(abcde(fsh)xyz(foo(bar))lmno)89
+ 0: (fsh)
+
+/\(  ( (?>[^()]+) | (?R) )* \) /x
+Capturing subpattern count = 1
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)
+ 0: (ab(xy)cd)
+ 1: cd
+
+/\( ( ( (?>[^()]+) | (?R) )* ) \) /x
+Capturing subpattern count = 2
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)
+ 0: (ab(xy)cd)
+ 1: ab(xy)cd
+ 2: cd
+
+/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /x
+Capturing subpattern count = 3
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)
+ 0: (ab(xy)cd)
+ 1: <unset>
+ 2: ab(xy)cd
+ 3: cd
+    (123ab(xy)cd)
+ 0: (123ab(xy)cd)
+ 1: 123
+ 2: ab(xy)cd
+ 3: cd
+
+/\( ( (123)? ( (?>[^()]+) | (?R) )* ) \) /x
+Capturing subpattern count = 3
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)
+ 0: (ab(xy)cd)
+ 1: ab(xy)cd
+ 2: <unset>
+ 3: cd
+    (123ab(xy)cd)
+ 0: (123ab(xy)cd)
+ 1: 123ab(xy)cd
+ 2: 123
+ 3: cd
+
+/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /x
+Capturing subpattern count = 11
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(xy)cd)
+ 0: (ab(xy)cd)
+ 1: ab(xy)cd
+ 2: ab(xy)cd
+ 3: ab(xy)cd
+ 4: ab(xy)cd
+ 5: ab(xy)cd
+ 6: ab(xy)cd
+ 7: ab(xy)cd
+ 8: ab(xy)cd
+ 9: ab(xy)cd
+10: ab(xy)cd
+11: cd
+
+/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /x
+Capturing subpattern count = 3
+Options: extended
+First char = '('
+Need char = ')'
+    (abcd(xyz<p>qrs)123)
+ 0: (abcd(xyz<p>qrs)123)
+ 1: abcd(xyz<p>qrs)123
+ 2: 123
+ 3: <p>qrs
+
+/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /x
+Capturing subpattern count = 3
+Options: extended
+First char = '('
+Need char = ')'
+    (ab(cd)ef)
+ 0: (ab(cd)ef)
+ 1: ab(cd)ef
+ 2: ef
+ 3: (cd)
+    (ab(cd(ef)gh)ij)
+ 0: (ab(cd(ef)gh)ij)
+ 1: ab(cd(ef)gh)ij
+ 2: ij
+ 3: (cd(ef)gh)
+
+/^[[:alnum:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [0-9A-Za-z]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:alpha:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [A-Za-z]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+             
+/^[[:ascii:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [\x00-\x7f]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:cntrl:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [\x00-\x1f\x7f]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:digit:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [0-9]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:graph:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [!-~]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:lower:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [a-z]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:print:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [ -~]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:punct:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [!-/:-@[-`{-~]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:space:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [\x09-\x0d ]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:upper:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [A-Z]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:xdigit:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [0-9A-Fa-f]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:word:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [0-9A-Z_a-z]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[[:^cntrl:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [ -~\x80-\xff]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/^[12[:^digit:]]/D
+------------------------------------------------------------------
+  0  37 Bra 0
+  3     ^
+  4     [\x00-/1-2:-\xff]
+ 37  37 Ket
+ 40     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: anchored
+No first char
+No need char
+
+/[01[:alpha:]%]/D
+------------------------------------------------------------------
+  0  36 Bra 0
+  3     [%0-1A-Za-z]
+ 36  36 Ket
+ 39     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+
+/[[.ch.]]/
+Failed: POSIX collating elements are not supported at offset 1
+
+/[[=ch=]]/
+Failed: POSIX collating elements are not supported at offset 1
+
+/[[:rhubarb:]]/
+Failed: unknown POSIX class name at offset 3
+
+/[[:upper:]]/i
+Capturing subpattern count = 0
+Options: caseless
+No first char
+No need char
+    A
+ 0: A
+    a 
+ 0: a
+    
+/[[:lower:]]/i
+Capturing subpattern count = 0
+Options: caseless
+No first char
+No need char
+    A
+ 0: A
+    a 
+ 0: a
+
+/((?-i)[[:lower:]])[[:lower:]]/i
+Capturing subpattern count = 1
+Options: caseless
+Case state changes
+No first char
+No need char
+    ab
+ 0: ab
+ 1: a
+    aB
+ 0: aB
+ 1: a
+    *** Failers
+ 0: ai
+ 1: a
+    Ab
+No match
+    AB        
+No match
 
 / End of test input /
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 First char = ' '
-Req char = ' '
+Need char = ' '
 
diff --git a/testdata/testoutput3 b/testdata/testoutput3
index d997659..a4a28c1 100644
--- a/testdata/testoutput3
+++ b/testdata/testoutput3
@@ -1,4 +1,4 @@
-PCRE version 2.08 31-Aug-1999
+PCRE version 3.0 01-Feb-2000
 
 /(?<!bar)foo/
     foo
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index c8af6cf..586cbbd 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1,4 +1,4 @@
-PCRE version 2.08 31-Aug-1999
+PCRE version 3.0 01-Feb-2000
 
 /^[\w]+/
     *** Failers
@@ -81,18 +81,18 @@ No match
  0: �cole
 
 /\w/IS
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
 
 /\w/ISLfr
-Identifying subpattern count = 0
+Capturing subpattern count = 0
 No options
 No first char
-No req char
+No need char
 Starting character set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
   � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � �
author	nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-02-24 21:39:21 +0000
committer	nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-02-24 21:39:21 +0000
commit	09f9da9675b33a31c605d9d1f913bc2b05522be2 (patch)
tree	e4e2b0bbc47b23f497e3f1b2208a9ac9a9d4ebea
parent	1622a3e7058dec7de74889c69595693ac0c64187 (diff)
download	pcre-09f9da9675b33a31c605d9d1f913bc2b05522be2.tar.gz