summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am46
-rwxr-xr-xPrepareRelease16
-rw-r--r--doc/html/NON-AUTOTOOLS-BUILD.txt402
-rw-r--r--doc/html/README.txt833
-rw-r--r--doc/html/pcre2-config.html102
-rw-r--r--doc/html/pcre2.html182
-rw-r--r--doc/html/pcre2grep.html759
-rw-r--r--doc/pcre2-config.186
-rw-r--r--doc/pcre2-config.txt81
-rw-r--r--doc/pcre2.3180
-rw-r--r--doc/pcre2build.3490
-rw-r--r--doc/pcre2compat.3190
-rw-r--r--doc/pcre2grep.1683
-rw-r--r--doc/pcre2grep.txt741
-rw-r--r--src/pcre2grep.c2
-rw-r--r--src/pcre2test.c4
16 files changed, 4754 insertions, 43 deletions
diff --git a/Makefile.am b/Makefile.am
index 7df1e86..366b88c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,29 +6,31 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
## Specify the documentation files that are distributed.
-# FIXME
dist_doc_DATA = \
AUTHORS \
COPYING \
ChangeLog \
LICENCE \
- README
-
-# doc/pcre.txt \
-# doc/pcre-config.txt \
-# doc/pcregrep.txt \
-# doc/pcretest.txt \
-# NEWS
+ NEWS \
+ README \
+ doc/pcre2.txt \
+ doc/pcre2-config.txt \
+ doc/pcre2grep.txt \
+ doc/pcre2test.txt
# FIXME
-#dist_html_DATA = \
-# doc/html/NON-AUTOTOOLS-BUILD.txt \
-# doc/html/README.txt \
-# doc/html/index.html \
-# doc/html/pcre-config.html \
+dist_html_DATA = \
+ doc/html/NON-AUTOTOOLS-BUILD.txt \
+ doc/html/README.txt \
+ doc/html/index.html \
+ doc/html/pcre2-config.html \
+ doc/html/pcre2api.html \
+ doc/html/pcre2callout.html \
+ doc/html/pcre2demo.html \
+ doc/html/pcre2test.html \
+ doc/html/pcre2unicode.html
+
# doc/html/pcre.html \
-# doc/html/pcre16.html \
-# doc/html/pcre32.html \
# doc/html/pcre_assign_jit_stack.html \
# doc/html/pcre_compile.html \
# doc/html/pcre_compile2.html \
@@ -56,11 +58,8 @@ dist_doc_DATA = \
# doc/html/pcre_utf16_to_host_byte_order.html \
# doc/html/pcre_utf32_to_host_byte_order.html \
# doc/html/pcre_version.html \
-# doc/html/pcreapi.html \
# doc/html/pcrebuild.html \
-# doc/html/pcrecallout.html \
# doc/html/pcrecompat.html \
-# doc/html/pcredemo.html \
# doc/html/pcregrep.html \
# doc/html/pcrejit.html \
# doc/html/pcrelimits.html \
@@ -72,18 +71,16 @@ dist_doc_DATA = \
# doc/html/pcreprecompile.html \
# doc/html/pcresample.html \
# doc/html/pcrestack.html \
-# doc/html/pcresyntax.html \
-# doc/html/pcretest.html \
-# doc/html/pcreunicode.html
+# doc/html/pcresyntax.html
# FIXME
dist_man_MANS = \
+ doc/pcre2-config.1 \
doc/pcre2api.3 \
doc/pcre2callout.3 \
doc/pcre2test.1 \
doc/pcre2unicode.3
-# doc/pcre2-config.1 \
# doc/pcre2.3 \
# doc/pcre2-16.3 \
# doc/pcre2-32.3 \
@@ -168,7 +165,6 @@ EXTRA_DIST += \
EXTRA_DIST += \
doc/perltest.txt \
- NON-UNIX-USE \
NON-AUTOTOOLS-BUILD \
HACKING
@@ -719,9 +715,9 @@ else
coverage:
@echo "Configuring with --enable-coverage is required to generate code coverage report."
-DISTCLEANFILES += src/*.gcda src/*.gcno
+DISTCLEANFILES += src/*.gcda src/*.gcno
-distclean-local:
+distclean-local:
rm -rf $(PACKAGE)-$(VERSION)-coverage*
endif # WITH_GCOV
diff --git a/PrepareRelease b/PrepareRelease
index c92d7f9..2b58a47 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -83,8 +83,7 @@ for file in pcre2api pcre2callout pcre2unicode ; do
done
# The three commands
-for file in pcre2test ; do
-# for file in pcre2test pcre2grep pcre-config ; do
+for file in pcre2test pcre2grep pcre2-config ; do
echo Making $file.txt
nroff -c -man $file.1 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >$file.txt
@@ -133,7 +132,7 @@ echo "Making HTML documentation"
/bin/rm html/*
cp index.html.src html/index.html
cp ../README html/README.txt
-# cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
+cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
for file in *.1 ; do
base=`basename $file .1`
@@ -187,7 +186,6 @@ files="\
COPYING \
AUTHORS \
NEWS \
- NON-UNIX-USE \
NON-AUTOTOOLS-BUILD \
INSTALL \
132html \
@@ -240,16 +238,6 @@ files="\
pcre32_utf32_utils.c \
pcre16_valid_utf16.c \
pcre32_valid_utf32.c \
- pcre_scanner.cc \
- pcre_scanner.h \
- pcre_scanner_unittest.cc \
- pcrecpp.cc \
- pcrecpp.h \
- pcrecpparg.h.in \
- pcrecpp_unittest.cc \
- pcre_stringpiece.cc \
- pcre_stringpiece.h.in \
- pcre_stringpiece_unittest.cc \
perltest.pl \
ucp.h \
makevp.bat \
diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt
new file mode 100644
index 0000000..6f36fe6
--- /dev/null
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -0,0 +1,402 @@
+Building PCRE2 without using autotools
+--------------------------------------
+
+This document has been converted from the PCRE1 document, but is not yet
+complete. I have removed a number of quite old sections about building in
+various environments, as they applied only to PCRE1 and are probably out of
+date.
+
+
+This document contains the following sections:
+
+ General
+ Generic instructions for the PCRE2 C library
+ Building for virtual Pascal
+ Stack size in Windows environments
+ Linking programs in Windows environments
+ Calling conventions in Windows environments
+ Comments about Win32 builds
+ Building PCRE2 on Windows with CMake
+ Testing with RunTest.bat
+ Building PCRE2 on native z/OS and z/VM
+
+
+GENERAL
+
+I (Philip Hazel) have no experience of Windows or VMS sytems and how their
+libraries work. The items in the PCRE2 distribution and Makefile that relate to
+anything other than Linux systems are untested by me.
+
+The basic PCRE2 library consists entirely of code written in Standard C, and so
+should compile successfully on any system that has a Standard C compiler and
+library.
+
+The PCRE2 distribution includes a "configure" file for use by the
+configure/make (autotools) build system, as found in many Unix-like
+environments. The README file contains information about the options for
+"configure".
+
+There is also support for CMake, which some users prefer, especially in Windows
+environments, though it can also be run in Unix-like environments. See the
+section entitled "Building PCRE2 on Windows with CMake" below.
+
+Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
+under the names src/config.h.generic and src/pcre2.h.generic. These are
+provided for those who build PCRE2 without using "configure" or CMake. If you
+use "configure" or CMake, the .generic versions are not used.
+
+
+GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
+
+The following are generic instructions for building the PCRE2 C library "by
+hand". If you are going to use CMake, this section does not apply to you; you
+can skip ahead to the CMake section.
+
+ (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
+ macro settings that it contains to whatever is appropriate for your
+ environment. In particular, you can alter the definition of the NEWLINE
+ macro to specify what character(s) you want to be interpreted as line
+ terminators.
+
+ When you compile any of the PCRE2 modules, you must specify
+ -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
+ sources.
+
+ An alternative approach is not to edit src/config.h, but to use -D on the
+ compiler command line to make any changes that you need to the
+ configuration options. In this case -DHAVE_CONFIG_H must not be set.
+
+ NOTE: There have been occasions when the way in which certain parameters
+ in src/config.h are used has changed between releases. (In the
+ configure/make world, this is handled automatically.) When upgrading to a
+ new release, you are strongly advised to review src/config.h.generic
+ before re-using what you had previously.
+
+ (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
+
+ (3) EITHER:
+ Copy or rename file src/pcre2_chartables.c.dist as
+ src/pcre2_chartables.c.
+
+ OR:
+ Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
+ if you have set up src/config.h), and then run it with the single
+ argument "src/pcre2_chartables.c". This generates a set of standard
+ character tables and writes them to that file. The tables are generated
+ using the default C locale for your system. If you want to use a locale
+ that is specified by LC_xxx environment variables, add the -L option to
+ the dftables command. You must use this method if you are building on a
+ system that uses EBCDIC code.
+
+ The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
+ specify alternative tables at run time.
+
+ (4) For an 8-bit library, compile the following source files, setting
+ -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set -DHAVE_CONFIG_H
+ if you have set up src/config.h with your configuration, or else use other
+ -D settings to change the configuration as required.
+
+ pcre2_auto_possess.c
+ pcre2_chartables.c
+ pcre2_compile.c
+ pcre2_config.c
+ pcre2_context.c
+ pcre2_dfa_match.c
+ pcre2_error.c
+ pcre2_jit_compile.c
+ pcre2_jit_match.c
+ pcre2_jit_misc.c
+ pcre2_maketables.c
+ pcre2_match.c
+ pcre2_match_data.c
+ pcre2_newline.c
+ pcre2_ord2utf.c
+ pcre2_pattern_info.c
+ pcre2_string_utils.c
+ pcre2_study.c
+ pcre2_substring.c
+ pcre2_tables.c
+ pcre2_ucd.c
+ pcre2_valid_utf.c
+ pcre2_xclass.c
+
+ Make sure that you include -I. in the compiler command (or equivalent for
+ an unusual compiler) so that all included PCRE2 header files are first
+ sought in the src directory under the current directory. Otherwise you run
+ the risk of picking up a previously-installed file from somewhere else.
+
+ Note that you must compile pcre2_jit_xxx.c, even if you have not defined
+ SUPPORT_JIT in src/config.h, because when JIT support is not configured,
+ dummy functions are compiled. When JIT support IS configured, the JIT
+ sources #include other files from the sljit subdirectory, where there
+ should be 16 files, all of whose names begin with "sljit".
+
+ (5) Now link all the compiled code into an object library in whichever form
+ your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
+ If your system has static and shared libraries, you may have to do this
+ once for each type.
+
+ (6) If you want to build a 16-bit library or 32-bit library (as well as, or
+ instead of the 8-bit library) just supply 16 or 32 as the value of
+ -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+
+ (7) If you want to build the POSIX wrapper functions (which apply only to the
+ 8-bit library), ensure that you have the pcre2posix.h file and then
+ compile pcre2posix.c. Link the result (on its own) as the pcre2posix
+ library.
+
+ (8) The pcre2test program can be linked with any combination of the 8-bit,
+ 16-bit and 32-bit libraries (depending on what you selected in
+ src/config.h). Compile pcre2test.c; don't forget -DHAVE_CONFIG_H if
+ necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
+ appropriate library/ies. If you compiled an 8-bit library, pcre2test also
+ needs the pcre2posix wrapper library.
+
+ (9) Run pcre2test on the testinput files in the testdata directory, and check
+ that the output matches the corresponding testoutput files. There are
+ comments about what each test does in the section entitled "Testing PCRE2"
+ in the README file. If you compiled more than one of the 8-bit, 16-bit and
+ 32-bit libraries, you need to run pcre2test with the -16 option to do
+ 16-bit tests and with the -32 option to do 32-bit tests.
+
+ Some tests are relevant only when certain build-time options are selected.
+ For example, test 4 is for Unicode support, and will not run if you have
+ built PCRE2 without it. See the comments at the start of each testinput
+ file. If you have a suitable Unix-like shell, the RunTest script will run
+ the appropriate tests for you. The command "RunTest list" will output a
+ list of all the tests.
+
+ Note that the supplied files are in Unix format, with just LF characters
+ as line terminators. You may need to edit them to change this if your
+ system uses a different convention.
+
+(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
+ by running pcre2test with the -jit option. This is done automatically by
+ the RunTest script. You might also like to build and run the freestanding
+ JIT test program, pcre2_jit_test.c.
+
+(11) If you want to use the pcre2grep command, compile and link pcre2grep.c; it
+ uses only the basic 8-bit PCRE2 library (it does not need the pcre2posix
+ library).
+
+
+BUILDING FOR VIRTUAL PASCAL
+
+FIXME FOR PCRE2
+
+A script for building PCRE2 using Borland's C++ compiler for use with VPASCAL
+was contributed by Alexander Tokarev. Stefan Weber updated the script and added
+additional files. The following files in the distribution are for building
+PCRE2 for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat,
+pcre2gexp.pas.
+
+
+STACK SIZE IN WINDOWS ENVIRONMENTS
+
+The default processor stack size of 1Mb in some Windows environments is too
+small for matching patterns that need much recursion. In particular, test 2 may
+fail because of this. Normally, running out of stack causes a crash, but there
+have been cases where the test program has just died silently. See your linker
+documentation for how to increase stack size if you experience problems. The
+Linux default of 8Mb is a reasonable choice for the stack, though even that can
+be too small for some pattern/subject combinations.
+
+PCRE2 has a compile configuration option to disable the use of stack for
+recursion so that heap is used instead. However, pattern matching is
+significantly slower when this is done. There is more about stack usage in the
+"pcre2stack" documentation.
+
+
+LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
+
+If you want to statically link a program against a PCRE2 library in the form of
+a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
+
+
+CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
+
+It is possible to compile programs to use different calling conventions using
+MSVC. Search the web for "calling conventions" for more information. To make it
+easier to change the calling convention for the exported functions in the
+PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
+definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
+not set, it defaults to empty; the default calling convention is then used
+(which is what is wanted most of the time).
+
+
+COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
+
+There are two ways of building PCRE2 using the "configure, make, make install"
+paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
+the same thing; they are completely different from each other. There is also
+support for building using CMake, which some users find a more straightforward
+way of building PCRE2 under Windows.
+
+The MinGW home page (http://www.mingw.org/) says this:
+
+ MinGW: A collection of freely available and freely distributable Windows
+ specific header files and import libraries combined with GNU toolsets that
+ allow one to produce native Windows programs that do not rely on any
+ 3rd-party C runtime DLLs.
+
+The Cygwin home page (http://www.cygwin.com/) says this:
+
+ Cygwin is a Linux-like environment for Windows. It consists of two parts:
+
+ . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
+ substantial Linux API functionality
+
+ . A collection of tools which provide Linux look and feel.
+
+On both MinGW and Cygwin, PCRE2 should build correctly using:
+
+ ./configure && make && make install
+
+This should create two libraries called libpcre2-8 and libpcre2-posix. These
+are independent libraries: when you link with libpcre2-posix you must also link
+with libpcre2-8, which contains the basic functions.
+
+Using Cygwin's compiler generates libraries and executables that depend on
+cygwin1.dll. If a library that is generated this way is distributed,
+cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
+licence, this forces not only PCRE2 to be under the GPL, but also the entire
+application. A distributor who wants to keep their own code proprietary must
+purchase an appropriate Cygwin licence.
+
+MinGW has no such restrictions. The MinGW compiler generates a library or
+executable that can run standalone on Windows without any third party dll or
+licensing issues.
+
+But there is more complication:
+
+If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
+to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
+front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
+gcc and MinGW's gcc). So, a user can:
+
+. Build native binaries by using MinGW or by getting Cygwin and using
+ -mno-cygwin.
+
+. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
+ compiler flags.
+
+The test files that are supplied with PCRE2 are in UNIX format, with LF
+characters as line terminators. Unless your PCRE2 library uses a default
+newline option that includes LF as a valid newline, it may be necessary to
+change the line terminators in the test files to get some of the tests to work.
+
+
+BUILDING PCRE2 ON WINDOWS WITH CMAKE
+
+CMake is an alternative configuration facility that can be used instead of
+"configure". CMake creates project files (make files, solution files, etc.)
+tailored to numerous development environments, including Visual Studio,
+Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
+spaces in the names for your CMake installation and your PCRE2 source and build
+directories.
+
+The following instructions were contributed by a PCRE1 user, but they should
+also work for PCRE2. If they are not followed exactly, errors may occur. In the
+event that errors do occur, it is recommended that you delete the CMake cache
+before attempting to repeat the CMake build process. In the CMake GUI, the
+cache can be deleted by selecting "File > Delete Cache".
+
+1. Install the latest CMake version available from http://www.cmake.org/, and
+ ensure that cmake\bin is on your path.
+
+2. Unzip (retaining folder structure) the PCRE2 source tree into a source
+ directory such as C:\pcre2. You should ensure your local date and time
+ is not earlier than the file dates in your source dir if the release is
+ very new.
+
+3. Create a new, empty build directory, preferably a subdirectory of the
+ source dir. For example, C:\pcre2\pcre2-xx\build.
+
+4. Run cmake-gui from the Shell envirornment of your build tool, for example,
+ Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
+ to start Cmake from the Windows Start menu, as this can lead to errors.
+
+5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
+ build directories, respectively.
+
+6. Hit the "Configure" button.
+
+7. Select the particular IDE / build tool that you are using (Visual
+ Studio, MSYS makefiles, MinGW makefiles, etc.)
+
+8. The GUI will then list several configuration options. This is where
+ you can enable Unicode support or other PCRE2 optional features.
+
+9. Hit "Configure" again. The adjacent "Generate" button should now be
+ active.
+
+10. Hit "Generate".
+
+11. The build directory should now contain a usable build system, be it a
+ solution file for Visual Studio, makefiles for MinGW, etc. Exit from
+ cmake-gui and use the generated build system with your compiler or IDE.
+ E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
+ solution, select the desired configuration (Debug, or Release, etc.) and
+ build the ALL_BUILD project.
+
+12. If during configuration with cmake-gui you've elected to build the test
+ programs, you can execute them by building the test project. E.g., for
+ MinGW: "make check"; for Visual Studio build the RUN_TESTS project. The
+ most recent build configuration is targeted by the tests. A summary of
+ test results is presented. Complete test output is subsequently
+ available for review in Testing\Temporary under your build dir.
+
+
+TESTING WITH RUNTEST.BAT FIXME FIXME NOT YET TESTED/UPDATED FIXME
+
+If configured with CMake, building the test project ("make check" or building
+ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
+on your configuration options, possibly other test programs) in the build
+directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
+
+For manual testing with RunTest.bat, provided the build dir is a subdirectory
+of the source directory: Open command shell window. Chdir to the location
+of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
+"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
+
+To run only a particular test with RunTest.Bat provide a test number argument.
+
+Otherwise:
+
+1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
+ have been created.
+
+2. Edit RunTest.bat to indentify the full or relative location of
+ the pcre2 source (wherein which the testdata folder resides), e.g.:
+
+ set srcdir=C:\pcre2\pcre2-10.00
+
+3. In a Windows command environment, chdir to the location of your bat and
+ exe programs.
+
+4. Run RunTest.bat. Test outputs will automatically be compared to expected
+ results, and discrepancies will be identified in the console output.
+
+To independently test the just-in-time compiler, run pcre2_jit_test.exe.
+
+
+BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
+
+z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
+The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
+applications can be supported through UNIX System Services, and in such an
+environment PCRE2 can be built in the same way as in other systems. However, in
+native z/OS (without UNIX System Services) and in z/VM, special ports are
+required. For details, please see this web site:
+
+ http://www.zaconsultants.net
+
+There is also a mirror here:
+
+ http://www.vsoft-software.com/downloads.html
+
+The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
+course.
+
+==========================
+Last Updated: 28 September 2014
diff --git a/doc/html/README.txt b/doc/html/README.txt
index 7ad597a..95c8747 100644
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@@ -1 +1,832 @@
-This is a placeholder README file for a work in progress.
+README file for PCRE2 (Perl-compatible regular expression library)
+------------------------------------------------------------------
+
+PCRE2 is a re-implementation of the original PCRE library with an entirely new
+API. The latest release of PCRE2 is always available in three alternative
+formats from:
+
+FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
+
+ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.gz
+ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
+ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
+
+There is a mailing list for discussion about the development of PCRE (both the
+original and new APIs) at pcre-dev@exim.org. You can access the archives and
+subscribe or manage your subscription here:
+
+ https://lists.exim.org/mailman/listinfo/pcre-dev
+
+Please read the NEWS file if you are upgrading from a previous release.
+The contents of this README file are:
+
+ The PCRE2 APIs
+ Documentation for PCRE2
+ Contributions by users of PCRE2
+ Building PCRE2 on non-Unix-like systems
+ Building PCRE2 without using autotools
+ Building PCRE2 using autotools
+ Retrieving configuration information
+ Shared libraries
+ Cross-compiling using autotools
+ Making new tarballs
+ Testing PCRE2
+ Character tables
+ File manifest
+
+
+The PCRE2 APIs
+--------------
+
+PCRE2 is written in C, and it has its own API. There are three sets of
+functions, one for the 8-bit library, which processes strings of bytes, one for
+the 16-bit library, which processes strings of 16-bit values, and one for the
+32-bit library, which processes strings of 32-bit values. As this is a new API,
+there as yet no C++ wrappers.
+
+The distribution does contain a set of C wrapper functions for the 8-bit
+library that are based on the POSIX regular expression API (see the pcre2posix
+man page). These end up in the library called libpcre2posix. Note that this
+just provides a POSIX calling interface to PCRE2; the regular expressions
+themselves still follow Perl syntax and semantics. The POSIX API is restricted,
+and does not give full access to all of PCRE2's facilities.
+
+The header file for the POSIX-style functions is called pcre2posix.h. The
+official POSIX name is regex.h, but I did not want to risk possible problems
+with existing files of that name by distributing it that way. To use PCRE2 with
+an existing program that uses the POSIX API, pcre2posix.h will have to be
+renamed or pointed at by a link.
+
+If you are using the POSIX interface to PCRE2 and there is already a POSIX
+regex library installed on your system, as well as worrying about the regex.h
+header file (as mentioned above), you must also take care when linking programs
+to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
+pick up the POSIX functions of the same name from the other library.
+
+One way of avoiding this confusion is to compile PCRE2 with the addition of
+-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
+compiler flags (CFLAGS if you are using "configure" -- see below). This has the
+effect of renaming the functions so that the names no longer clash. Of course,
+you have to do the same thing for your applications, or write them using the
+new names.
+
+
+Documentation for PCRE2
+----------------------
+
+If you install PCRE2 in the normal way on a Unix-like system, you will end up
+with a set of man pages whose names all start with "pcre2". The one that is
+just called "pcre2" lists all the others. In addition to these man pages, the
+PCRE2 documentation is supplied in two other forms:
+
+ 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
+ doc/pcre2test.txt in the source distribution. The first of these is a
+ concatenation of the text forms of all the section 3 man pages except the
+ listing of pcre2demo.c and those that summarize individual functions. The
+ other two are the text forms of the section 1 man pages for the pcre2grep
+ and pcre2test commands. These text forms are provided for ease of scanning
+ with text editors or similar tools. They are installed in
+ <prefix>/share/doc/pcre2, where <prefix> is the installation prefix
+ (defaulting to /usr/local).
+
+ 2. A set of files containing all the documentation in HTML form, hyperlinked
+ in various ways, and rooted in a file called index.html, is distributed in
+ doc/html and installed in <prefix>/share/doc/pcre2/html.
+
+
+Building PCRE2 on non-Unix-like systems
+--------------------------------------
+
+For a non-Unix-like system, please read the comments in the file
+NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
+"make" you may be able to build PCRE2 using autotools in the same way as for
+many Unix-like systems.
+
+PCRE2 can also be configured using CMake, which can be run in various ways
+(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
+NON-AUTOTOOLS-BUILD has information about CMake.
+
+PCRE2 has been compiled on many different operating systems. It should be
+straightforward to build PCRE2 on any system that has a Standard C compiler and
+library, because it uses only Standard C functions.
+
+
+Building PCRE2 without using autotools
+-------------------------------------
+
+The use of autotools (in particular, libtool) is problematic in some
+environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
+file for ways of building PCRE2 without using autotools.
+
+
+Building PCRE2 using autotools
+-----------------------------
+
+The following instructions assume the use of the widely used "configure; make;
+make install" (autotools) process.
+
+To build PCRE2 on system that supports autotools, first run the "configure"
+command from the PCRE2 distribution directory, with your current directory set
+to the directory where you want the files to be created. This command is a
+standard GNU "autoconf" configuration script, for which generic instructions
+are supplied in the file INSTALL.
+
+Most commonly, people build PCRE2 within its own distribution directory, and in
+this case, on many systems, just running "./configure" is sufficient. However,
+the usual methods of changing standard defaults are available. For example:
+
+CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
+
+This command specifies that the C compiler should be run with the flags '-O2
+-Wall' instead of the default, and that "make install" should install PCRE2
+under /opt/local instead of the default /usr/local.
+
+If you want to build in a different directory, just run "configure" with that
+directory as current. For example, suppose you have unpacked the PCRE2 source
+into /source/pcre2/pcre2-xxx, but you want to build it in
+/build/pcre2/pcre2-xxx:
+
+cd /build/pcre2/pcre2-xxx
+/source/pcre2/pcre2-xxx/configure
+
+PCRE2 is written in C and is normally compiled as a C library. However, it is
+possible to build it as a C++ library, though the provided building apparatus
+does not have any features to support this.
+
+There are some optional features that can be included or omitted from the PCRE2
+library. They are also documented in the pcre2build man page.
+
+. By default, both shared and static libraries are built. You can change this
+ by adding one of these options to the "configure" command:
+
+ --disable-shared
+ --disable-static
+
+ (See also "Shared libraries on Unix-like systems" below.)
+
+. By default, only the 8-bit library is built. If you add --enable-pcre16 to
+ the "configure" command, the 16-bit library is also built. If you add
+ --enable-pcre32 to the "configure" command, the 32-bit library is also built.
+ If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
+ building the 8-bit library.
+
+. If you want to include support for just-in-time compiling, which can give
+ large performance improvements on certain platforms, add --enable-jit to the
+ "configure" command. This support is available only for certain hardware
+ architectures. If you try to enable it on an unsupported architecture, there
+ will be a compile time error. FIXME: NOT YET IMPLEMENTED.
+
+. When JIT support is enabled, pcre2grep automatically makes use of it, unless
+ you add --disable-pcre2grep-jit to the "configure" command.
+
+. If you want to make use of the support for UTF-8 Unicode character strings in
+ the 8-bit library, UTF-16 Unicode character strings in the 16-bit library,
+ and UTF-32 Unicode character strings in the 32-bit library, you must add
+ --enable-unicode to the "configure" command. Without it, the code for
+ handling UTF-8, UTF-16 and UTF-8 is not included. It is not possible to
+ configure one library with UTF support and the other without in the same
+ configuration.
+
+ Even when --enable-unicode is included, the use of a UTF encoding still has
+ to be enabled by an option at run time. When PCRE2 is compiled with this
+ option, its input can only either be ASCII or UTF-8/16/32, even when running
+ on EBCDIC platforms. It is not possible to use both --enable-unicode and
+ --enable-ebcdic at the same time.
+
+ When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
+ includes support for the \P, \p, and \X sequences that recognize Unicode
+ character properties. However, only the basic two-letter properties such as
+ Lu are supported.
+
+. You can build PCRE2 to recognize either CR or LF or the sequence CRLF or any
+ of the preceding, or any of the Unicode newline sequences as indicating the
+ end of a line. Whatever you specify at build time is the default; the caller
+ of PCRE2 can change the selection at run time. The default newline indicator
+ is a single LF character (the Unix standard). You can specify the default
+ newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
+ or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
+ --enable-newline-is-any to the "configure" command, respectively.
+
+ If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
+ the standard tests will fail, because the lines in the test files end with
+ LF. Even if the files are edited to change the line endings, there are likely
+ to be some failures. With --enable-newline-is-anycrlf or
+ --enable-newline-is-any, many tests should succeed, but there may be some
+ failures.
+
+. By default, the sequence \R in a pattern matches any Unicode line ending
+ sequence. This is independent of the option specifying what PCRE2 considers
+ to be the end of a line (see above). However, the caller of PCRE2 can
+ restrict \R to match only CR, LF, or CRLF. You can make this the default by
+ adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
+
+. PCRE2 has a counter that limits the depth of nesting of parentheses in a
+ pattern. This limits the amount of system stack that a pattern uses when it
+ is compiled. The default is 250, but you can change it by setting, for
+ example,
+
+ --with-parens-nest-limit=500
+
+. PCRE2 has a counter that can be set to limit the amount of resources it uses
+ when matching a pattern. If the limit is exceeded during a match, the match
+ fails. The default is ten million. You can change the default by setting, for
+ example,
+
+ --with-match-limit=500000
+
+ on the "configure" command. This is just the default; individual calls to
+ pcre2_match() can supply their own value. There is more discussion on the
+ pcre2api man page.
+
+. There is a separate counter that limits the depth of recursive function calls
+ during a matching process. This also has a default of ten million, which is
+ essentially "unlimited". You can change the default by setting, for example,
+
+ --with-match-limit-recursion=500000
+
+ Recursive function calls use up the runtime stack; running out of stack can
+ cause programs to crash in strange ways. There is a discussion about stack
+ sizes in the pcre2stack man page.
+
+. In the 8-bit library, the default maximum compiled pattern size is around
+ 64K. You can increase this by adding --with-link-size=3 to the "configure"
+ command. PCRE2 then uses three bytes instead of two for offsets to different
+ parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
+ the same as --with-link-size=4, which (in both libraries) uses four-byte
+ offsets. Increasing the internal link size reduces performance. In the 32-bit
+ library, the link size setting is ignored, as 4-byte offsets are always used.
+
+. You can build PCRE2 so that its internal match() function that is called from
+ pcre2_match() does not call itself recursively. Instead, it uses memory
+ blocks obtained from the heap to save data that would otherwise be saved on
+ the stack. To build PCRE2 like this, use
+
+ --disable-stack-for-recursion
+
+ on the "configure" command. PCRE2 runs more slowly in this mode, but it may
+ be necessary in environments with limited stack sizes. This applies only to
+ the normal execution of the pcre2_match() function; if JIT support is being
+ successfully used, it is not relevant. Equally, it does not apply to
+ pcre2_dfa_match(), which does not use deeply nested recursion. There is a
+ discussion about stack sizes in the pcre2stack man page.
+
+. For speed, PCRE2 uses four tables for manipulating and identifying characters
+ whose code point values are less than 256. By default, it uses a set of
+ tables for ASCII encoding that is part of the distribution. If you specify
+
+ --enable-rebuild-chartables
+
+ a program called dftables is compiled and run in the default C locale when
+ you obey "make". It builds a source file called pcre2_chartables.c. If you do
+ not specify this option, pcre2_chartables.c is created as a copy of
+ pcre2_chartables.c.dist. See "Character tables" below for further
+ information.
+
+. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
+ character code (as opposed to ASCII/Unicode) by specifying
+
+ --enable-ebcdic
+
+ This automatically implies --enable-rebuild-chartables (see above). However,
+ when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
+ both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
+ which specifies that the code value for the EBCDIC NL character is 0x25
+ instead of the default 0x15.
+
+. In environments where valgrind is installed, if you specify
+
+ --enable-valgrind
+
+ PCRE2 will use valgrind annotations to mark certain memory regions as
+ unaddressable. This allows it to detect invalid memory accesses, and is
+ mostly useful for debugging PCRE2 itself.
+
+. In environments where the gcc compiler is used and lcov version 1.6 or above
+ is installed, if you specify
+
+ --enable-coverage
+
+ the build process implements a code coverage report for the test suite. The
+ report is generated by running "make coverage". If ccache is installed on
+ your system, it must be disabled when building PCRE2 for coverage reporting.
+ You can do this by setting the environment variable CCACHE_DISABLE=1 before
+ running "make" to build PCRE2. There is more information about coverage
+ reporting in the "pcre2build" documentation.
+
+. The pcre2grep program currently supports only 8-bit data files, and so
+ requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
+ libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
+ specifying one or both of
+
+ --enable-pcre2grep-libz
+ --enable-pcre2grep-libbz2
+
+ Of course, the relevant libraries must be installed on your system.
+
+. The default size (in bytes) of the internal buffer used by pcre2grep can be
+ set by, for example:
+
+ --with-pcre2grep-bufsize=51200
+
+ The value must be a plain integer. The default is 20480.
+
+. It is possible to compile pcre2test so that it links with the libreadline
+ or libedit libraries, by specifying, respectively,
+
+ --enable-pcre2test-libreadline or --enable-pcre2test-libedit
+
+ If this is done, when pcre2test's input is from a terminal, it reads it using
+ the readline() function. This provides line-editing and history facilities.
+ Note that libreadline is GPL-licenced, so if you distribute a binary of
+ pcre2test linked in this way, there may be licensing issues. These can be
+ avoided by linking with libedit (which has a BSD licence) instead.
+
+ Enabling libreadline causes the -lreadline option to be added to the
+ pcre2test build. In many operating environments with a sytem-installed
+ readline library this is sufficient. However, in some environments (e.g. if
+ an unmodified distribution version of readline is in use), it may be
+ necessary to specify something like LIBS="-lncurses" as well. This is
+ because, to quote the readline INSTALL, "Readline uses the termcap functions,
+ but does not link with the termcap or curses library itself, allowing
+ applications which link with readline the to choose an appropriate library."
+ If you get error messages about missing functions tgetstr, tgetent, tputs,
+ tgetflag, or tgoto, this is the problem, and linking with the ncurses library
+ should fix it.
+
+The "configure" script builds the following files for the basic C library:
+
+. Makefile the makefile that builds the library
+. src/config.h build-time configuration options for the library
+. src/pcre2.h the public PCRE2 header file
+. pcre2-config script that shows the building settings such as CFLAGS
+ that were set for "configure"
+. libpcre2-8.pc )
+. libpcre2-16.pc ) data for the pkg-config command
+. libpcre2-32.pc )
+. libpcre2-posix.pc )
+. libtool script that builds shared and/or static libraries
+
+Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
+tarballs under the names config.h.generic and pcre2.h.generic. These are
+provided for those who have to build PCRE2 without using "configure" or CMake.
+If you use "configure" or CMake, the .generic versions are not used.
+
+The "configure" script also creates config.status, which is an executable
+script that can be run to recreate the configuration, and config.log, which
+contains compiler output from tests that "configure" runs.
+
+Once "configure" has run, you can run "make". This builds whichever of the
+libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
+program called pcre2test. If you enabled JIT support with --enable-jit, another
+test program called pcre2_jit_test is built as well. FIXME: still to be
+implemented. If the 8-bit library is built, libpcre2-posix and the pcre2grep
+command are also built.
+
+The command "make check" runs all the appropriate tests. Details of the PCRE2
+tests are given below in a separate section of this document.
+
+You can use "make install" to install PCRE2 into live directories on your
+system. The following are installed (file names are all relative to the
+<prefix> that is set when "configure" is run):
+
+ Commands (bin):
+ pcre2test
+ pcre2grep (if 8-bit support is enabled)
+ pcre2-config
+
+ Libraries (lib):
+ libpcre2-8 (if 8-bit support is enabled)
+ libpcre2-16 (if 16-bit support is enabled)
+ libpcre2-32 (if 32-bit support is enabled)
+ libpcre2-posix (if 8-bit support is enabled)
+
+ Configuration information (lib/pkgconfig):
+ libpcre2-8.pc
+ libpcre2-16.pc
+ libpcre2-32.pc
+ libpcre2-posix.pc
+
+ Header files (include):
+ pcre2.h
+ pcre2posix.h
+
+ Man pages (share/man/man{1,3}):
+ pcre2grep.1
+ pcre2test.1
+ pcre2-config.1
+ pcre2.3
+ pcre2*.3 (lots more pages, all starting "pcre2")
+
+ HTML documentation (share/doc/pcre2/html):
+ index.html
+ *.html (lots more pages, hyperlinked from index.html)
+
+ Text file documentation (share/doc/pcre2):
+ AUTHORS
+ COPYING
+ ChangeLog
+ LICENCE
+ NEWS
+ README
+ pcre2.txt (a concatenation of the man(3) pages)
+ pcre2test.txt the pcre2test man page
+ pcre2grep.txt the pcre2grep man page
+ pcre2-config.txt the pcre2-config man page
+
+If you want to remove PCRE2 from your system, you can run "make uninstall".
+This removes all the files that "make install" installed. However, it does not
+remove any directories, because these are often shared with other programs.
+
+
+Retrieving configuration information
+------------------------------------
+
+Running "make install" installs the command pcre2-config, which can be used to
+recall information about the PCRE2 configuration and installation. For example:
+
+ pcre2-config --version
+
+prints the version number, and
+
+ pcre2-config --libs8
+
+outputs information about where the 8-bit library is installed. This command
+can be included in makefiles for programs that use PCRE2, saving the programmer
+from having to remember too many details. Run pcre2-config with no arguments to
+obtain a list of possible arguments.
+
+The pkg-config command is another system for saving and retrieving information
+about installed libraries. Instead of separate commands for each library, a
+single command is used. For example:
+
+ pkg-config --libs libpcre2-16
+
+The data is held in *.pc files that are installed in a directory called
+<prefix>/lib/pkgconfig.
+
+
+Shared libraries
+----------------
+
+The default distribution builds PCRE2 as shared libraries and static libraries,
+as long as the operating system supports shared libraries. Shared library
+support relies on the "libtool" script which is built as part of the
+"configure" process.
+
+The libtool script is used to compile and link both shared and static
+libraries. They are placed in a subdirectory called .libs when they are newly
+built. The programs pcre2test and pcre2grep are built to use these uninstalled
+libraries (by means of wrapper scripts in the case of shared libraries). When
+you use "make install" to install shared libraries, pcre2grep and pcre2test are
+automatically re-built to use the newly installed shared libraries before being
+installed themselves. However, the versions left in the build directory still
+use the uninstalled libraries.
+
+To build PCRE2 using static libraries only you must use --disable-shared when
+configuring it. For example:
+
+./configure --prefix=/usr/gnu --disable-shared
+
+Then run "make" in the usual way. Similarly, you can use --disable-static to
+build only shared libraries.
+
+
+Cross-compiling using autotools
+-------------------------------
+
+You can specify CC and CFLAGS in the normal way to the "configure" command, in
+order to cross-compile PCRE2 for some other host. However, you should NOT
+specify --enable-rebuild-chartables, because if you do, the dftables.c source
+file is compiled and run on the local host, in order to generate the inbuilt
+character tables (the pcre2_chartables.c file). This will probably not work,
+because dftables.c needs to be compiled with the local compiler, not the cross
+compiler.
+
+When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
+created by making a copy of pcre2_chartables.c.dist, which is a default set of
+tables that assumes ASCII code. Cross-compiling with the default tables should
+not be a problem.
+
+If you need to modify the character tables when cross-compiling, you should
+move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
+and run it on the local host to make a new version of pcre2_chartables.c.dist.
+Then when you cross-compile PCRE2 this new version of the tables will be used.
+
+
+Making new tarballs
+-------------------
+
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.
+
+If you have modified any of the man page sources in the doc directory, you
+should first run the PrepareRelease script before making a distribution. This
+script creates the .txt and HTML forms of the documentation from the man pages.
+
+
+Testing PCRE2
+------------
+
+To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
+There is another script called RunGrepTest that tests the options of the
+pcre2grep command. When JIT support is enabled, another test program called
+pcre2_jit_test is built. Both the scripts and all the program tests are run if
+you obey "make check". For other environments, see the instructions in
+NON-AUTOTOOLS-BUILD.
+
+The RunTest script runs the pcre2test test program (which is documented in its
+own man page) on each of the relevant testinput files in the testdata
+directory, and compares the output with the contents of the corresponding
+testoutput files. RunTest uses a file called testtry to hold the main output
+from pcre2test. Other files whose names begin with "test" are used as working
+files in some tests.
+
+Some tests are relevant only when certain build-time options were selected. For
+example, the tests for UTF-8/16/32 support are run only if --enable-unicode was
+used. RunTest outputs a comment when it skips a test.
+
+Many of the tests that are not skipped are run twice if JIT support is
+available. On the second run, JIT compilation is forced. This testing can be
+suppressed by putting "nojit" on the RunTest command line.
+
+The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
+libraries that are enabled. If you want to run just one set of tests, call
+RunTest with either the -8, -16 or -32 option.
+
+If valgrind is installed, you can run the tests under it by putting "valgrind"
+on the RunTest command line. To run pcre2test on just one or more specific test
+files, give their numbers as arguments to RunTest, for example:
+
+ RunTest 2 7 11
+
+You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
+end), or a number preceded by ~ to exclude a test. For example:
+
+ Runtest 3-15 ~10
+
+This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
+except test 13. Whatever order the arguments are in, the tests are always run
+in numerical order.
+
+You can also call RunTest with the single argument "list" to cause it to output
+a list of tests.
+
+The first two tests can always be run, as they expect only plain text strings
+(not UTF) and make no use of Unicode properties. The first test file can be fed
+directly into the perltest.pl script to check that Perl gives the same results.
+The only difference you should see is in the first few lines, where the Perl
+version is given instead of the PCRE2 version. The second set of tests check
+auxiliary functions, error detection, and run-time flags that are specific to
+PCRE2, as well as the POSIX wrapper API. It also uses the debugging flags to
+check some of the internals of pcre2_compile().
+
+If you build PCRE2 with a locale setting that is not the standard C locale, the
+character tables may be different (see next paragraph). In some cases, this may
+cause failures in the second set of tests. For example, in a locale where the
+isprint() function yields TRUE for characters in the range 128-255, the use of
+[:isascii:] inside a character class defines a different set of characters, and
+this shows up in this test as a difference in the compiled code, which is being
+listed for checking. Where the comparison test output contains [\x00-\x7f] the
+test will contain [\x00-\xff], and similarly in some other cases. This is not a
+bug in PCRE2.
+
+The third set of tests checks pcre2_maketables(), the facility for building a
+set of character tables for a specific locale and using them instead of the
+default tables. The script uses the "locale" command to check for the
+availability of the "fr_FR", "french", or "fr" locale, and uses the first one
+that it finds. If the "locale" command fails, or if its output doesn't include
+"fr_FR", "french", or "fr" in the list of available locales, the third test
+cannot be run, and a comment is output to say why. If running this test
+produces an error like this
+
+ ** Failed to set locale "fr_FR"
+
+it means that the given locale is not available on your system, despite being
+listed by "locale". This does not mean that PCRE2 is broken. There are three
+alternative output files for the third test, because three different versions
+of the French locale have been encountered. The test passes if its output
+matches any one of them.
+
+The fourth and fifth tests check UTF and Unicode property support, the fourth
+being compatible with the perltest.pl script, and the fifth checking
+PCRE2-specific things.
+
+The sixth and seventh tests check the pcre2_dfa_match() alternative matching
+function, in non-UTF mode and UTF-mode with Unicode property support,
+respectively.
+
+The eighth test checks some internal offsets and code size features; it is
+run only when the default "link size" of 2 is set (in other cases the sizes
+change) and when Unicode support is enabled.
+
+The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
+twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
+generate different output in 8-bit mode. Each pair are for general cases and
+Unicode support, respectively. The thirteenth test checks the handling of
+non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
+modes.
+
+The fourteenth test is run only when JIT support is not available, and the
+fifteenth test is run only when JIT support is available. They test some
+JIT-specific features such as information output from pcre2test about JIT
+compilation.
+
+The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
+POSIX interface to the 8-bit library, withouth and with Unicode support,
+respectively.
+
+
+Character tables
+----------------
+
+For speed, PCRE2 uses four tables for manipulating and identifying characters
+whose code point values are less than 256. By default, a set of tables that is
+built into the library is used. The pcre2_maketables() function can be called
+by an application to create a new set of tables in the current locale. This are
+passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
+compile context.
+
+The source file called pcre2_chartables.c contains the default set of tables.
+By default, this is created as a copy of pcre2_chartables.c.dist, which
+contains tables for ASCII coding. However, if --enable-rebuild-chartables is
+specified for ./configure, a different version of pcre2_chartables.c is built
+by the program dftables (compiled from dftables.c), which uses the ANSI C
+character handling functions such as isalnum(), isalpha(), isupper(),
+islower(), etc. to build the table sources. This means that the default C
+locale which is set for your system will control the contents of these default
+tables. You can change the default tables by editing pcre2_chartables.c and
+then re-building PCRE2. If you do this, you should take care to ensure that the
+file does not get automatically re-generated. The best way to do this is to
+move pcre2_chartables.c.dist out of the way and replace it with your customized
+tables.
+
+When the dftables program is run as a result of --enable-rebuild-chartables,
+it uses the default C locale that is set on your system. It does not pay
+attention to the LC_xxx environment variables. In other words, it uses the
+system's default locale rather than whatever the compiling user happens to have
+set. If you really do want to build a source set of character tables in a
+locale that is specified by the LC_xxx variables, you can run the dftables
+program by hand with the -L option. For example:
+
+ ./dftables -L pcre2_chartables.c.special
+
+The first two 256-byte tables provide lower casing and case flipping functions,
+respectively. The next table consists of three 32-byte bit maps which identify
+digits, "word" characters, and white space, respectively. These are used when
+building 32-byte bit maps that represent character classes for code points less
+than 256. The final 256-byte table has bits indicating various character types,
+as follows:
+
+ 1 white space character
+ 2 letter
+ 4 decimal digit
+ 8 hexadecimal digit
+ 16 alphanumeric or '_'
+ 128 regular expression metacharacter or binary zero
+
+You should not alter the set of characters that contain the 128 bit, as that
+will cause PCRE2 to malfunction.
+
+
+File manifest
+-------------
+
+The distribution should contain the files listed below.
+
+(A) Source files for the PCRE2 library functions and their headers are found in
+ the src directory:
+
+ src/dftables.c auxiliary program for building pcre2_chartables.c
+ when --enable-rebuild-chartables is specified
+
+ src/pcre2_chartables.c.dist a default set of character tables that assume
+ ASCII coding; unless --enable-rebuild-chartables is
+ specified, used by copying to pcre2_chartables.c
+
+ src/pcre2posix.c )
+ src/pcre2_auto_possess.c )
+ src/pcre2_compile.c )
+ src/pcre2_config.c )
+ src/pcre2_context.c )
+ src/pcre2_dfa_match.c )
+ src/pcre2_error.c )
+ src/pcre2_exec.c )
+ src/pcre2_jit_compile.c )
+ src/pcre2_jit_match.c ) sources for the functions in the library,
+ src/pcre2_jit_misc.c ) and some internal functions that they use
+ src/pcre2_maketables.c )
+ src/pcre2_match.c )
+ src/pcre2_match_data.c )
+ src/pcre2_newline.c )
+ src/pcre2_ord2utf.c )
+ src/pcre2_pattern_info.c )
+ src/pcre2_string_utils.c )
+ src/pcre2_study.c )
+ src/pcre2_substring.c )
+ src/pcre2_tables.c )
+ src/pcre2_ucd.c )
+ src/pcre2_valid_utf.c )
+ src/pcre2_xclass.c )
+
+ src/pcre2_printint.c debugging function that is used by pcre2test,
+
+ src/config.h.in template for config.h, when built by "configure"
+ src/pcre2.h.in template for pcre2.h when built by "configure"
+ src/pcre2posix.h header for the external POSIX wrapper API
+ src/pcre2_internal.h header for internal use
+ src/pcre2_intmodedep.h a mode-specific internal header
+ src/pcre2_ucp.h header for Unicode property handling
+
+ sljit/* 16 files that make up the JIT compiler FIXME
+
+(B) Source files for programs that use PCRE2:
+
+ src/pcre2demo.c simple demonstration of coding calls to PCRE2
+ src/pcre2grep.c source of a grep utility that uses PCRE2
+ src/pcre2test.c comprehensive test program
+
+(C) Auxiliary files:
+
+ 132html script to turn "man" pages into HTML
+ AUTHORS information about the author of PCRE2
+ ChangeLog log of changes to the code
+ CleanTxt script to clean nroff output for txt man pages
+ Detrail script to remove trailing spaces
+ HACKING some notes about the internals of PCRE2
+ INSTALL generic installation instructions
+ LICENCE conditions for the use of PCRE2
+ COPYING the same, using GNU's standard name
+ Makefile.in ) template for Unix Makefile, which is built by
+ ) "configure"
+ Makefile.am ) the automake input that was used to create
+ ) Makefile.in
+ NEWS important changes in this release
+ NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
+ PrepareRelease script to make preparations for "make dist"
+ README this file
+ RunTest a Unix shell script for running tests
+ RunGrepTest a Unix shell script for pcre2grep tests
+ aclocal.m4 m4 macros (generated by "aclocal")
+ config.guess ) files used by libtool,
+ config.sub ) used only when building a shared library
+ configure a configuring shell script (built by autoconf)
+ configure.ac ) the autoconf input that was used to build
+ ) "configure" and config.h
+ depcomp ) script to find program dependencies, generated by
+ ) automake
+ doc/*.3 man page sources for PCRE2
+ doc/*.1 man page sources for pcre2grep and pcre2test
+ doc/index.html.src the base HTML page
+ doc/html/* HTML documentation
+ doc/pcre2.txt plain text version of the man pages
+ doc/pcre2test.txt plain text documentation of test program
+ doc/perltest.txt plain text documentation of Perl test program
+ install-sh a shell script for installing files
+ libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
+ libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
+ libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
+ libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
+ ltmain.sh file used to build a libtool script
+ missing ) common stub for a few missing GNU programs while
+ ) installing, generated by automake
+ mkinstalldirs script for making install directories
+ perltest.pl Perl test program
+ pcre2-config.in source of script which retains PCRE2 information
+ pcre2_jit_test.c test program for the JIT compiler
+ testdata/testinput* test data for main library tests
+ testdata/testoutput* expected test results
+ testdata/grep* input and output for pcre2grep tests
+ testdata/* other supporting test files
+
+(D) Auxiliary files for cmake support
+
+ cmake/COPYING-CMAKE-SCRIPTS
+ cmake/FindPackageHandleStandardArgs.cmake
+ cmake/FindEditline.cmake
+ cmake/FindReadline.cmake
+ CMakeLists.txt
+ config-cmake.h.in
+
+(E) Auxiliary files for VPASCAL FIXME FIXME
+
+ makevp.bat
+ makevp_c.txt
+ makevp_l.txt
+ pcre2gexp.pas
+
+(F) Auxiliary files for building PCRE2 "by hand"
+
+ pcre2.h.generic ) a version of the public PCRE2 header file
+ ) for use in non-"configure" environments
+ config.h.generic ) a version of config.h for use in non-"configure"
+ ) environments
+
+(F) Miscellaneous
+
+ RunTest.bat a script for running tests under Windows FIXME
+
+Philip Hazel
+Email local part: ph10
+Email domain: cam.ac.uk
+Last updated: 27 October 2014
diff --git a/doc/html/pcre2-config.html b/doc/html/pcre2-config.html
new file mode 100644
index 0000000..b71d760
--- /dev/null
+++ b/doc/html/pcre2-config.html
@@ -0,0 +1,102 @@
+<html>
+<head>
+<title>pcre2-config specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2-config man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">OPTIONS</a>
+<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
+<li><a name="TOC5" href="#SEC5">AUTHOR</a>
+<li><a name="TOC6" href="#SEC6">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>pcre2-config [--prefix] [--exec-prefix] [--version]</b>
+<b> [--libs8] [--libs16] [--libs32] [--libs-posix]</b>
+<b> [--cflags] [--cflags-posix]</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+<b>pcre2-config</b> returns the configuration of the installed PCRE2 libraries
+and the options required to compile a program to use them. Some of the options
+apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
+not available for libraries that have not been built. If an unavailable option
+is encountered, the "usage" information is output.
+</P>
+<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
+<P>
+<b>--prefix</b>
+Writes the directory prefix used in the PCRE2 installation for architecture
+independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
+systems) to the standard output.
+</P>
+<P>
+<b>--exec-prefix</b>
+Writes the directory prefix used in the PCRE2 installation for architecture
+dependent files (normally the same as <b>--prefix</b>) to the standard output.
+</P>
+<P>
+<b>--version</b>
+Writes the version number of the installed PCRE2 libraries to the standard
+output.
+</P>
+<P>
+<b>--libs8</b>
+Writes to the standard output the command line options required to link
+with the 8-bit PCRE2 library (<b>-lpcre2-8</b> on many systems).
+</P>
+<P>
+<b>--libs16</b>
+Writes to the standard output the command line options required to link
+with the 16-bit PCRE2 library (<b>-lpcre2-16</b> on many systems).
+</P>
+<P>
+<b>--libs32</b>
+Writes to the standard output the command line options required to link
+with the 32-bit PCRE2 library (<b>-lpcre2-32</b> on many systems).
+</P>
+<P>
+<b>--libs-posix</b>
+Writes to the standard output the command line options required to link with
+PCRE2's POSIX API wrapper library (<b>-lpcre2-posix</b> <b>-lpcre2-8</b> on many
+systems).
+</P>
+<P>
+<b>--cflags</b>
+Writes to the standard output the command line options required to compile
+files that use PCRE2 (this may include some <b>-I</b> options, but is blank on
+many systems).
+</P>
+<P>
+<b>--cflags-posix</b>
+Writes to the standard output the command line options required to compile
+files that use PCRE2's POSIX API wrapper library (this may include some
+<b>-I</b> options, but is blank on many systems).
+</P>
+<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2(3)</b>
+</P>
+<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
+<P>
+This manual page was originally written by Mark Baker for the Debian GNU/Linux
+system. It has been subsequently revised as a generic PCRE2 man page.
+</P>
+<br><a name="SEC6" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html
new file mode 100644
index 0000000..f8672b8
--- /dev/null
+++ b/doc/html/pcre2.html
@@ -0,0 +1,182 @@
+<html>
+<head>
+<title>pcre2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2 man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
+<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
+<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
+<li><a name="TOC4" href="#SEC4">AUTHOR</a>
+<li><a name="TOC5" href="#SEC5">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
+<P>
+PCRE2 is the name used for a revised API for the PCRE library, which is a set
+of functions, written in C, that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences. Some
+features that appeared in Python and the original PCRE before they appeared in
+Perl are also available using the Python syntax, there is some support for one
+or two .NET and Oniguruma syntax items, and there are options for requesting
+some minor changes that give better ECMAScript (aka JavaScript) compatibility.
+</P>
+<P>
+The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
+code units, which means that up to three separate libraries may be installed.
+The original work to extend PCRE to 16-bit and 32-bit code units was done by
+Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
+can be interpreted either as one character per code unit, or as UTF-encoded
+Unicode, with support for Unicode general category properties. Unicode is
+optional at build time, and must be enabled explicitly at run time. The version
+of Unicode in use can be discovered by running
+<pre>
+ pcre2test -C
+</PRE>
+</P>
+<P>
+The three libraries contain identical sets of functions, with names ending in
+_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
+by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
+one code unit width can be written using generic names such as
+<b>pcre2_compile()</b>, and the documentation is written assuming that this is
+the case.
+</P>
+<P>
+In addition to the Perl-compatible matching function, PCRE2 contains an
+alternative function that matches the same compiled patterns in a different
+way. In certain circumstances, the alternative function has some advantages.
+For a discussion of the two matching algorithms, see the
+<a href="pcre2matching.html"><b>pcre2matching</b></a>
+page.
+</P>
+<P>
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE2 are given in separate documents. See the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+and
+<a href="pcre2compat.html"><b>pcre2compat</b></a>
+pages. There is a syntax summary in the
+<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
+page.
+</P>
+<P>
+Some features of PCRE2 can be included, excluded, or changed when the library
+is built. The
+<a href="pcre2_config.html"><b>pcre2_config()</b></a>
+function makes it possible for a client to discover which features are
+available. The features themselves are described in the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+page. Documentation about building PCRE2 for various operating systems can be
+found in the
+<a href="README.txt"><b>README</b></a>
+and
+<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS_BUILD</b></a>
+files in the source distribution.
+</P>
+<P>
+The libraries contains a number of undocumented internal functions and data
+tables that are used by more than one of the exported external functions, but
+which are not intended for use by external callers. Their names all begin with
+"_pcre2", which hopefully will not provoke any name clashes. In some
+environments, it is possible to control which external symbols are exported
+when a shared library is built, and in these cases the undocumented symbols are
+not exported.
+</P>
+<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
+<P>
+If you are using PCRE2 in a non-UTF application that permits users to supply
+arbitrary patterns for compilation, you should be aware of a feature that
+allows users to turn on UTF support from within a pattern, provided that PCRE2
+was built with Unicode support. For example, an 8-bit pattern that begins with
+"(*UTF)" turns on UTF-8 mode, which interprets patterns and subjects as strings
+of UTF-8 code units instead of individual 8-bit characters. This causes both
+the pattern and any data against which it is matched to be checked for UTF-8
+validity. If the data string is very long, such a check might use sufficiently
+many resources as to cause your application to lose performance.
+</P>
+<P>
+One way of guarding against this possibility is to use the
+<b>pcre2_pattern_info()</b> function to check the compiled pattern's options for
+UTF. Alternatively, you can set the PCRE2_NEVER_UTF option at compile time.
+This causes an compile time error if a pattern contains a UTF-setting sequence.
+</P>
+<P>
+If your application is one that supports UTF, be aware that validity checking
+can take time. If the same data string is to be matched many times, you can use
+the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
+running redundant checks.
+</P>
+<P>
+Another way that performance can be hit is by running a pattern that has a very
+large search tree against a string that will never match. Nested unlimited
+repeats in a pattern are a common example. PCRE2 provides some protection
+against this: see the <b>pcre2_set_match_limit()</b> function in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page.
+</P>
+<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
+<P>
+The user documentation for PCRE2 comprises a number of different sections. In
+the "man" format, each of these is a separate "man page". In the HTML format,
+each is a separate page, linked from the index page. In the plain text format,
+the descriptions of the <b>pcre2grep</b> and <b>pcre2test</b> programs are in
+files called <b>pcre2grep.txt</b> and <b>pcre2test.txt</b>, respectively. The
+remaining sections, except for the <b>pcre2demo</b> section (which is a program
+listing), and the short pages for individual functions, are concatenated in
+<b>pcre2.txt</b>, for ease of searching. The sections are as follows:
+<pre>
+ pcre2 this document FIXME CHECK THIS LIST
+ pcre2-config show PCRE2 installation configuration information
+ pcre2api details of PCRE2's native C API
+ pcre2build building PCRE2
+ pcre2callout details of the callout feature
+ pcre2compat discussion of Perl compatibility
+ pcre2demo a demonstration C program that uses PCRE2
+ pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
+ pcre2jit discussion of the just-in-time optimization support
+ pcre2limits details of size and other limits
+ pcre2matching discussion of the two matching algorithms
+ pcre2partial details of the partial matching facility
+ pcre2pattern syntax and semantics of supported regular expressions
+ pcre2perform discussion of performance issues
+ pcre2posix the POSIX-compatible C API for the 8-bit library
+ pcre2sample discussion of the pcre2demo program
+ pcre2stack discussion of stack usage
+ pcre2syntax quick syntax reference
+ pcre2test description of the <b>pcre2test</b> testing command
+ pcre2unicode discussion of Unicode and UTF support
+</pre>
+In the "man" and HTML formats, there is also a short page for each C library
+function, listing its arguments and results.
+</P>
+<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<P>
+Putting an actual email address here is a spam magnet. If you want to email me,
+use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+</P>
+<br><a name="SEC5" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
new file mode 100644
index 0000000..6528a38
--- /dev/null
+++ b/doc/html/pcre2grep.html
@@ -0,0 +1,759 @@
+<html>
+<head>
+<title>pcre2grep specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2grep man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
+<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
+<li><a name="TOC5" href="#SEC5">OPTIONS</a>
+<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
+<li><a name="TOC7" href="#SEC7">NEWLINES</a>
+<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
+<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
+<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
+<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
+<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
+<li><a name="TOC13" href="#SEC13">AUTHOR</a>
+<li><a name="TOC14" href="#SEC14">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>pcre2grep [options] [long options] [pattern] [path1 path2 ...]</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+<b>pcre2grep</b> searches files for character patterns, in the same way as other
+grep commands do, but it uses the PCRE2 regular expression library to support
+patterns that are compatible with the regular expressions of Perl 5. See
+<a href="pcre2syntax.html"><b>pcre2syntax</b>(3)</a>
+for a quick-reference summary of pattern syntax, or
+<a href="pcre2pattern.html"><b>pcre2pattern</b>(3)</a>
+for a full description of the syntax and semantics of the regular expressions
+that PCRE2 supports.
+</P>
+<P>
+Patterns, whether supplied on the command line or in a separate file, are given
+without delimiters. For example:
+<pre>
+ pcre2grep Thursday /etc/motd
+</pre>
+If you attempt to use delimiters (for example, by surrounding a pattern with
+slashes, as is common in Perl scripts), they are interpreted as part of the
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed quotes are required if a
+pattern contains white space or shell metacharacters.
+</P>
+<P>
+The first argument that follows any option settings is treated as the single
+pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
+Conversely, when one or both of these options are used to specify patterns, all
+arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
+argument pattern must be provided.
+</P>
+<P>
+If no files are specified, <b>pcre2grep</b> reads the standard input. The
+standard input can also be referenced by a name consisting of a single hyphen.
+For example:
+<pre>
+ pcre2grep some-pattern /file1 - /file3
+</pre>
+By default, each line that matches a pattern is copied to the standard
+output, and if there is more than one file, the file name is output at the
+start of each line, followed by a colon. However, there are options that can
+change how <b>pcre2grep</b> behaves. In particular, the <b>-M</b> option makes it
+possible to search for patterns that span line boundaries. What defines a line
+boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
+</P>
+<P>
+The amount of memory used for buffering files that are being scanned is
+controlled by a parameter that can be set by the <b>--buffer-size</b> option.
+The default value for this parameter is specified when <b>pcre2grep</b> is built,
+with the default default being 20K. A block of memory three times this size is
+used (to allow for buffering "before" and "after" lines). An error occurs if a
+line overflows the buffer.
+</P>
+<P>
+Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
+(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
+each line in the order in which they are defined, except that all the <b>-e</b>
+patterns are tried before the <b>-f</b> patterns.
+</P>
+<P>
+By default, as soon as one pattern matches a line, no further patterns are
+considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
+matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
+<b>--line-offsets</b> is used to output only the part of the line that matched
+(either shown literally, or as an offset), scanning resumes immediately
+following the match, so that further matches on the same line can be found. If
+there are multiple patterns, they are all tried on the remainder of the line,
+but patterns that follow the one that matched are not tried on the earlier part
+of the line.
+</P>
+<P>
+This behaviour means that the order in which multiple patterns are specified
+can affect the output when one of the above options is used. This is no longer
+the same behaviour as GNU grep, which now manages to display earlier matches
+for later patterns (as long as there is no overlap).
+</P>
+<P>
+Patterns that can match an empty string are accepted, but empty string
+matches are never recognized. An example is the pattern "(super)?(man)?", in
+which all components are optional. This pattern finds all occurrences of both
+"super" and "man"; the output differs from matching with "super|man" when only
+the matching substrings are being shown.
+</P>
+<P>
+If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
+<b>pcre2grep</b> uses the value to set a locale when calling the PCRE2 library.
+The <b>--locale</b> option can be used to override this.
+</P>
+<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
+<P>
+It is possible to compile <b>pcre2grep</b> so that it uses <b>libz</b> or
+<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
+respectively. You can find out whether your binary has support for one or both
+of these file types by running it with the <b>--help</b> option. If the
+appropriate support is not present, files are treated as plain text. The
+standard input is always so treated.
+</P>
+<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
+<P>
+By default, a file that contains a binary zero byte within the first 1024 bytes
+is identified as a binary file, and is processed specially. (GNU grep also
+identifies binary files in this manner.) See the <b>--binary-files</b> option
+for a means of changing the way binary files are handled.
+</P>
+<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
+<P>
+The order in which some of the options appear can affect the output. For
+example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
+names. Whichever comes later in the command line will be the one that takes
+effect. Similarly, except where noted below, if an option is given twice, the
+later setting is used. Numerical values for options may be followed by K or M,
+to signify multiplication by 1024 or 1024*1024 respectively.
+</P>
+<P>
+<b>--</b>
+This terminates the list of options. It is useful if the next item on the
+command line starts with a hyphen but is not an option. This allows for the
+processing of patterns and filenames that start with hyphens.
+</P>
+<P>
+<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
+Output <i>number</i> lines of context after each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
+guarantees to have up to 8K of following text available for context output.
+</P>
+<P>
+<b>-a</b>, <b>--text</b>
+Treat binary files as text. This is equivalent to
+<b>--binary-files</b>=<i>text</i>.
+</P>
+<P>
+<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
+Output <i>number</i> lines of context before each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
+guarantees to have up to 8K of preceding text available for context output.
+</P>
+<P>
+<b>--binary-files=</b><i>word</i>
+Specify how binary files are to be processed. If the word is "binary" (the
+default), pattern matching is performed on binary files, but the only output is
+"Binary file &#60;name&#62; matches" when a match succeeds. If the word is "text",
+which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
+processed in the same way as any other file. In this case, when a match
+succeeds, the output may be binary garbage, which can have nasty effects if
+sent to a terminal. If the word is "without-match", which is equivalent to the
+<b>-I</b> option, binary files are not processed at all; they are assumed not to
+be of interest.
+</P>
+<P>
+<b>--buffer-size=</b><i>number</i>
+Set the parameter that controls how much memory is used for buffering files
+that are being scanned.
+</P>
+<P>
+<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
+Output <i>number</i> lines of context both before and after each matching line.
+This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
+</P>
+<P>
+<b>-c</b>, <b>--count</b>
+Do not output individual lines from the files that are being scanned; instead
+output the number of lines that would otherwise have been shown. If no lines
+are selected, the number zero is output. If several files are are being
+scanned, a count is output for each of them. However, if the
+<b>--files-with-matches</b> option is also used, only those files whose counts
+are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
+<b>-B</b>, and <b>-C</b> options are ignored.
+</P>
+<P>
+<b>--colour</b>, <b>--color</b>
+If this option is given without any data, it is equivalent to "--colour=auto".
+If data is required, it must be given in the same shell item, separated by an
+equals sign.
+</P>
+<P>
+<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
+This option specifies under what circumstances the parts of a line that matched
+a pattern should be coloured in the output. By default, the output is not
+coloured. The value (which is optional, see above) may be "never", "always", or
+"auto". In the latter case, colouring happens only if the standard output is
+connected to a terminal. More resources are used when colouring is enabled,
+because <b>pcre2grep</b> has to search for all possible matches in a line, not
+just one, in order to colour them all.
+<br>
+<br>
+The colour that is used can be specified by setting the environment variable
+PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
+</P>
+<P>
+<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
+If an input path is not a regular file or a directory, "action" specifies how
+it is to be processed. Valid values are "read" (the default) or "skip"
+(silently skip the path).
+</P>
+<P>
+<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
+If an input path is a directory, "action" specifies how it is to be processed.
+Valid values are "read" (the default in non-Windows environments, for
+compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
+"skip" (silently skip the path, the default in Windows environments). In the
+"read" case, directories are read as if they were ordinary files. In some
+operating systems the effect of reading a directory like this is an immediate
+end-of-file; in others it may provoke an error.
+</P>
+<P>
+<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is no limit to the number of patterns. They are applied to each
+line in the order in which they are defined until one matches.
+<br>
+<br>
+If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
+followed by the patterns from the file(s), independent of the order in which
+these options are specified. Note that multiple use of <b>-e</b> is not the same
+as a single pattern with alternatives. For example, X|Y finds the first
+character in a line that is X or Y, whereas if the two patterns are given
+separately, with X first, <b>pcre2grep</b> finds X if it is present, even if it
+follows Y in the line. It finds Y only if there is no X in the line. This
+matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
+of the line that matched.
+</P>
+<P>
+<b>--exclude</b>=<i>pattern</i>
+Files (but not directories) whose names match the pattern are skipped without
+being processed. This applies to all files, whether listed on the command line,
+obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
+PCRE2 regular expression, and is matched against the final component of the file
+name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
+apply to this pattern. The option may be given any number of times in order to
+specify multiple patterns. If a file name matches both an <b>--include</b>
+and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
+option.
+</P>
+<P>
+<b>--exclude-from=</b><i>filename</i>
+Treat each non-empty line of the file as the data for an <b>--exclude</b>
+option. What constitutes a newline when reading the file is the operating
+system's default. The <b>--newline</b> option has no effect on this option. This
+option may be given more than once in order to specify a number of files to
+read.
+</P>
+<P>
+<b>--exclude-dir</b>=<i>pattern</i>
+Directories whose names match the pattern are skipped without being processed,
+whatever the setting of the <b>--recursive</b> option. This applies to all
+directories, whether listed on the command line, obtained from
+<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE2
+regular expression, and is matched against the final component of the directory
+name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
+apply to this pattern. The option may be given any number of times in order to
+specify more than one pattern. If a directory matches both <b>--include-dir</b>
+and <b>--exclude-dir</b>, it is excluded. There is no short form for this
+option.
+</P>
+<P>
+<b>-F</b>, <b>--fixed-strings</b>
+Interpret each data-matching pattern as a list of fixed strings, separated by
+newlines, instead of as a regular expression. What constitutes a newline for
+this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
+as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
+They apply to each of the fixed strings. A line is selected if any of the fixed
+strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the <b>--include</b> or
+<b>--exclude</b> options.
+</P>
+<P>
+<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
+Read patterns from the file, one per line, and match them against
+each line of input. What constitutes a newline when reading the file is the
+operating system's default. The <b>--newline</b> option has no effect on this
+option. Trailing white space is removed from each line, and blank lines are
+ignored. An empty file contains no patterns and therefore matches nothing. See
+also the comments about multiple patterns versus a single pattern with
+alternatives in the description of <b>-e</b> above.
+<br>
+<br>
+If this option is given more than once, all the specified files are
+read. A data line is output if any of the patterns match it. A filename can
+be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
+specified on the command line using <b>-e</b> may also be present; they are
+tested before the file's patterns. However, no other pattern is taken from the
+command line; all arguments are treated as the names of paths to be searched.
+</P>
+<P>
+<b>--file-list</b>=<i>filename</i>
+Read a list of files and/or directories that are to be scanned from the given
+file, one per line. Trailing white space is removed from each line, and blank
+lines are ignored. These paths are processed before any that are listed on the
+command line. The filename can be given as "-" to refer to the standard input.
+If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
+read first. This is useful only when the standard input is a terminal, from
+which further lines (the list of files) can be read after an end-of-file
+indication. If this option is given more than once, all the specified files are
+read.
+</P>
+<P>
+<b>--file-offsets</b>
+Instead of showing lines or parts of lines that match, show each match as an
+offset from the start of the file and a length, separated by a comma. In this
+mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with <b>--line-offsets</b>
+and <b>--only-matching</b>.
+</P>
+<P>
+<b>-H</b>, <b>--with-filename</b>
+Force the inclusion of the filename at the start of output lines when searching
+a single file. By default, the filename is not shown in this case. For matching
+lines, the filename is followed by a colon; for context lines, a hyphen
+separator is used. If a line number is also being output, it follows the file
+name.
+</P>
+<P>
+<b>-h</b>, <b>--no-filename</b>
+Suppress the output filenames when searching multiple files. By default,
+filenames are shown when multiple files are searched. For matching lines, the
+filename is followed by a colon; for context lines, a hyphen separator is used.
+If a line number is also being output, it follows the file name.
+</P>
+<P>
+<b>--help</b>
+Output a help message, giving brief details of the command options and file
+type support, and then exit. Anything else on the command line is
+ignored.
+</P>
+<P>
+<b>-I</b>
+Treat binary files as never matching. This is equivalent to
+<b>--binary-files</b>=<i>without-match</i>.
+</P>
+<P>
+<b>-i</b>, <b>--ignore-case</b>
+Ignore upper/lower case distinctions during comparisons.
+</P>
+<P>
+<b>--include</b>=<i>pattern</i>
+If any <b>--include</b> patterns are specified, the only files that are
+processed are those that match one of the patterns (and do not match an
+<b>--exclude</b> pattern). This option does not affect directories, but it
+applies to all files, whether listed on the command line, obtained from
+<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
+expression, and is matched against the final component of the file name, not
+the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
+this pattern. The option may be given any number of times. If a file name
+matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
+There is no short form for this option.
+</P>
+<P>
+<b>--include-from=</b><i>filename</i>
+Treat each non-empty line of the file as the data for an <b>--include</b>
+option. What constitutes a newline for this purpose is the operating system's
+default. The <b>--newline</b> option has no effect on this option. This option
+may be given any number of times; all the files are read.
+</P>
+<P>
+<b>--include-dir</b>=<i>pattern</i>
+If any <b>--include-dir</b> patterns are specified, the only directories that
+are processed are those that match one of the patterns (and do not match an
+<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+on the command line, obtained from <b>--file-list</b>, or by scanning a parent
+directory. The pattern is a PCRE2 regular expression, and is matched against the
+final component of the directory name, not the entire path. The <b>-F</b>,
+<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
+given any number of times. If a directory matches both <b>--include-dir</b> and
+<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
+</P>
+<P>
+<b>-L</b>, <b>--files-without-match</b>
+Instead of outputting lines from the files, just output the names of the files
+that do not contain any lines that would have been output. Each file name is
+output once, on a separate line.
+</P>
+<P>
+<b>-l</b>, <b>--files-with-matches</b>
+Instead of outputting lines from the files, just output the names of the files
+containing lines that would have been output. Each file name is output
+once, on a separate line. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches.
+</P>
+<P>
+<b>--label</b>=<i>name</i>
+This option supplies a name to be used for the standard input when file names
+are being output. If not supplied, "(standard input)" is used. There is no
+short form for this option.
+</P>
+<P>
+<b>--line-buffered</b>
+When this option is given, input is read and processed line by line, and the
+output is flushed after each write. By default, input is read in large chunks,
+unless <b>pcre2grep</b> can determine that it is reading from a terminal (which
+is currently possible only in Unix-like environments). Output to terminal is
+normally automatically flushed by the operating system. This option can be
+useful when the input or output is attached to a pipe and you do not want
+<b>pcre2grep</b> to buffer up large amounts of data. However, its use will affect
+performance, and the <b>-M</b> (multiline) option ceases to work.
+</P>
+<P>
+<b>--line-offsets</b>
+Instead of showing lines or parts of lines that match, show each match as a
+line number, the offset from the start of the line, and a length. The line
+number is terminated by a colon (as usual; see the <b>-n</b> option), and the
+offset and length are separated by a comma. In this mode, no context is shown.
+That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
+more than one match in a line, each of them is shown separately. This option is
+mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
+</P>
+<P>
+<b>--locale</b>=<i>locale-name</i>
+This option specifies a locale to be used for pattern matching. It overrides
+the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
+locale is specified, the PCRE2 library's default (usually the "C" locale) is
+used. There is no short form for this option.
+</P>
+<P>
+<b>--match-limit</b>=<i>number</i>
+Processing some regular expression patterns can require a very large amount of
+memory, leading in some cases to a program crash if not enough is available.
+Other patterns may take a very long time to search for all possible matching
+strings. The <b>pcre2_exec()</b> function that is called by <b>pcre2grep</b> to do
+the matching has two parameters that can limit the resources that it uses.
+<br>
+<br>
+The <b>--match-limit</b> option provides a means of limiting resource usage
+when processing patterns that are not going to match, but which have a very
+large number of possibilities in their search trees. The classic example is a
+pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
+called <b>match()</b> which it calls repeatedly (sometimes recursively). The
+limit set by <b>--match-limit</b> is imposed on the number of times this
+function is called during a match, which has the effect of limiting the amount
+of backtracking that can take place.
+<br>
+<br>
+The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
+instead of limiting the total number of times that <b>match()</b> is called, it
+limits the depth of recursive calls, which in turn limits the amount of memory
+that can be used. The recursion depth is a smaller number than the total number
+of calls, because not all calls to <b>match()</b> are recursive. This limit is
+of use only if it is set smaller than <b>--match-limit</b>.
+<br>
+<br>
+There are no short forms for these options. The default settings are specified
+when the PCRE2 library is compiled, with the default default being 10 million.
+</P>
+<P>
+<b>-M</b>, <b>--multiline</b>
+Allow patterns to match more than one line. When this option is given, patterns
+may usefully contain literal newline characters and internal occurrences of ^
+and $ characters. The output for a successful match may consist of more than
+one line, the last of which is the one in which the match ended. If the matched
+string ends with a newline sequence the output ends at the end of that line.
+<br>
+<br>
+When this option is set, the PCRE2 library is called in "multiline" mode.
+There is a limit to the number of lines that can be matched, imposed by the way
+that <b>pcre2grep</b> buffers the input file as it scans it. However,
+<b>pcre2grep</b> ensures that at least 8K characters or the rest of the document
+(whichever is the shorter) are available for forward matching, and similarly
+the previous 8K characters (or all the previous characters, if fewer than 8K)
+are guaranteed to be available for lookbehind assertions. This option does not
+work when input is read line by line (see \fP--line-buffered\fP.)
+</P>
+<P>
+<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
+The PCRE2 library supports five different conventions for indicating
+the ends of lines. They are the single-character sequences CR (carriage return)
+and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
+which recognizes any of the preceding three types, and an "any" convention, in
+which any Unicode line ending sequence is assumed to end a line. The Unicode
+sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
+(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
+PS (paragraph separator, U+2029).
+<br>
+<br>
+When the PCRE2 library is built, a default line-ending sequence is specified.
+This is normally the standard sequence for the operating system. Unless
+otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
+The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
+makes it possible to use <b>pcre2grep</b> to scan files that have come from other
+environments without having to modify their line endings. If the data that is
+being scanned does not agree with the convention set by this option,
+<b>pcre2grep</b> may behave in strange ways. Note that this option does not
+apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
+<b>--include-from</b> options, which are expected to use the operating system's
+standard newline sequence.
+</P>
+<P>
+<b>-n</b>, <b>--line-number</b>
+Precede each output line by its line number in the file, followed by a colon
+for matching lines or a hyphen for context lines. If the filename is also being
+output, it precedes the line number. This option is forced if
+<b>--line-offsets</b> is used.
+</P>
+<P>
+<b>--no-jit</b>
+If the PCRE2 library is built with support for just-in-time compiling (which
+speeds up matching), <b>pcre2grep</b> automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+</P>
+<P>
+<b>-o</b>, <b>--only-matching</b>
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
+<b>-C</b> options are ignored. If there is more than one match in a line, each
+of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
+sense of the match to find non-matching lines), no output is generated, but the
+return code is set appropriately. If the matched portion of the line is empty,
+nothing is output unless the file name or line number are being printed, in
+which case they are shown on an otherwise empty line. This option is mutually
+exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
+</P>
+<P>
+<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 32 capturing parentheses are supported, and -o0 is
+equivalent to <b>-o</b> without a number. Because these options can be given
+without an argument (see above), if an argument is present, it must be given in
+the same shell item, for example, -o3 or --only-matching=2. The comments given
+for the non-argument case above also apply to this case. If the specified
+capturing parentheses do not exist in the pattern, or were not set in the
+match, nothing is output unless the file name or line number are being printed.
+<br>
+<br>
+If this option is given multiple times, multiple substrings are output, in the
+order the options are given. For example, -o3 -o1 -o3 causes the substrings
+matched by capturing parentheses 3 and 1 and then 3 again to be output. By
+default, there is no separator (but see the next option).
+</P>
+<P>
+<b>--om-separator</b>=<i>text</i>
+Specify a separating string for multiple occurrences of <b>-o</b>. The default
+is an empty string. Separating strings are never coloured.
+</P>
+<P>
+<b>-q</b>, <b>--quiet</b>
+Work quietly, that is, display nothing except error messages. The exit
+status indicates whether or not any matches were found.
+</P>
+<P>
+<b>-r</b>, <b>--recursive</b>
+If any given path is a directory, recursively scan the files it contains,
+taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
+directory is read as a normal file; in some operating systems this gives an
+immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
+option to "recurse".
+</P>
+<P>
+<b>--recursion-limit</b>=<i>number</i>
+See <b>--match-limit</b> above.
+</P>
+<P>
+<b>-s</b>, <b>--no-messages</b>
+Suppress error messages about non-existent or unreadable files. Such files are
+quietly skipped. However, the return code is still 2, even if matches were
+found in other files.
+</P>
+<P>
+<b>-u</b>, <b>--utf-8</b>
+Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
+with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
+<b>--include</b> options) and all subject lines that are scanned must be valid
+strings of UTF-8 characters.
+</P>
+<P>
+<b>-V</b>, <b>--version</b>
+Write the version numbers of <b>pcre2grep</b> and the PCRE2 library to the
+standard output and then exit. Anything else on the command line is
+ignored.
+</P>
+<P>
+<b>-v</b>, <b>--invert-match</b>
+Invert the sense of the match, so that lines which do <i>not</i> match any of
+the patterns are the ones that are found.
+</P>
+<P>
+<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
+Force the patterns to match only whole words. This is equivalent to having \b
+at the start and end of the pattern. This option applies only to the patterns
+that are matched against the contents of files; it does not apply to patterns
+specified by any of the <b>--include</b> or <b>--exclude</b> options.
+</P>
+<P>
+<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
+Force the patterns to be anchored (each must start matching at the beginning of
+a line) and in addition, require them to match entire lines. This is equivalent
+to having ^ and $ characters at the start and end of each alternative branch in
+every pattern. This option applies only to the patterns that are matched
+against the contents of files; it does not apply to patterns specified by any
+of the <b>--include</b> or <b>--exclude</b> options.
+</P>
+<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
+<P>
+The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
+order, for a locale. The first one that is set is used. This can be overridden
+by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
+(usually the "C" locale) is used.
+</P>
+<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
+<P>
+The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
+different newline conventions from the default. Any parts of the input files
+that are written to the standard output are copied identically, with whatever
+newline sequences they have in the input. However, the setting of this option
+does not affect the interpretation of files specified by the <b>-f</b>,
+<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
+the operating system's standard newline sequence, nor does it affect the way in
+which <b>pcre2grep</b> writes informational messages to the standard error and
+output streams. For these it uses the string "\n" to indicate newlines,
+relying on the C I/O library to convert this to an appropriate sequence.
+</P>
+<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
+<P>
+Many of the short and long forms of <b>pcre2grep</b>'s options are the same
+as in the GNU <b>grep</b> program. Any long option of the form
+<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
+(PCRE2 terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
+<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
+<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
+<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
+<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
+capturing parentheses number.
+</P>
+<P>
+Although most of the common options work the same way, a few are different in
+<b>pcre2grep</b>. For example, the <b>--include</b> option's argument is a glob
+for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
+<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
+without counts, but <b>pcre2grep</b> gives the counts.
+</P>
+<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
+<P>
+There are four different ways in which an option with data can be specified.
+If a short form option is used, the data may follow immediately, or (with one
+exception) in the next command line item. For example:
+<pre>
+ -f/some/file
+ -f /some/file
+</pre>
+The exception is the <b>-o</b> option, which may appear with or without data.
+Because of this, if data is present, it must follow immediately in the same
+item, for example -o3.
+</P>
+<P>
+If a long form option is used, the data may appear in the same command line
+item, separated by an equals character, or (with two exceptions) it may appear
+in the next command line item. For example:
+<pre>
+ --file=/some/file
+ --file /some/file
+</pre>
+Note, however, that if you want to supply a file name beginning with ~ as data
+in a shell command, and have the shell expand ~ to a home directory, you must
+separate the file name from the option, because the shell does not treat ~
+specially unless it is at the start of an item.
+</P>
+<P>
+The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
+<b>--only-matching</b> options, for which the data is optional. If one of these
+options does have data, it must be given in the first form, using an equals
+character. Otherwise <b>pcre2grep</b> will assume that it has no data.
+</P>
+<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
+<P>
+It is possible to supply a regular expression that takes a very long time to
+fail to match certain lines. Such patterns normally involve nested indefinite
+repeats, for example: (a+)*\d when matched against a line of a's with no final
+digit. The PCRE2 matching function has a resource limit that causes it to abort
+in these circumstances. If this happens, <b>pcre2grep</b> outputs an error
+message and the line that caused the problem to the standard error stream. If
+there are more than 20 such errors, <b>pcre2grep</b> gives up.
+</P>
+<P>
+The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the overall
+resource limit; there is a second option called <b>--recursion-limit</b> that
+sets a limit on the amount of memory (usually stack) that is used (see the
+discussion of these options above).
+</P>
+<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
+<P>
+Exit status is 0 if any matches were found, 1 if no matches were found, and 2
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+<b>-s</b> option to suppress error messages about inaccessible files does not
+affect the return code.
+</P>
+<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2test</b>(1).
+</P>
+<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC14" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 28 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/pcre2-config.1 b/doc/pcre2-config.1
new file mode 100644
index 0000000..7fa0a09
--- /dev/null
+++ b/doc/pcre2-config.1
@@ -0,0 +1,86 @@
+.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+pcre2-config - program to return PCRE2 configuration
+.SH SYNOPSIS
+.rs
+.sp
+.nf
+.B pcre2-config [--prefix] [--exec-prefix] [--version]
+.B " [--libs8] [--libs16] [--libs32] [--libs-posix]"
+.B " [--cflags] [--cflags-posix]"
+.fi
+.
+.
+.SH DESCRIPTION
+.rs
+.sp
+\fBpcre2-config\fP returns the configuration of the installed PCRE2 libraries
+and the options required to compile a program to use them. Some of the options
+apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
+not available for libraries that have not been built. If an unavailable option
+is encountered, the "usage" information is output.
+.
+.
+.SH OPTIONS
+.rs
+.TP 10
+\fB--prefix\fP
+Writes the directory prefix used in the PCRE2 installation for architecture
+independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some
+systems) to the standard output.
+.TP 10
+\fB--exec-prefix\fP
+Writes the directory prefix used in the PCRE2 installation for architecture
+dependent files (normally the same as \fB--prefix\fP) to the standard output.
+.TP 10
+\fB--version\fP
+Writes the version number of the installed PCRE2 libraries to the standard
+output.
+.TP 10
+\fB--libs8\fP
+Writes to the standard output the command line options required to link
+with the 8-bit PCRE2 library (\fB-lpcre2-8\fP on many systems).
+.TP 10
+\fB--libs16\fP
+Writes to the standard output the command line options required to link
+with the 16-bit PCRE2 library (\fB-lpcre2-16\fP on many systems).
+.TP 10
+\fB--libs32\fP
+Writes to the standard output the command line options required to link
+with the 32-bit PCRE2 library (\fB-lpcre2-32\fP on many systems).
+.TP 10
+\fB--libs-posix\fP
+Writes to the standard output the command line options required to link with
+PCRE2's POSIX API wrapper library (\fB-lpcre2-posix\fP \fB-lpcre2-8\fP on many
+systems).
+.TP 10
+\fB--cflags\fP
+Writes to the standard output the command line options required to compile
+files that use PCRE2 (this may include some \fB-I\fP options, but is blank on
+many systems).
+.TP 10
+\fB--cflags-posix\fP
+Writes to the standard output the command line options required to compile
+files that use PCRE2's POSIX API wrapper library (this may include some
+\fB-I\fP options, but is blank on many systems).
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2(3)\fP
+.
+.
+.SH AUTHOR
+.rs
+.sp
+This manual page was originally written by Mark Baker for the Debian GNU/Linux
+system. It has been subsequently revised as a generic PCRE2 man page.
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+.fi
diff --git a/doc/pcre2-config.txt b/doc/pcre2-config.txt
new file mode 100644
index 0000000..8ddea2a
--- /dev/null
+++ b/doc/pcre2-config.txt
@@ -0,0 +1,81 @@
+PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1)
+
+
+
+NAME
+ pcre2-config - program to return PCRE2 configuration
+
+SYNOPSIS
+
+ pcre2-config [--prefix] [--exec-prefix] [--version]
+ [--libs8] [--libs16] [--libs32] [--libs-posix]
+ [--cflags] [--cflags-posix]
+
+
+DESCRIPTION
+
+ pcre2-config returns the configuration of the installed PCRE2 libraries
+ and the options required to compile a program to use them. Some of the
+ options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
+ respectively, and are not available for libraries that have not been
+ built. If an unavailable option is encountered, the "usage" information
+ is output.
+
+
+OPTIONS
+
+ --prefix Writes the directory prefix used in the PCRE2 installation
+ for architecture independent files (/usr on many systems,
+ /usr/local on some systems) to the standard output.
+
+ --exec-prefix
+ Writes the directory prefix used in the PCRE2 installation
+ for architecture dependent files (normally the same as --pre-
+ fix) to the standard output.
+
+ --version Writes the version number of the installed PCRE2 libraries to
+ the standard output.
+
+ --libs8 Writes to the standard output the command line options
+ required to link with the 8-bit PCRE2 library (-lpcre2-8 on
+ many systems).
+
+ --libs16 Writes to the standard output the command line options
+ required to link with the 16-bit PCRE2 library (-lpcre2-16 on
+ many systems).
+
+ --libs32 Writes to the standard output the command line options
+ required to link with the 32-bit PCRE2 library (-lpcre2-32 on
+ many systems).
+
+ --libs-posix
+ Writes to the standard output the command line options
+ required to link with PCRE2's POSIX API wrapper library
+ (-lpcre2-posix -lpcre2-8 on many systems).
+
+ --cflags Writes to the standard output the command line options
+ required to compile files that use PCRE2 (this may include
+ some -I options, but is blank on many systems).
+
+ --cflags-posix
+ Writes to the standard output the command line options
+ required to compile files that use PCRE2's POSIX API wrapper
+ library (this may include some -I options, but is blank on
+ many systems).
+
+
+SEE ALSO
+
+ pcre2(3)
+
+
+AUTHOR
+
+ This manual page was originally written by Mark Baker for the Debian
+ GNU/Linux system. It has been subsequently revised as a generic PCRE2
+ man page.
+
+
+REVISION
+
+ Last updated: 28 September 2014
diff --git a/doc/pcre2.3 b/doc/pcre2.3
new file mode 100644
index 0000000..aaa71d3
--- /dev/null
+++ b/doc/pcre2.3
@@ -0,0 +1,180 @@
+.TH PCRE2 3 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH INTRODUCTION
+.rs
+.sp
+PCRE2 is the name used for a revised API for the PCRE library, which is a set
+of functions, written in C, that implement regular expression pattern matching
+using the same syntax and semantics as Perl, with just a few differences. Some
+features that appeared in Python and the original PCRE before they appeared in
+Perl are also available using the Python syntax, there is some support for one
+or two .NET and Oniguruma syntax items, and there are options for requesting
+some minor changes that give better ECMAScript (aka JavaScript) compatibility.
+.P
+The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
+code units, which means that up to three separate libraries may be installed.
+The original work to extend PCRE to 16-bit and 32-bit code units was done by
+Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
+can be interpreted either as one character per code unit, or as UTF-encoded
+Unicode, with support for Unicode general category properties. Unicode is
+optional at build time, and must be enabled explicitly at run time. The version
+of Unicode in use can be discovered by running
+.sp
+ pcre2test -C
+.P
+The three libraries contain identical sets of functions, with names ending in
+_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
+by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
+one code unit width can be written using generic names such as
+\fBpcre2_compile()\fP, and the documentation is written assuming that this is
+the case.
+.P
+In addition to the Perl-compatible matching function, PCRE2 contains an
+alternative function that matches the same compiled patterns in a different
+way. In certain circumstances, the alternative function has some advantages.
+For a discussion of the two matching algorithms, see the
+.\" HREF
+\fBpcre2matching\fP
+.\"
+page.
+.P
+Details of exactly which Perl regular expression features are and are not
+supported by PCRE2 are given in separate documents. See the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+and
+.\" HREF
+\fBpcre2compat\fP
+.\"
+pages. There is a syntax summary in the
+.\" HREF
+\fBpcre2syntax\fP
+.\"
+page.
+.P
+Some features of PCRE2 can be included, excluded, or changed when the library
+is built. The
+.\" HREF
+\fBpcre2_config()\fP
+.\"
+function makes it possible for a client to discover which features are
+available. The features themselves are described in the
+.\" HREF
+\fBpcre2build\fP
+.\"
+page. Documentation about building PCRE2 for various operating systems can be
+found in the
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+and
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS_BUILD\fP
+.\"
+files in the source distribution.
+.P
+The libraries contains a number of undocumented internal functions and data
+tables that are used by more than one of the exported external functions, but
+which are not intended for use by external callers. Their names all begin with
+"_pcre2", which hopefully will not provoke any name clashes. In some
+environments, it is possible to control which external symbols are exported
+when a shared library is built, and in these cases the undocumented symbols are
+not exported.
+.
+.
+.SH "SECURITY CONSIDERATIONS"
+.rs
+.sp
+If you are using PCRE2 in a non-UTF application that permits users to supply
+arbitrary patterns for compilation, you should be aware of a feature that
+allows users to turn on UTF support from within a pattern, provided that PCRE2
+was built with Unicode support. For example, an 8-bit pattern that begins with
+"(*UTF)" turns on UTF-8 mode, which interprets patterns and subjects as strings
+of UTF-8 code units instead of individual 8-bit characters. This causes both
+the pattern and any data against which it is matched to be checked for UTF-8
+validity. If the data string is very long, such a check might use sufficiently
+many resources as to cause your application to lose performance.
+.P
+One way of guarding against this possibility is to use the
+\fBpcre2_pattern_info()\fP function to check the compiled pattern's options for
+UTF. Alternatively, you can set the PCRE2_NEVER_UTF option at compile time.
+This causes an compile time error if a pattern contains a UTF-setting sequence.
+.P
+If your application is one that supports UTF, be aware that validity checking
+can take time. If the same data string is to be matched many times, you can use
+the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
+running redundant checks.
+.P
+Another way that performance can be hit is by running a pattern that has a very
+large search tree against a string that will never match. Nested unlimited
+repeats in a pattern are a common example. PCRE2 provides some protection
+against this: see the \fBpcre2_set_match_limit()\fP function in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page.
+.
+.
+.SH "USER DOCUMENTATION"
+.rs
+.sp
+The user documentation for PCRE2 comprises a number of different sections. In
+the "man" format, each of these is a separate "man page". In the HTML format,
+each is a separate page, linked from the index page. In the plain text format,
+the descriptions of the \fBpcre2grep\fP and \fBpcre2test\fP programs are in
+files called \fBpcre2grep.txt\fP and \fBpcre2test.txt\fP, respectively. The
+remaining sections, except for the \fBpcre2demo\fP section (which is a program
+listing), and the short pages for individual functions, are concatenated in
+\fBpcre2.txt\fP, for ease of searching. The sections are as follows:
+.sp
+ pcre2 this document FIXME CHECK THIS LIST
+ pcre2-config show PCRE2 installation configuration information
+ pcre2api details of PCRE2's native C API
+ pcre2build building PCRE2
+ pcre2callout details of the callout feature
+ pcre2compat discussion of Perl compatibility
+ pcre2demo a demonstration C program that uses PCRE2
+ pcre2grep description of the \fBpcre2grep\fP command (8-bit only)
+ pcre2jit discussion of the just-in-time optimization support
+ pcre2limits details of size and other limits
+ pcre2matching discussion of the two matching algorithms
+ pcre2partial details of the partial matching facility
+.\" JOIN
+ pcre2pattern syntax and semantics of supported
+ regular expressions
+ pcre2perform discussion of performance issues
+ pcre2posix the POSIX-compatible C API for the 8-bit library
+ pcre2sample discussion of the pcre2demo program
+ pcre2stack discussion of stack usage
+ pcre2syntax quick syntax reference
+ pcre2test description of the \fBpcre2test\fP testing command
+ pcre2unicode discussion of Unicode and UTF support
+.sp
+In the "man" and HTML formats, there is also a short page for each C library
+function, listing its arguments and results.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.P
+Putting an actual email address here is a spam magnet. If you want to email me,
+use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2build.3 b/doc/pcre2build.3
new file mode 100644
index 0000000..2146777
--- /dev/null
+++ b/doc/pcre2build.3
@@ -0,0 +1,490 @@
+.TH PCRE2BUILD 3 "28 Sepember 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.
+.
+.SH "BUILDING PCRE2"
+.rs
+.sp
+PCRE2 is distributed with a \fBconfigure\fP script that can be used to build
+the library in Unix-like environments using the applications known as
+Autotools. Also in the distribution are files to support building using
+\fBCMake\fP instead of \fBconfigure\fP. The text file
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+contains general information about building with Autotools (some of which is
+repeated below), and also has some comments about building on various operating
+systems. There is a lot more information about building PCRE2 without using
+Autotools (including information about using \fBCMake\fP and building "by
+hand") in the text file called
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS-BUILD\fP.
+.\"
+You should consult this file as well as the
+.\" HTML <a href="README.txt">
+.\" </a>
+\fBREADME\fP
+.\"
+file if you are building in a non-Unix-like environment.
+.
+.
+.SH "PCRE2 BUILD-TIME OPTIONS"
+.rs
+.sp
+The rest of this document describes the optional features of PCRE2 that can be
+selected when the library is compiled. It assumes use of the \fBconfigure\fP
+script, where the optional features are selected or deselected by providing
+options to \fBconfigure\fP before running the \fBmake\fP command. However, the
+same options can be selected in both Unix-like and non-Unix-like environments
+if you are using \fBCMake\fP instead of \fBconfigure\fP to build PCRE2.
+.P
+If you are not using Autotools or \fBCMake\fP, option selection can be done by
+editing the \fBconfig.h\fP file, or by passing parameter settings to the
+compiler, as described in
+.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
+.\" </a>
+\fBNON-AUTOTOOLS-BUILD\fP.
+.\"
+.P
+The complete list of options for \fBconfigure\fP (which includes the standard
+ones such as the selection of the installation directory) can be obtained by
+running
+.sp
+ ./configure --help
+.sp
+The following sections include descriptions of options whose names begin with
+--enable or --disable. These settings specify changes to the defaults for the
+\fBconfigure\fP command. Because of the way that \fBconfigure\fP works,
+--enable and --disable always come in pairs, so the complementary option always
+exists as well, but as it specifies the default, it is not described.
+.
+.
+.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
+.rs
+.sp
+By default, a library called \fBlibpcre2-8\fP is built, containing functions
+that take string arguments contained in vectors of bytes, interpreted either as
+single-byte characters, or UTF-8 strings. You can also build two other
+libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
+strings that are contained in vectors of 16-bit and 32-bit code units,
+respectively. These can be interpreted either as single-unit characters or
+UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
+the following to the \fBconfigure\fP command:
+.sp
+ --enable-pcre16
+ --enable-pcre32
+.sp
+If you do not want the 8-bit library, add
+.sp
+ --disable-pcre8
+.sp
+as well. At least one of the three libraries must be built. Note that the POSIX
+wrapper is for the 8-bit library only, and that \fBpcre2grep\fP is an 8-bit
+program. Neither of these are built if you select only the 16-bit or 32-bit
+libraries.
+.
+.
+.SH "BUILDING SHARED AND STATIC LIBRARIES"
+.rs
+.sp
+The Autotools PCRE2 building process uses \fBlibtool\fP to build both shared
+and static libraries by default. You can suppress one of these by adding one of
+.sp
+ --disable-shared
+ --disable-static
+.sp
+to the \fBconfigure\fP command, as required.
+.
+.
+.SH "Unicode and UTF SUPPORT"
+.rs
+.sp
+To build PCRE2 with support for Unicode and UTF character strings, add
+.sp
+ --enable-unicode
+.sp
+to the \fBconfigure\fP command. This setting applies to all three libraries,
+adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
+library, and support for UTF-32 to the to the 32-bit library.
+It is not possible to build one library with
+UTF support and another without in the same configuration.
+.P
+Of itself, this setting does not make PCRE2 treat strings as UTF-8, UTF-16 or
+UTF-32. As well as compiling PCRE2 with this option, you also have have to set
+the PCRE2_UTF option when you call \fBpcre2_compile()\fP to compile a pattern.
+.P
+If you set --enable-unicode when compiling in an EBCDIC environment, PCRE2
+expects its input to be either ASCII or UTF-8 (depending on the run-time
+option). It is not possible to support both EBCDIC and UTF-8 codes in the same
+version of the library. Consequently, --enable-unicode and --enable-ebcdic are
+mutually exclusive.
+.P
+UTF support allows the libraries to process character codepoints up to 0x10ffff
+in the strings that they handle. It also provides support for accessing the
+properties of such characters, using pattern escapes such as \eP, \ep, and \eX.
+Only the general category properties such as \fILu\fP and \fINd\fP are
+supported. Details are given in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+documentation.
+.
+.
+.SH "JUST-IN-TIME COMPILER SUPPORT"
+.rs
+.sp
+Just-in-time compiler support is included in the build by specifying
+.sp
+ --enable-jit
+.sp
+This support is available only for certain hardware architectures. If this
+option is set for an unsupported architecture, a compile time error occurs.
+See the
+.\" HREF
+\fBpcre2jit\fP
+.\"
+documentation for a discussion of JIT usage. When JIT support is enabled,
+pcre2grep automatically makes use of it, unless you add
+.sp
+ --disable-pcre2grep-jit
+.sp
+to the "configure" command.
+.
+.
+.SH "CODE VALUE OF NEWLINE"
+.rs
+.sp
+By default, PCRE2 interprets the linefeed (LF) character as indicating the end
+of a line. This is the normal newline character on Unix-like systems. You can
+compile PCRE2 to use carriage return (CR) instead, by adding
+.sp
+ --enable-newline-is-cr
+.sp
+to the \fBconfigure\fP command. There is also a --enable-newline-is-lf option,
+which explicitly specifies linefeed as the newline character.
+.sp
+Alternatively, you can specify that line endings are to be indicated by the two
+character sequence CRLF. If you want this, add
+.sp
+ --enable-newline-is-crlf
+.sp
+to the \fBconfigure\fP command. There is a fourth option, specified by
+.sp
+ --enable-newline-is-anycrlf
+.sp
+which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as
+indicating a line ending. Finally, a fifth option, specified by
+.sp
+ --enable-newline-is-any
+.sp
+causes PCRE2 to recognize any Unicode newline sequence.
+.P
+Whatever line ending convention is selected when PCRE2 is built can be
+overridden when the library functions are called. At build time it is
+conventional to use the standard for your operating system.
+.
+.
+.SH "WHAT \eR MATCHES"
+.rs
+.sp
+By default, the sequence \eR in a pattern matches any Unicode newline sequence,
+whatever has been selected as the line ending sequence. If you specify
+.sp
+ --enable-bsr-anycrlf
+.sp
+the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is
+selected when PCRE2 is built can be overridden when the library functions are
+called.
+.
+.
+.SH "HANDLING VERY LARGE PATTERNS"
+.rs
+.sp
+Within a compiled pattern, offset values are used to point from one part to
+another (for example, from an opening parenthesis to an alternation
+metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
+are used for these offsets, leading to a maximum size for a compiled pattern of
+around 64K. This is sufficient to handle all but the most gigantic patterns.
+Nevertheless, some people do want to process truly enormous patterns, so it is
+possible to compile PCRE2 to use three-byte or four-byte offsets by adding a
+setting such as
+.sp
+ --with-link-size=3
+.sp
+to the \fBconfigure\fP command. The value given must be 2, 3, or 4. For the
+16-bit library, a value of 3 is rounded up to 4. In these libraries, using
+longer offsets slows down the operation of PCRE2 because it has to load
+additional data when handling them. For the 32-bit library the value is always
+4 and cannot be overridden; the value of --with-link-size is ignored.
+.
+.
+.SH "AVOIDING EXCESSIVE STACK USAGE"
+.rs
+.sp
+When matching with the \fBpcre2_match()\fP function, PCRE2 implements
+backtracking by making recursive calls to an internal function called
+\fBmatch()\fP. In environments where the size of the stack is limited, this can
+severely limit PCRE2's operation. (The Unix environment does not usually suffer
+from this problem, but it may sometimes be necessary to increase the maximum
+stack size. There is a discussion in the
+.\" HREF
+\fBpcre2stack\fP
+.\"
+documentation.) An alternative approach to recursion that uses memory from the
+heap to remember data, instead of using recursive function calls, has been
+implemented to work round the problem of limited stack size. If you want to
+build a version of PCRE2 that works this way, add
+.sp
+ --disable-stack-for-recursion
+.sp
+to the \fBconfigure\fP command. By default, the system functions \fBmalloc()\fP
+and \fBfree()\fP are called to manage the heap memory that is required, but
+custom memory management functions can be called instead. PCRE2 runs noticeably
+more slowly when built in this way. This option affects only the
+\fBpcre2_match()\fP function; it is not relevant for \fBpcre2_dfa_match()\fP.
+.
+.
+.SH "LIMITING PCRE2 RESOURCE USAGE"
+.rs
+.sp
+Internally, PCRE2 has a function called \fBmatch()\fP, which it calls
+repeatedly (sometimes recursively) when matching a pattern with the
+\fBpcre2_match()\fP function. By controlling the maximum number of times this
+function may be called during a single matching operation, a limit can be
+placed on the resources used by a single call to \fBpcre2_match()\fP. The limit
+can be changed at run time, as described in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation. The default is 10 million, but this can be changed by adding a
+setting such as
+.sp
+ --with-match-limit=500000
+.sp
+to the \fBconfigure\fP command. This setting has no effect on the
+\fBpcre2_dfa_match()\fP matching function.
+.P
+In some environments it is desirable to limit the depth of recursive calls of
+\fBmatch()\fP more strictly than the total number of calls, in order to
+restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
+is specified) that is used. A second limit controls this; it defaults to the
+value that is set for --with-match-limit, which imposes no additional
+constraints. However, you can set a lower limit by adding, for example,
+.sp
+ --with-match-limit-recursion=10000
+.sp
+to the \fBconfigure\fP command. This value can also be overridden at run time.
+.
+.
+.SH "CREATING CHARACTER TABLES AT BUILD TIME"
+.rs
+.sp
+PCRE2 uses fixed tables for processing characters whose code points are less
+than 256. By default, PCRE2 is built with a set of tables that are distributed
+in the file \fIsrc/pcre2_chartables.c.dist\fP. These tables are for ASCII codes
+only. If you add
+.sp
+ --enable-rebuild-chartables
+.sp
+to the \fBconfigure\fP command, the distributed tables are no longer used.
+Instead, a program called \fBdftables\fP is compiled and run. This outputs the
+source for new set of tables, created in the default locale of your C run-time
+system. (This method of replacing the tables does not work if you are cross
+compiling, because \fBdftables\fP is run on the local host. If you need to
+create alternative tables when cross compiling, you will have to do so "by
+hand".)
+.
+.
+.SH "USING EBCDIC CODE"
+.rs
+.sp
+PCRE2 assumes by default that it will run in an environment where the character
+code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
+most computer operating systems. PCRE2 can, however, be compiled to run in an
+EBCDIC environment by adding
+.sp
+ --enable-ebcdic
+.sp
+to the \fBconfigure\fP command. This setting implies
+--enable-rebuild-chartables. You should only use it if you know that you are in
+an EBCDIC environment (for example, an IBM mainframe operating system). The
+--enable-ebcdic option is incompatible with --enable-unicode.
+.P
+The EBCDIC character that corresponds to an ASCII LF is assumed to have the
+value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
+such an environment you should use
+.sp
+ --enable-ebcdic-nl25
+.sp
+as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
+same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is \fInot\fP
+chosen as LF is made to correspond to the Unicode NEL character (which, in
+Unicode, is 0x85).
+.P
+The options that select newline behaviour, such as --enable-newline-is-cr,
+and equivalent run-time options, refer to these character values in an EBCDIC
+environment.
+.
+.
+.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT"
+.rs
+.sp
+By default, \fBpcre2grep\fP reads all files as plain text. You can build it so
+that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads
+them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of
+.sp
+ --enable-pcre2grep-libz
+ --enable-pcre2grep-libbz2
+.sp
+to the \fBconfigure\fP command. These options naturally require that the
+relevant libraries are installed on your system. Configuration will fail if
+they are not.
+.
+.
+.SH "PCRE2GREP BUFFER SIZE"
+.rs
+.sp
+\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
+scanning, in order to be able to output "before" and "after" lines when it
+finds a match. The size of the buffer is controlled by a parameter whose
+default value is 20K. The buffer itself is three times this size, but because
+of the way it is used for holding "before" lines, the longest line that is
+guaranteed to be processable is the parameter size. You can change the default
+parameter value by adding, for example,
+.sp
+ --with-pcre2grep-bufsize=50K
+.sp
+to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can, however,
+override this value by specifying a run-time option.
+.
+.
+.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
+.rs
+.sp
+If you add one of
+.sp
+ --enable-pcre2test-libreadline
+ --enable-pcre2test-libedit
+.sp
+to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
+\fBlibreadline\fP or\fBlibedit\fP library, respectively, and when its input is
+from a terminal, it reads it using the \fBreadline()\fP function. This provides
+line-editing and history facilities. Note that \fBlibreadline\fP is
+GPL-licensed, so if you distribute a binary of \fBpcre2test\fP linked in this
+way, there may be licensing issues. These can be avoided by linking with
+\fBlibedit\fP (which has a BSD licence) instead.
+.P
+Setting this option causes the \fB-lreadline\fP option to be added to the
+\fBpcre2test\fP build. In many operating environments with a sytem-installed
+readline library this is sufficient. However, in some environments (e.g. if an
+unmodified distribution version of readline is in use), some extra
+configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
+this:
+.sp
+ "Readline uses the termcap functions, but does not link with
+ the termcap or curses library itself, allowing applications
+ which link with readline the to choose an appropriate library."
+.sp
+If your environment has not been set up so that an appropriate library is
+automatically included, you may need to add something like
+.sp
+ LIBS="-ncurses"
+.sp
+immediately before the \fBconfigure\fP command.
+.
+.
+.SH "DEBUGGING WITH VALGRIND SUPPORT"
+.rs
+.sp
+By adding the
+.sp
+ --enable-valgrind
+.sp
+option to to the \fBconfigure\fP command, PCRE2 will use valgrind annotations
+to mark certain memory regions as unaddressable. This allows it to detect
+invalid memory accesses, and is mostly useful for debugging PCRE2 itself.
+.
+.
+.SH "CODE COVERAGE REPORTING"
+.rs
+.sp
+If your C compiler is gcc, you can build a version of PCRE2 that can generate a
+code coverage report for its test suite. To enable this, you must install
+\fBlcov\fP version 1.6 or above. Then specify
+.sp
+ --enable-coverage
+.sp
+to the \fBconfigure\fP command and build PCRE2 in the usual way.
+.P
+Note that using \fBccache\fP (a caching C compiler) is incompatible with code
+coverage reporting. If you have configured \fBccache\fP to run automatically
+on your system, you must set the environment variable
+.sp
+ CCACHE_DISABLE=1
+.sp
+before running \fBmake\fP to build PCRE2, so that \fBccache\fP is not used.
+.P
+When --enable-coverage is used, the following addition targets are added to the
+\fIMakefile\fP:
+.sp
+ make coverage
+.sp
+This creates a fresh coverage report for the PCRE2 test suite. It is equivalent
+to running "make coverage-reset", "make coverage-baseline", "make check", and
+then "make coverage-report".
+.sp
+ make coverage-reset
+.sp
+This zeroes the coverage counters, but does nothing else.
+.sp
+ make coverage-baseline
+.sp
+This captures baseline coverage information.
+.sp
+ make coverage-report
+.sp
+This creates the coverage report.
+.sp
+ make coverage-clean-report
+.sp
+This removes the generated coverage report without cleaning the coverage data
+itself.
+.sp
+ make coverage-clean-data
+.sp
+This removes the captured coverage data without removing the coverage files
+created at compile time (*.gcno).
+.sp
+ make coverage-clean
+.sp
+This cleans all coverage data including the generated coverage report. For more
+information about code coverage, see the \fBgcov\fP and \fBlcov\fP
+documentation.
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2api\fP(3), \fBpcre2_config\fP(3).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2compat.3 b/doc/pcre2compat.3
new file mode 100644
index 0000000..d40742d
--- /dev/null
+++ b/doc/pcre2compat.3
@@ -0,0 +1,190 @@
+.TH PCRE2COMPAT 3 "28 September 2014" "PCRE2 10.0"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
+.rs
+.sp
+This document describes the differences in the ways that PCRE2 and Perl handle
+regular expressions. The differences described here are with respect to Perl
+versions 5.10 and above.
+.P
+1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+have are given in the
+.\" HREF
+\fBpcre2unicode\fP
+.\"
+page.
+.P
+2. PCRE2 allows repeat quantifiers only on parenthesized assertions, but they
+do not mean what you might think. For example, (?!a){3} does not assert that
+the next three characters are not "a". It just asserts that the next character
+is not "a" three times (in principle: PCRE2 optimizes this to run the assertion
+just once). Perl allows repeat quantifiers on other assertions such as \eb, but
+these do not seem to have any use.
+.P
+3. Capturing subpatterns that occur inside negative lookahead assertions are
+counted, but their entries in the offsets vector are never set. Perl sometimes
+(but not always) sets its numerical variables from inside negative assertions.
+.P
+4. The following Perl escape sequences are not supported: \el, \eu, \eL,
+\eU, and \eN when followed by a character name or Unicode value. (\eN on its
+own, matching a non-newline character, is supported.) In fact these are
+implemented by Perl's general string-handling and are not part of its pattern
+matching engine. If any of these are encountered by PCRE2, an error is
+generated by default. However, if the PCRE2_ALT_BSUX option is set,
+\eU and \eu are interpreted as ECMAScript interprets them.
+.P
+5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
+built with Unicode support. The properties that can be tested with \ep and \eP
+are limited to the general category properties such as Lu and Nd, script names
+such as Greek or Han, and the derived properties Any and L&. PCRE2 does support
+the Cs (surrogate) property, which Perl does not; the Perl documentation says
+"Because Perl hides the need for the user to understand the internal
+representation of Unicode characters, there is no need to implement the
+somewhat messy concept of surrogates."
+.P
+6. PCRE2 does support the \eQ...\eE escape for quoting substrings. Characters
+in between are treated as literals. This is slightly different from Perl in
+that $ and @ are also handled as literals inside the quotes. In Perl, they
+cause variable interpolation (but of course PCRE2 does not have variables).
+Note the following examples:
+.sp
+ Pattern PCRE2 matches Perl matches
+.sp
+.\" JOIN
+ \eQabc$xyz\eE abc$xyz abc followed by the
+ contents of $xyz
+ \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz
+ \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz
+.sp
+The \eQ...\eE sequence is recognized both inside and outside character classes.
+.P
+7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+constructions. However, there is support for recursive patterns. This is not
+available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE2 "callout"
+feature allows an external function to be called during pattern matching. See
+the
+.\" HREF
+\fBpcre2callout\fP
+.\"
+documentation for details.
+.P
+8. Subpatterns that are called as subroutines (whether or not recursively) are
+always treated as atomic groups in PCRE2. This is like Python, but unlike Perl.
+Captured values that are set outside a subroutine call can be reference from
+inside in PCRE2, but not in Perl. There is a discussion that explains these
+differences in more detail in the
+.\" HTML <a href="pcre2pattern.html#recursiondifference">
+.\" </a>
+section on recursion differences from Perl
+.\"
+in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+page.
+.P
+9. If any of the backtracking control verbs are used in a subpattern that is
+called as a subroutine (whether or not recursively), their effect is confined
+to that subpattern; it does not extend to the surrounding pattern. This is not
+always the case in Perl. In particular, if (*THEN) is present in a group that
+is called as a subroutine, its action is limited to that group, even if the
+group does not contain any | characters. Note that such subpatterns are
+processed as anchored at the point where they are tested.
+.P
+10. If a pattern contains more than one backtracking control verb, the first
+one that is backtracked onto acts. For example, in the pattern
+A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
+triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
+same as PCRE2, but there are examples where it differs.
+.P
+11. Most backtracking verbs in assertions have their normal actions. They are
+not confined to the assertion.
+.P
+12. There are some differences that are concerned with the settings of captured
+strings when part of a pattern is repeated. For example, matching "aba" against
+the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
+"b".
+.P
+13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
+names is not as general as Perl's. This is a consequence of the fact the PCRE2
+works internally just with numbers, using an external table to translate
+between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b)B),
+where the two capturing parentheses have the same number but different names,
+is not supported, and causes an error at compile time. If it were allowed, it
+would not be possible to distinguish which parentheses matched, because both
+names map to capturing subpattern number 1. To avoid this confusing situation,
+an error is given at compile time.
+.P
+14. Perl recognizes comments in some places that PCRE2 does not, for example,
+between the ( and ? at the start of a subpattern. If the /x modifier is set,
+Perl allows white space between ( and ? (though current Perls warn that this is
+deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
+.P
+15. Perl, when in warning mode, gives warnings for character classes such as
+[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
+warning features, so it gives an error in these cases because they are almost
+certainly user mistakes.
+.P
+16. In PCRE2, the upper/lower case character properties Lu and Ll are not
+affected when case-independent matching is specified. For example, \ep{Lu}
+always matches an upper case letter. I think Perl has changed in this respect;
+in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
+letters, regardless of case, when case independence is specified.
+.P
+17. PCRE2 provides some extensions to the Perl regular expression facilities.
+Perl 5.10 includes new features that are not in earlier versions of Perl, some
+of which (such as named parentheses) have been in PCRE2 for some time. This
+list is with respect to Perl 5.10:
+.sp
+(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
+each alternative branch of a lookbehind assertion can match a different length
+of string. Perl requires them all to have the same length.
+.sp
+(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
+meta-character matches only at the very end of the string.
+.sp
+(c) A backslash followed by a letter with no special meaning is faulted. (Perl
+can be made to issue a warning.)
+.sp
+(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
+inverted, that is, by default they are not greedy, but if followed by a
+question mark they are.
+.sp
+(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
+only at the first matching position in the subject string.
+.sp
+(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
+PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
+.sp
+(g) The \eR escape sequence can be restricted to match only CR, LF, or CRLF
+by the PCRE2_BSR_ANYCRLF option.
+.sp
+(h) The callout facility is PCRE2-specific.
+.sp
+(i) The partial matching facility is PCRE2-specific.
+.sp
+(j) The alternative matching function (\fBpcre2_dfa_match()\fP matches in a
+different way and is not Perl-compatible.
+.sp
+(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
+a pattern that set overall options that cannot be changed within the pattern.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
new file mode 100644
index 0000000..3d6f22b
--- /dev/null
+++ b/doc/pcre2grep.1
@@ -0,0 +1,683 @@
+.TH PCRE2GREP 1 "28 September 2014" "PCRE2 10.00"
+.SH NAME
+pcre2grep - a grep with Perl-compatible regular expressions.
+.SH SYNOPSIS
+.B pcre2grep [options] [long options] [pattern] [path1 path2 ...]
+.
+.SH DESCRIPTION
+.rs
+.sp
+\fBpcre2grep\fP searches files for character patterns, in the same way as other
+grep commands do, but it uses the PCRE2 regular expression library to support
+patterns that are compatible with the regular expressions of Perl 5. See
+.\" HREF
+\fBpcre2syntax\fP(3)
+.\"
+for a quick-reference summary of pattern syntax, or
+.\" HREF
+\fBpcre2pattern\fP(3)
+.\"
+for a full description of the syntax and semantics of the regular expressions
+that PCRE2 supports.
+.P
+Patterns, whether supplied on the command line or in a separate file, are given
+without delimiters. For example:
+.sp
+ pcre2grep Thursday /etc/motd
+.sp
+If you attempt to use delimiters (for example, by surrounding a pattern with
+slashes, as is common in Perl scripts), they are interpreted as part of the
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed quotes are required if a
+pattern contains white space or shell metacharacters.
+.P
+The first argument that follows any option settings is treated as the single
+pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present.
+Conversely, when one or both of these options are used to specify patterns, all
+arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an
+argument pattern must be provided.
+.P
+If no files are specified, \fBpcre2grep\fP reads the standard input. The
+standard input can also be referenced by a name consisting of a single hyphen.
+For example:
+.sp
+ pcre2grep some-pattern /file1 - /file3
+.sp
+By default, each line that matches a pattern is copied to the standard
+output, and if there is more than one file, the file name is output at the
+start of each line, followed by a colon. However, there are options that can
+change how \fBpcre2grep\fP behaves. In particular, the \fB-M\fP option makes it
+possible to search for patterns that span line boundaries. What defines a line
+boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
+.P
+The amount of memory used for buffering files that are being scanned is
+controlled by a parameter that can be set by the \fB--buffer-size\fP option.
+The default value for this parameter is specified when \fBpcre2grep\fP is built,
+with the default default being 20K. A block of memory three times this size is
+used (to allow for buffering "before" and "after" lines). An error occurs if a
+line overflows the buffer.
+.P
+Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
+(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
+each line in the order in which they are defined, except that all the \fB-e\fP
+patterns are tried before the \fB-f\fP patterns.
+.P
+By default, as soon as one pattern matches a line, no further patterns are
+considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the
+matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
+\fB--line-offsets\fP is used to output only the part of the line that matched
+(either shown literally, or as an offset), scanning resumes immediately
+following the match, so that further matches on the same line can be found. If
+there are multiple patterns, they are all tried on the remainder of the line,
+but patterns that follow the one that matched are not tried on the earlier part
+of the line.
+.P
+This behaviour means that the order in which multiple patterns are specified
+can affect the output when one of the above options is used. This is no longer
+the same behaviour as GNU grep, which now manages to display earlier matches
+for later patterns (as long as there is no overlap).
+.P
+Patterns that can match an empty string are accepted, but empty string
+matches are never recognized. An example is the pattern "(super)?(man)?", in
+which all components are optional. This pattern finds all occurrences of both
+"super" and "man"; the output differs from matching with "super|man" when only
+the matching substrings are being shown.
+.P
+If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set,
+\fBpcre2grep\fP uses the value to set a locale when calling the PCRE2 library.
+The \fB--locale\fP option can be used to override this.
+.
+.
+.SH "SUPPORT FOR COMPRESSED FILES"
+.rs
+.sp
+It is possible to compile \fBpcre2grep\fP so that it uses \fBlibz\fP or
+\fBlibbz2\fP to read files whose names end in \fB.gz\fP or \fB.bz2\fP,
+respectively. You can find out whether your binary has support for one or both
+of these file types by running it with the \fB--help\fP option. If the
+appropriate support is not present, files are treated as plain text. The
+standard input is always so treated.
+.
+.
+.SH "BINARY FILES"
+.rs
+.sp
+By default, a file that contains a binary zero byte within the first 1024 bytes
+is identified as a binary file, and is processed specially. (GNU grep also
+identifies binary files in this manner.) See the \fB--binary-files\fP option
+for a means of changing the way binary files are handled.
+.
+.
+.SH OPTIONS
+.rs
+.sp
+The order in which some of the options appear can affect the output. For
+example, both the \fB-h\fP and \fB-l\fP options affect the printing of file
+names. Whichever comes later in the command line will be the one that takes
+effect. Similarly, except where noted below, if an option is given twice, the
+later setting is used. Numerical values for options may be followed by K or M,
+to signify multiplication by 1024 or 1024*1024 respectively.
+.TP 10
+\fB--\fP
+This terminates the list of options. It is useful if the next item on the
+command line starts with a hyphen but is not an option. This allows for the
+processing of patterns and filenames that start with hyphens.
+.TP
+\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
+Output \fInumber\fP lines of context after each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
+guarantees to have up to 8K of following text available for context output.
+.TP
+\fB-a\fP, \fB--text\fP
+Treat binary files as text. This is equivalent to
+\fB--binary-files\fP=\fItext\fP.
+.TP
+\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
+Output \fInumber\fP lines of context before each matching line. If filenames
+and/or line numbers are being output, a hyphen separator is used instead of a
+colon for the context lines. A line containing "--" is output between each
+group of lines, unless they are in fact contiguous in the input file. The value
+of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
+guarantees to have up to 8K of preceding text available for context output.
+.TP
+\fB--binary-files=\fP\fIword\fP
+Specify how binary files are to be processed. If the word is "binary" (the
+default), pattern matching is performed on binary files, but the only output is
+"Binary file <name> matches" when a match succeeds. If the word is "text",
+which is equivalent to the \fB-a\fP or \fB--text\fP option, binary files are
+processed in the same way as any other file. In this case, when a match
+succeeds, the output may be binary garbage, which can have nasty effects if
+sent to a terminal. If the word is "without-match", which is equivalent to the
+\fB-I\fP option, binary files are not processed at all; they are assumed not to
+be of interest.
+.TP
+\fB--buffer-size=\fP\fInumber\fP
+Set the parameter that controls how much memory is used for buffering files
+that are being scanned.
+.TP
+\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
+Output \fInumber\fP lines of context both before and after each matching line.
+This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value.
+.TP
+\fB-c\fP, \fB--count\fP
+Do not output individual lines from the files that are being scanned; instead
+output the number of lines that would otherwise have been shown. If no lines
+are selected, the number zero is output. If several files are are being
+scanned, a count is output for each of them. However, if the
+\fB--files-with-matches\fP option is also used, only those files whose counts
+are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
+\fB-B\fP, and \fB-C\fP options are ignored.
+.TP
+\fB--colour\fP, \fB--color\fP
+If this option is given without any data, it is equivalent to "--colour=auto".
+If data is required, it must be given in the same shell item, separated by an
+equals sign.
+.TP
+\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP
+This option specifies under what circumstances the parts of a line that matched
+a pattern should be coloured in the output. By default, the output is not
+coloured. The value (which is optional, see above) may be "never", "always", or
+"auto". In the latter case, colouring happens only if the standard output is
+connected to a terminal. More resources are used when colouring is enabled,
+because \fBpcre2grep\fP has to search for all possible matches in a line, not
+just one, in order to colour them all.
+.sp
+The colour that is used can be specified by setting the environment variable
+PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
+.TP
+\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
+If an input path is not a regular file or a directory, "action" specifies how
+it is to be processed. Valid values are "read" (the default) or "skip"
+(silently skip the path).
+.TP
+\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP
+If an input path is a directory, "action" specifies how it is to be processed.
+Valid values are "read" (the default in non-Windows environments, for
+compatibility with GNU grep), "recurse" (equivalent to the \fB-r\fP option), or
+"skip" (silently skip the path, the default in Windows environments). In the
+"read" case, directories are read as if they were ordinary files. In some
+operating systems the effect of reading a directory like this is an immediate
+end-of-file; in others it may provoke an error.
+.TP
+\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is no limit to the number of patterns. They are applied to each
+line in the order in which they are defined until one matches.
+.sp
+If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first,
+followed by the patterns from the file(s), independent of the order in which
+these options are specified. Note that multiple use of \fB-e\fP is not the same
+as a single pattern with alternatives. For example, X|Y finds the first
+character in a line that is X or Y, whereas if the two patterns are given
+separately, with X first, \fBpcre2grep\fP finds X if it is present, even if it
+follows Y in the line. It finds Y only if there is no X in the line. This
+matters only if you are using \fB-o\fP or \fB--colo(u)r\fP to show the part(s)
+of the line that matched.
+.TP
+\fB--exclude\fP=\fIpattern\fP
+Files (but not directories) whose names match the pattern are skipped without
+being processed. This applies to all files, whether listed on the command line,
+obtained from \fB--file-list\fP, or by scanning a directory. The pattern is a
+PCRE2 regular expression, and is matched against the final component of the file
+name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not
+apply to this pattern. The option may be given any number of times in order to
+specify multiple patterns. If a file name matches both an \fB--include\fP
+and an \fB--exclude\fP pattern, it is excluded. There is no short form for this
+option.
+.TP
+\fB--exclude-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--exclude\fP
+option. What constitutes a newline when reading the file is the operating
+system's default. The \fB--newline\fP option has no effect on this option. This
+option may be given more than once in order to specify a number of files to
+read.
+.TP
+\fB--exclude-dir\fP=\fIpattern\fP
+Directories whose names match the pattern are skipped without being processed,
+whatever the setting of the \fB--recursive\fP option. This applies to all
+directories, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a parent directory. The pattern is a PCRE2
+regular expression, and is matched against the final component of the directory
+name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not
+apply to this pattern. The option may be given any number of times in order to
+specify more than one pattern. If a directory matches both \fB--include-dir\fP
+and \fB--exclude-dir\fP, it is excluded. There is no short form for this
+option.
+.TP
+\fB-F\fP, \fB--fixed-strings\fP
+Interpret each data-matching pattern as a list of fixed strings, separated by
+newlines, instead of as a regular expression. What constitutes a newline for
+this purpose is controlled by the \fB--newline\fP option. The \fB-w\fP (match
+as a word) and \fB-x\fP (match whole line) options can be used with \fB-F\fP.
+They apply to each of the fixed strings. A line is selected if any of the fixed
+strings are found in it (subject to \fB-w\fP or \fB-x\fP, if present). This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the \fB--include\fP or
+\fB--exclude\fP options.
+.TP
+\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
+Read patterns from the file, one per line, and match them against
+each line of input. What constitutes a newline when reading the file is the
+operating system's default. The \fB--newline\fP option has no effect on this
+option. Trailing white space is removed from each line, and blank lines are
+ignored. An empty file contains no patterns and therefore matches nothing. See
+also the comments about multiple patterns versus a single pattern with
+alternatives in the description of \fB-e\fP above.
+.sp
+If this option is given more than once, all the specified files are
+read. A data line is output if any of the patterns match it. A filename can
+be given as "-" to refer to the standard input. When \fB-f\fP is used, patterns
+specified on the command line using \fB-e\fP may also be present; they are
+tested before the file's patterns. However, no other pattern is taken from the
+command line; all arguments are treated as the names of paths to be searched.
+.TP
+\fB--file-list\fP=\fIfilename\fP
+Read a list of files and/or directories that are to be scanned from the given
+file, one per line. Trailing white space is removed from each line, and blank
+lines are ignored. These paths are processed before any that are listed on the
+command line. The filename can be given as "-" to refer to the standard input.
+If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are
+read first. This is useful only when the standard input is a terminal, from
+which further lines (the list of files) can be read after an end-of-file
+indication. If this option is given more than once, all the specified files are
+read.
+.TP
+\fB--file-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as an
+offset from the start of the file and a length, separated by a comma. In this
+mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with \fB--line-offsets\fP
+and \fB--only-matching\fP.
+.TP
+\fB-H\fP, \fB--with-filename\fP
+Force the inclusion of the filename at the start of output lines when searching
+a single file. By default, the filename is not shown in this case. For matching
+lines, the filename is followed by a colon; for context lines, a hyphen
+separator is used. If a line number is also being output, it follows the file
+name.
+.TP
+\fB-h\fP, \fB--no-filename\fP
+Suppress the output filenames when searching multiple files. By default,
+filenames are shown when multiple files are searched. For matching lines, the
+filename is followed by a colon; for context lines, a hyphen separator is used.
+If a line number is also being output, it follows the file name.
+.TP
+\fB--help\fP
+Output a help message, giving brief details of the command options and file
+type support, and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-I\fP
+Treat binary files as never matching. This is equivalent to
+\fB--binary-files\fP=\fIwithout-match\fP.
+.TP
+\fB-i\fP, \fB--ignore-case\fP
+Ignore upper/lower case distinctions during comparisons.
+.TP
+\fB--include\fP=\fIpattern\fP
+If any \fB--include\fP patterns are specified, the only files that are
+processed are those that match one of the patterns (and do not match an
+\fB--exclude\fP pattern). This option does not affect directories, but it
+applies to all files, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
+expression, and is matched against the final component of the file name, not
+the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not apply to
+this pattern. The option may be given any number of times. If a file name
+matches both an \fB--include\fP and an \fB--exclude\fP pattern, it is excluded.
+There is no short form for this option.
+.TP
+\fB--include-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--include\fP
+option. What constitutes a newline for this purpose is the operating system's
+default. The \fB--newline\fP option has no effect on this option. This option
+may be given any number of times; all the files are read.
+.TP
+\fB--include-dir\fP=\fIpattern\fP
+If any \fB--include-dir\fP patterns are specified, the only directories that
+are processed are those that match one of the patterns (and do not match an
+\fB--exclude-dir\fP pattern). This applies to all directories, whether listed
+on the command line, obtained from \fB--file-list\fP, or by scanning a parent
+directory. The pattern is a PCRE2 regular expression, and is matched against the
+final component of the directory name, not the entire path. The \fB-F\fP,
+\fB-w\fP, and \fB-x\fP options do not apply to this pattern. The option may be
+given any number of times. If a directory matches both \fB--include-dir\fP and
+\fB--exclude-dir\fP, it is excluded. There is no short form for this option.
+.TP
+\fB-L\fP, \fB--files-without-match\fP
+Instead of outputting lines from the files, just output the names of the files
+that do not contain any lines that would have been output. Each file name is
+output once, on a separate line.
+.TP
+\fB-l\fP, \fB--files-with-matches\fP
+Instead of outputting lines from the files, just output the names of the files
+containing lines that would have been output. Each file name is output
+once, on a separate line. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches.
+.TP
+\fB--label\fP=\fIname\fP
+This option supplies a name to be used for the standard input when file names
+are being output. If not supplied, "(standard input)" is used. There is no
+short form for this option.
+.TP
+\fB--line-buffered\fP
+When this option is given, input is read and processed line by line, and the
+output is flushed after each write. By default, input is read in large chunks,
+unless \fBpcre2grep\fP can determine that it is reading from a terminal (which
+is currently possible only in Unix-like environments). Output to terminal is
+normally automatically flushed by the operating system. This option can be
+useful when the input or output is attached to a pipe and you do not want
+\fBpcre2grep\fP to buffer up large amounts of data. However, its use will affect
+performance, and the \fB-M\fP (multiline) option ceases to work.
+.TP
+\fB--line-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as a
+line number, the offset from the start of the line, and a length. The line
+number is terminated by a colon (as usual; see the \fB-n\fP option), and the
+offset and length are separated by a comma. In this mode, no context is shown.
+That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
+more than one match in a line, each of them is shown separately. This option is
+mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
+.TP
+\fB--locale\fP=\fIlocale-name\fP
+This option specifies a locale to be used for pattern matching. It overrides
+the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
+locale is specified, the PCRE2 library's default (usually the "C" locale) is
+used. There is no short form for this option.
+.TP
+\fB--match-limit\fP=\fInumber\fP
+Processing some regular expression patterns can require a very large amount of
+memory, leading in some cases to a program crash if not enough is available.
+Other patterns may take a very long time to search for all possible matching
+strings. The \fBpcre2_exec()\fP function that is called by \fBpcre2grep\fP to do
+the matching has two parameters that can limit the resources that it uses.
+.sp
+The \fB--match-limit\fP option provides a means of limiting resource usage
+when processing patterns that are not going to match, but which have a very
+large number of possibilities in their search trees. The classic example is a
+pattern that uses nested unlimited repeats. Internally, PCRE2 uses a function
+called \fBmatch()\fP which it calls repeatedly (sometimes recursively). The
+limit set by \fB--match-limit\fP is imposed on the number of times this
+function is called during a match, which has the effect of limiting the amount
+of backtracking that can take place.
+.sp
+The \fB--recursion-limit\fP option is similar to \fB--match-limit\fP, but
+instead of limiting the total number of times that \fBmatch()\fP is called, it
+limits the depth of recursive calls, which in turn limits the amount of memory
+that can be used. The recursion depth is a smaller number than the total number
+of calls, because not all calls to \fBmatch()\fP are recursive. This limit is
+of use only if it is set smaller than \fB--match-limit\fP.
+.sp
+There are no short forms for these options. The default settings are specified
+when the PCRE2 library is compiled, with the default default being 10 million.
+.TP
+\fB-M\fP, \fB--multiline\fP
+Allow patterns to match more than one line. When this option is given, patterns
+may usefully contain literal newline characters and internal occurrences of ^
+and $ characters. The output for a successful match may consist of more than
+one line, the last of which is the one in which the match ended. If the matched
+string ends with a newline sequence the output ends at the end of that line.
+.sp
+When this option is set, the PCRE2 library is called in "multiline" mode.
+There is a limit to the number of lines that can be matched, imposed by the way
+that \fBpcre2grep\fP buffers the input file as it scans it. However,
+\fBpcre2grep\fP ensures that at least 8K characters or the rest of the document
+(whichever is the shorter) are available for forward matching, and similarly
+the previous 8K characters (or all the previous characters, if fewer than 8K)
+are guaranteed to be available for lookbehind assertions. This option does not
+work when input is read line by line (see \fP--line-buffered\fP.)
+.TP
+\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
+The PCRE2 library supports five different conventions for indicating
+the ends of lines. They are the single-character sequences CR (carriage return)
+and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
+which recognizes any of the preceding three types, and an "any" convention, in
+which any Unicode line ending sequence is assumed to end a line. The Unicode
+sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
+(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
+PS (paragraph separator, U+2029).
+.sp
+When the PCRE2 library is built, a default line-ending sequence is specified.
+This is normally the standard sequence for the operating system. Unless
+otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
+The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
+makes it possible to use \fBpcre2grep\fP to scan files that have come from other
+environments without having to modify their line endings. If the data that is
+being scanned does not agree with the convention set by this option,
+\fBpcre2grep\fP may behave in strange ways. Note that this option does not
+apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options, which are expected to use the operating system's
+standard newline sequence.
+.TP
+\fB-n\fP, \fB--line-number\fP
+Precede each output line by its line number in the file, followed by a colon
+for matching lines or a hyphen for context lines. If the filename is also being
+output, it precedes the line number. This option is forced if
+\fB--line-offsets\fP is used.
+.TP
+\fB--no-jit\fP
+If the PCRE2 library is built with support for just-in-time compiling (which
+speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
+\fB-o\fP, \fB--only-matching\fP
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
+\fB-C\fP options are ignored. If there is more than one match in a line, each
+of them is shown separately. If \fB-o\fP is combined with \fB-v\fP (invert the
+sense of the match to find non-matching lines), no output is generated, but the
+return code is set appropriately. If the matched portion of the line is empty,
+nothing is output unless the file name or line number are being printed, in
+which case they are shown on an otherwise empty line. This option is mutually
+exclusive with \fB--file-offsets\fP and \fB--line-offsets\fP.
+.TP
+\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 32 capturing parentheses are supported, and -o0 is
+equivalent to \fB-o\fP without a number. Because these options can be given
+without an argument (see above), if an argument is present, it must be given in
+the same shell item, for example, -o3 or --only-matching=2. The comments given
+for the non-argument case above also apply to this case. If the specified
+capturing parentheses do not exist in the pattern, or were not set in the
+match, nothing is output unless the file name or line number are being printed.
+.sp
+If this option is given multiple times, multiple substrings are output, in the
+order the options are given. For example, -o3 -o1 -o3 causes the substrings
+matched by capturing parentheses 3 and 1 and then 3 again to be output. By
+default, there is no separator (but see the next option).
+.TP
+\fB--om-separator\fP=\fItext\fP
+Specify a separating string for multiple occurrences of \fB-o\fP. The default
+is an empty string. Separating strings are never coloured.
+.TP
+\fB-q\fP, \fB--quiet\fP
+Work quietly, that is, display nothing except error messages. The exit
+status indicates whether or not any matches were found.
+.TP
+\fB-r\fP, \fB--recursive\fP
+If any given path is a directory, recursively scan the files it contains,
+taking note of any \fB--include\fP and \fB--exclude\fP settings. By default, a
+directory is read as a normal file; in some operating systems this gives an
+immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
+option to "recurse".
+.TP
+\fB--recursion-limit\fP=\fInumber\fP
+See \fB--match-limit\fP above.
+.TP
+\fB-s\fP, \fB--no-messages\fP
+Suppress error messages about non-existent or unreadable files. Such files are
+quietly skipped. However, the return code is still 2, even if matches were
+found in other files.
+.TP
+\fB-u\fP, \fB--utf-8\fP
+Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
+with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
+\fB--include\fP options) and all subject lines that are scanned must be valid
+strings of UTF-8 characters.
+.TP
+\fB-V\fP, \fB--version\fP
+Write the version numbers of \fBpcre2grep\fP and the PCRE2 library to the
+standard output and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-v\fP, \fB--invert-match\fP
+Invert the sense of the match, so that lines which do \fInot\fP match any of
+the patterns are the ones that are found.
+.TP
+\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
+Force the patterns to match only whole words. This is equivalent to having \eb
+at the start and end of the pattern. This option applies only to the patterns
+that are matched against the contents of files; it does not apply to patterns
+specified by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
+Force the patterns to be anchored (each must start matching at the beginning of
+a line) and in addition, require them to match entire lines. This is equivalent
+to having ^ and $ characters at the start and end of each alternative branch in
+every pattern. This option applies only to the patterns that are matched
+against the contents of files; it does not apply to patterns specified by any
+of the \fB--include\fP or \fB--exclude\fP options.
+.
+.
+.SH "ENVIRONMENT VARIABLES"
+.rs
+.sp
+The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that
+order, for a locale. The first one that is set is used. This can be overridden
+by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
+(usually the "C" locale) is used.
+.
+.
+.SH "NEWLINES"
+.rs
+.sp
+The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
+different newline conventions from the default. Any parts of the input files
+that are written to the standard output are copied identically, with whatever
+newline sequences they have in the input. However, the setting of this option
+does not affect the interpretation of files specified by the \fB-f\fP,
+\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use
+the operating system's standard newline sequence, nor does it affect the way in
+which \fBpcre2grep\fP writes informational messages to the standard error and
+output streams. For these it uses the string "\en" to indicate newlines,
+relying on the C I/O library to convert this to an appropriate sequence.
+.
+.
+.SH "OPTIONS COMPATIBILITY"
+.rs
+.sp
+Many of the short and long forms of \fBpcre2grep\fP's options are the same
+as in the GNU \fBgrep\fP program. Any long option of the form
+\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
+(PCRE2 terminology). However, the \fB--file-list\fP, \fB--file-offsets\fP,
+\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP,
+\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
+\fB--recursion-limit\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
+\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
+capturing parentheses number.
+.P
+Although most of the common options work the same way, a few are different in
+\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
+for GNU \fBgrep\fP, but a regular expression for \fBpcre2grep\fP. If both the
+\fB-c\fP and \fB-l\fP options are given, GNU grep lists only file names,
+without counts, but \fBpcre2grep\fP gives the counts.
+.
+.
+.SH "OPTIONS WITH DATA"
+.rs
+.sp
+There are four different ways in which an option with data can be specified.
+If a short form option is used, the data may follow immediately, or (with one
+exception) in the next command line item. For example:
+.sp
+ -f/some/file
+ -f /some/file
+.sp
+The exception is the \fB-o\fP option, which may appear with or without data.
+Because of this, if data is present, it must follow immediately in the same
+item, for example -o3.
+.P
+If a long form option is used, the data may appear in the same command line
+item, separated by an equals character, or (with two exceptions) it may appear
+in the next command line item. For example:
+.sp
+ --file=/some/file
+ --file /some/file
+.sp
+Note, however, that if you want to supply a file name beginning with ~ as data
+in a shell command, and have the shell expand ~ to a home directory, you must
+separate the file name from the option, because the shell does not treat ~
+specially unless it is at the start of an item.
+.P
+The exceptions to the above are the \fB--colour\fP (or \fB--color\fP) and
+\fB--only-matching\fP options, for which the data is optional. If one of these
+options does have data, it must be given in the first form, using an equals
+character. Otherwise \fBpcre2grep\fP will assume that it has no data.
+.
+.
+.SH "MATCHING ERRORS"
+.rs
+.sp
+It is possible to supply a regular expression that takes a very long time to
+fail to match certain lines. Such patterns normally involve nested indefinite
+repeats, for example: (a+)*\ed when matched against a line of a's with no final
+digit. The PCRE2 matching function has a resource limit that causes it to abort
+in these circumstances. If this happens, \fBpcre2grep\fP outputs an error
+message and the line that caused the problem to the standard error stream. If
+there are more than 20 such errors, \fBpcre2grep\fP gives up.
+.P
+The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the overall
+resource limit; there is a second option called \fB--recursion-limit\fP that
+sets a limit on the amount of memory (usually stack) that is used (see the
+discussion of these options above).
+.
+.
+.SH DIAGNOSTICS
+.rs
+.sp
+Exit status is 0 if any matches were found, 1 if no matches were found, and 2
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+\fB-s\fP option to suppress error messages about inaccessible files does not
+affect the return code.
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2test\fP(1).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
new file mode 100644
index 0000000..c4b177c
--- /dev/null
+++ b/doc/pcre2grep.txt
@@ -0,0 +1,741 @@
+PCRE2GREP(1) General Commands Manual PCRE2GREP(1)
+
+
+
+NAME
+ pcre2grep - a grep with Perl-compatible regular expressions.
+
+SYNOPSIS
+ pcre2grep [options] [long options] [pattern] [path1 path2 ...]
+
+
+DESCRIPTION
+
+ pcre2grep searches files for character patterns, in the same way as
+ other grep commands do, but it uses the PCRE2 regular expression
+ library to support patterns that are compatible with the regular
+ expressions of Perl 5. See pcre2syntax(3) for a quick-reference summary
+ of pattern syntax, or pcre2pattern(3) for a full description of the
+ syntax and semantics of the regular expressions that PCRE2 supports.
+
+ Patterns, whether supplied on the command line or in a separate file,
+ are given without delimiters. For example:
+
+ pcre2grep Thursday /etc/motd
+
+ If you attempt to use delimiters (for example, by surrounding a pattern
+ with slashes, as is common in Perl scripts), they are interpreted as
+ part of the pattern. Quotes can of course be used to delimit patterns
+ on the command line because they are interpreted by the shell, and
+ indeed quotes are required if a pattern contains white space or shell
+ metacharacters.
+
+ The first argument that follows any option settings is treated as the
+ single pattern to be matched when neither -e nor -f is present. Con-
+ versely, when one or both of these options are used to specify pat-
+ terns, all arguments are treated as path names. At least one of -e, -f,
+ or an argument pattern must be provided.
+
+ If no files are specified, pcre2grep reads the standard input. The
+ standard input can also be referenced by a name consisting of a single
+ hyphen. For example:
+
+ pcre2grep some-pattern /file1 - /file3
+
+ By default, each line that matches a pattern is copied to the standard
+ output, and if there is more than one file, the file name is output at
+ the start of each line, followed by a colon. However, there are options
+ that can change how pcre2grep behaves. In particular, the -M option
+ makes it possible to search for patterns that span line boundaries.
+ What defines a line boundary is controlled by the -N (--newline)
+ option.
+
+ The amount of memory used for buffering files that are being scanned is
+ controlled by a parameter that can be set by the --buffer-size option.
+ The default value for this parameter is specified when pcre2grep is
+ built, with the default default being 20K. A block of memory three
+ times this size is used (to allow for buffering "before" and "after"
+ lines). An error occurs if a line overflows the buffer.
+
+ Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the
+ greater. BUFSIZ is defined in <stdio.h>. When there is more than one
+ pattern (specified by the use of -e and/or -f), each pattern is applied
+ to each line in the order in which they are defined, except that all
+ the -e patterns are tried before the -f patterns.
+
+ By default, as soon as one pattern matches a line, no further patterns
+ are considered. However, if --colour (or --color) is used to colour the
+ matching substrings, or if --only-matching, --file-offsets, or --line-
+ offsets is used to output only the part of the line that matched
+ (either shown literally, or as an offset), scanning resumes immediately
+ following the match, so that further matches on the same line can be
+ found. If there are multiple patterns, they are all tried on the
+ remainder of the line, but patterns that follow the one that matched
+ are not tried on the earlier part of the line.
+
+ This behaviour means that the order in which multiple patterns are
+ specified can affect the output when one of the above options is used.
+ This is no longer the same behaviour as GNU grep, which now manages to
+ display earlier matches for later patterns (as long as there is no
+ overlap).
+
+ Patterns that can match an empty string are accepted, but empty string
+ matches are never recognized. An example is the pattern
+ "(super)?(man)?", in which all components are optional. This pattern
+ finds all occurrences of both "super" and "man"; the output differs
+ from matching with "super|man" when only the matching substrings are
+ being shown.
+
+ If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses
+ the value to set a locale when calling the PCRE2 library. The --locale
+ option can be used to override this.
+
+
+SUPPORT FOR COMPRESSED FILES
+
+ It is possible to compile pcre2grep so that it uses libz or libbz2 to
+ read files whose names end in .gz or .bz2, respectively. You can find
+ out whether your binary has support for one or both of these file types
+ by running it with the --help option. If the appropriate support is not
+ present, files are treated as plain text. The standard input is always
+ so treated.
+
+
+BINARY FILES
+
+ By default, a file that contains a binary zero byte within the first
+ 1024 bytes is identified as a binary file, and is processed specially.
+ (GNU grep also identifies binary files in this manner.) See the
+ --binary-files option for a means of changing the way binary files are
+ handled.
+
+
+OPTIONS
+
+ The order in which some of the options appear can affect the output.
+ For example, both the -h and -l options affect the printing of file
+ names. Whichever comes later in the command line will be the one that
+ takes effect. Similarly, except where noted below, if an option is
+ given twice, the later setting is used. Numerical values for options
+ may be followed by K or M, to signify multiplication by 1024 or
+ 1024*1024 respectively.
+
+ -- This terminates the list of options. It is useful if the next
+ item on the command line starts with a hyphen but is not an
+ option. This allows for the processing of patterns and file-
+ names that start with hyphens.
+
+ -A number, --after-context=number
+ Output number lines of context after each matching line. If
+ filenames and/or line numbers are being output, a hyphen sep-
+ arator is used instead of a colon for the context lines. A
+ line containing "--" is output between each group of lines,
+ unless they are in fact contiguous in the input file. The
+ value of number is expected to be relatively small. However,
+ pcre2grep guarantees to have up to 8K of following text
+ available for context output.
+
+ -a, --text
+ Treat binary files as text. This is equivalent to --binary-
+ files=text.
+
+ -B number, --before-context=number
+ Output number lines of context before each matching line. If
+ filenames and/or line numbers are being output, a hyphen sep-
+ arator is used instead of a colon for the context lines. A
+ line containing "--" is output between each group of lines,
+ unless they are in fact contiguous in the input file. The
+ value of number is expected to be relatively small. However,
+ pcre2grep guarantees to have up to 8K of preceding text
+ available for context output.
+
+ --binary-files=word
+ Specify how binary files are to be processed. If the word is
+ "binary" (the default), pattern matching is performed on
+ binary files, but the only output is "Binary file <name>
+ matches" when a match succeeds. If the word is "text", which
+ is equivalent to the -a or --text option, binary files are
+ processed in the same way as any other file. In this case,
+ when a match succeeds, the output may be binary garbage,
+ which can have nasty effects if sent to a terminal. If the
+ word is "without-match", which is equivalent to the -I
+ option, binary files are not processed at all; they are
+ assumed not to be of interest.
+
+ --buffer-size=number
+ Set the parameter that controls how much memory is used for
+ buffering files that are being scanned.
+
+ -C number, --context=number
+ Output number lines of context both before and after each
+ matching line. This is equivalent to setting both -A and -B
+ to the same value.
+
+ -c, --count
+ Do not output individual lines from the files that are being
+ scanned; instead output the number of lines that would other-
+ wise have been shown. If no lines are selected, the number
+ zero is output. If several files are are being scanned, a
+ count is output for each of them. However, if the --files-
+ with-matches option is also used, only those files whose
+ counts are greater than zero are listed. When -c is used, the
+ -A, -B, and -C options are ignored.
+
+ --colour, --color
+ If this option is given without any data, it is equivalent to
+ "--colour=auto". If data is required, it must be given in
+ the same shell item, separated by an equals sign.
+
+ --colour=value, --color=value
+ This option specifies under what circumstances the parts of a
+ line that matched a pattern should be coloured in the output.
+ By default, the output is not coloured. The value (which is
+ optional, see above) may be "never", "always", or "auto". In
+ the latter case, colouring happens only if the standard out-
+ put is connected to a terminal. More resources are used when
+ colouring is enabled, because pcre2grep has to search for all
+ possible matches in a line, not just one, in order to colour
+ them all.
+
+ The colour that is used can be specified by setting the envi-
+ ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The
+ value of this variable should be a string of two numbers,
+ separated by a semicolon. They are copied directly into the
+ control string for setting colour on a terminal, so it is
+ your responsibility to ensure that they make sense. If nei-
+ ther of the environment variables is set, the default is
+ "1;31", which gives red.
+
+ -D action, --devices=action
+ If an input path is not a regular file or a directory,
+ "action" specifies how it is to be processed. Valid values
+ are "read" (the default) or "skip" (silently skip the path).
+
+ -d action, --directories=action
+ If an input path is a directory, "action" specifies how it is
+ to be processed. Valid values are "read" (the default in
+ non-Windows environments, for compatibility with GNU grep),
+ "recurse" (equivalent to the -r option), or "skip" (silently
+ skip the path, the default in Windows environments). In the
+ "read" case, directories are read as if they were ordinary
+ files. In some operating systems the effect of reading a
+ directory like this is an immediate end-of-file; in others it
+ may provoke an error.
+
+ -e pattern, --regex=pattern, --regexp=pattern
+ Specify a pattern to be matched. This option can be used mul-
+ tiple times in order to specify several patterns. It can also
+ be used as a way of specifying a single pattern that starts
+ with a hyphen. When -e is used, no argument pattern is taken
+ from the command line; all arguments are treated as file
+ names. There is no limit to the number of patterns. They are
+ applied to each line in the order in which they are defined
+ until one matches.
+
+ If -f is used with -e, the command line patterns are matched
+ first, followed by the patterns from the file(s), independent
+ of the order in which these options are specified. Note that
+ multiple use of -e is not the same as a single pattern with
+ alternatives. For example, X|Y finds the first character in a
+ line that is X or Y, whereas if the two patterns are given
+ separately, with X first, pcre2grep finds X if it is present,
+ even if it follows Y in the line. It finds Y only if there is
+ no X in the line. This matters only if you are using -o or
+ --colo(u)r to show the part(s) of the line that matched.
+
+ --exclude=pattern
+ Files (but not directories) whose names match the pattern are
+ skipped without being processed. This applies to all files,
+ whether listed on the command line, obtained from --file-
+ list, or by scanning a directory. The pattern is a PCRE2 reg-
+ ular expression, and is matched against the final component
+ of the file name, not the entire path. The -F, -w, and -x
+ options do not apply to this pattern. The option may be given
+ any number of times in order to specify multiple patterns. If
+ a file name matches both an --include and an --exclude pat-
+ tern, it is excluded. There is no short form for this option.
+
+ --exclude-from=filename
+ Treat each non-empty line of the file as the data for an
+ --exclude option. What constitutes a newline when reading the
+ file is the operating system's default. The --newline option
+ has no effect on this option. This option may be given more
+ than once in order to specify a number of files to read.
+
+ --exclude-dir=pattern
+ Directories whose names match the pattern are skipped without
+ being processed, whatever the setting of the --recursive
+ option. This applies to all directories, whether listed on
+ the command line, obtained from --file-list, or by scanning a
+ parent directory. The pattern is a PCRE2 regular expression,
+ and is matched against the final component of the directory
+ name, not the entire path. The -F, -w, and -x options do not
+ apply to this pattern. The option may be given any number of
+ times in order to specify more than one pattern. If a direc-
+ tory matches both --include-dir and --exclude-dir, it is
+ excluded. There is no short form for this option.
+
+ -F, --fixed-strings
+ Interpret each data-matching pattern as a list of fixed
+ strings, separated by newlines, instead of as a regular
+ expression. What constitutes a newline for this purpose is
+ controlled by the --newline option. The -w (match as a word)
+ and -x (match whole line) options can be used with -F. They
+ apply to each of the fixed strings. A line is selected if any
+ of the fixed strings are found in it (subject to -w or -x, if
+ present). This option applies only to the patterns that are
+ matched against the contents of files; it does not apply to
+ patterns specified by any of the --include or --exclude
+ options.
+
+ -f filename, --file=filename
+ Read patterns from the file, one per line, and match them
+ against each line of input. What constitutes a newline when
+ reading the file is the operating system's default. The
+ --newline option has no effect on this option. Trailing white
+ space is removed from each line, and blank lines are ignored.
+ An empty file contains no patterns and therefore matches
+ nothing. See also the comments about multiple patterns versus
+ a single pattern with alternatives in the description of -e
+ above.
+
+ If this option is given more than once, all the specified
+ files are read. A data line is output if any of the patterns
+ match it. A filename can be given as "-" to refer to the
+ standard input. When -f is used, patterns specified on the
+ command line using -e may also be present; they are tested
+ before the file's patterns. However, no other pattern is
+ taken from the command line; all arguments are treated as the
+ names of paths to be searched.
+
+ --file-list=filename
+ Read a list of files and/or directories that are to be
+ scanned from the given file, one per line. Trailing white
+ space is removed from each line, and blank lines are ignored.
+ These paths are processed before any that are listed on the
+ command line. The filename can be given as "-" to refer to
+ the standard input. If --file and --file-list are both spec-
+ ified as "-", patterns are read first. This is useful only
+ when the standard input is a terminal, from which further
+ lines (the list of files) can be read after an end-of-file
+ indication. If this option is given more than once, all the
+ specified files are read.
+
+ --file-offsets
+ Instead of showing lines or parts of lines that match, show
+ each match as an offset from the start of the file and a
+ length, separated by a comma. In this mode, no context is
+ shown. That is, the -A, -B, and -C options are ignored. If
+ there is more than one match in a line, each of them is shown
+ separately. This option is mutually exclusive with --line-
+ offsets and --only-matching.
+
+ -H, --with-filename
+ Force the inclusion of the filename at the start of output
+ lines when searching a single file. By default, the filename
+ is not shown in this case. For matching lines, the filename
+ is followed by a colon; for context lines, a hyphen separator
+ is used. If a line number is also being output, it follows
+ the file name.
+
+ -h, --no-filename
+ Suppress the output filenames when searching multiple files.
+ By default, filenames are shown when multiple files are
+ searched. For matching lines, the filename is followed by a
+ colon; for context lines, a hyphen separator is used. If a
+ line number is also being output, it follows the file name.
+
+ --help Output a help message, giving brief details of the command
+ options and file type support, and then exit. Anything else
+ on the command line is ignored.
+
+ -I Treat binary files as never matching. This is equivalent to
+ --binary-files=without-match.
+
+ -i, --ignore-case
+ Ignore upper/lower case distinctions during comparisons.
+
+ --include=pattern
+ If any --include patterns are specified, the only files that
+ are processed are those that match one of the patterns (and
+ do not match an --exclude pattern). This option does not
+ affect directories, but it applies to all files, whether
+ listed on the command line, obtained from --file-list, or by
+ scanning a directory. The pattern is a PCRE2 regular expres-
+ sion, and is matched against the final component of the file
+ name, not the entire path. The -F, -w, and -x options do not
+ apply to this pattern. The option may be given any number of
+ times. If a file name matches both an --include and an
+ --exclude pattern, it is excluded. There is no short form
+ for this option.
+
+ --include-from=filename
+ Treat each non-empty line of the file as the data for an
+ --include option. What constitutes a newline for this purpose
+ is the operating system's default. The --newline option has
+ no effect on this option. This option may be given any number
+ of times; all the files are read.
+
+ --include-dir=pattern
+ If any --include-dir patterns are specified, the only direc-
+ tories that are processed are those that match one of the
+ patterns (and do not match an --exclude-dir pattern). This
+ applies to all directories, whether listed on the command
+ line, obtained from --file-list, or by scanning a parent
+ directory. The pattern is a PCRE2 regular expression, and is
+ matched against the final component of the directory name,
+ not the entire path. The -F, -w, and -x options do not apply
+ to this pattern. The option may be given any number of times.
+ If a directory matches both --include-dir and --exclude-dir,
+ it is excluded. There is no short form for this option.
+
+ -L, --files-without-match
+ Instead of outputting lines from the files, just output the
+ names of the files that do not contain any lines that would
+ have been output. Each file name is output once, on a sepa-
+ rate line.
+
+ -l, --files-with-matches
+ Instead of outputting lines from the files, just output the
+ names of the files containing lines that would have been out-
+ put. Each file name is output once, on a separate line.
+ Searching normally stops as soon as a matching line is found
+ in a file. However, if the -c (count) option is also used,
+ matching continues in order to obtain the correct count, and
+ those files that have at least one match are listed along
+ with their counts. Using this option with -c is a way of sup-
+ pressing the listing of files with no matches.
+
+ --label=name
+ This option supplies a name to be used for the standard input
+ when file names are being output. If not supplied, "(standard
+ input)" is used. There is no short form for this option.
+
+ --line-buffered
+ When this option is given, input is read and processed line
+ by line, and the output is flushed after each write. By
+ default, input is read in large chunks, unless pcre2grep can
+ determine that it is reading from a terminal (which is cur-
+ rently possible only in Unix-like environments). Output to
+ terminal is normally automatically flushed by the operating
+ system. This option can be useful when the input or output is
+ attached to a pipe and you do not want pcre2grep to buffer up
+ large amounts of data. However, its use will affect perfor-
+ mance, and the -M (multiline) option ceases to work.
+
+ --line-offsets
+ Instead of showing lines or parts of lines that match, show
+ each match as a line number, the offset from the start of the
+ line, and a length. The line number is terminated by a colon
+ (as usual; see the -n option), and the offset and length are
+ separated by a comma. In this mode, no context is shown.
+ That is, the -A, -B, and -C options are ignored. If there is
+ more than one match in a line, each of them is shown sepa-
+ rately. This option is mutually exclusive with --file-offsets
+ and --only-matching.
+
+ --locale=locale-name
+ This option specifies a locale to be used for pattern match-
+ ing. It overrides the value in the LC_ALL or LC_CTYPE envi-
+ ronment variables. If no locale is specified, the PCRE2
+ library's default (usually the "C" locale) is used. There is
+ no short form for this option.
+
+ --match-limit=number
+ Processing some regular expression patterns can require a
+ very large amount of memory, leading in some cases to a pro-
+ gram crash if not enough is available. Other patterns may
+ take a very long time to search for all possible matching
+ strings. The pcre2_exec() function that is called by
+ pcre2grep to do the matching has two parameters that can
+ limit the resources that it uses.
+
+ The --match-limit option provides a means of limiting
+ resource usage when processing patterns that are not going to
+ match, but which have a very large number of possibilities in
+ their search trees. The classic example is a pattern that
+ uses nested unlimited repeats. Internally, PCRE2 uses a func-
+ tion called match() which it calls repeatedly (sometimes
+ recursively). The limit set by --match-limit is imposed on
+ the number of times this function is called during a match,
+ which has the effect of limiting the amount of backtracking
+ that can take place.
+
+ The --recursion-limit option is similar to --match-limit, but
+ instead of limiting the total number of times that match() is
+ called, it limits the depth of recursive calls, which in turn
+ limits the amount of memory that can be used. The recursion
+ depth is a smaller number than the total number of calls,
+ because not all calls to match() are recursive. This limit is
+ of use only if it is set smaller than --match-limit.
+
+ There are no short forms for these options. The default set-
+ tings are specified when the PCRE2 library is compiled, with
+ the default default being 10 million.
+
+ -M, --multiline
+ Allow patterns to match more than one line. When this option
+ is given, patterns may usefully contain literal newline char-
+ acters and internal occurrences of ^ and $ characters. The
+ output for a successful match may consist of more than one
+ line, the last of which is the one in which the match ended.
+ If the matched string ends with a newline sequence the output
+ ends at the end of that line.
+
+ When this option is set, the PCRE2 library is called in "mul-
+ tiline" mode. There is a limit to the number of lines that
+ can be matched, imposed by the way that pcre2grep buffers the
+ input file as it scans it. However, pcre2grep ensures that at
+ least 8K characters or the rest of the document (whichever is
+ the shorter) are available for forward matching, and simi-
+ larly the previous 8K characters (or all the previous charac-
+ ters, if fewer than 8K) are guaranteed to be available for
+ lookbehind assertions. This option does not work when input
+ is read line by line (see --line-buffered.)
+
+ -N newline-type, --newline=newline-type
+ The PCRE2 library supports five different conventions for
+ indicating the ends of lines. They are the single-character
+ sequences CR (carriage return) and LF (linefeed), the two-
+ character sequence CRLF, an "anycrlf" convention, which rec-
+ ognizes any of the preceding three types, and an "any" con-
+ vention, in which any Unicode line ending sequence is assumed
+ to end a line. The Unicode sequences are the three just men-
+ tioned, plus VT (vertical tab, U+000B), FF (form feed,
+ U+000C), NEL (next line, U+0085), LS (line separator,
+ U+2028), and PS (paragraph separator, U+2029).
+
+ When the PCRE2 library is built, a default line-ending
+ sequence is specified. This is normally the standard
+ sequence for the operating system. Unless otherwise specified
+ by this option, pcre2grep uses the library's default. The
+ possible values for this option are CR, LF, CRLF, ANYCRLF, or
+ ANY. This makes it possible to use pcre2grep to scan files
+ that have come from other environments without having to mod-
+ ify their line endings. If the data that is being scanned
+ does not agree with the convention set by this option,
+ pcre2grep may behave in strange ways. Note that this option
+ does not apply to files specified by the -f, --exclude-from,
+ or --include-from options, which are expected to use the
+ operating system's standard newline sequence.
+
+ -n, --line-number
+ Precede each output line by its line number in the file, fol-
+ lowed by a colon for matching lines or a hyphen for context
+ lines. If the filename is also being output, it precedes the
+ line number. This option is forced if --line-offsets is used.
+
+ --no-jit If the PCRE2 library is built with support for just-in-time
+ compiling (which speeds up matching), pcre2grep automatically
+ makes use of this, unless it was explicitly disabled at build
+ time. This option can be used to disable the use of JIT at
+ run time. It is provided for testing and working round prob-
+ lems. It should never be needed in normal use.
+
+ -o, --only-matching
+ Show only the part of the line that matched a pattern instead
+ of the whole line. In this mode, no context is shown. That
+ is, the -A, -B, and -C options are ignored. If there is more
+ than one match in a line, each of them is shown separately.
+ If -o is combined with -v (invert the sense of the match to
+ find non-matching lines), no output is generated, but the
+ return code is set appropriately. If the matched portion of
+ the line is empty, nothing is output unless the file name or
+ line number are being printed, in which case they are shown
+ on an otherwise empty line. This option is mutually exclusive
+ with --file-offsets and --line-offsets.
+
+ -onumber, --only-matching=number
+ Show only the part of the line that matched the capturing
+ parentheses of the given number. Up to 32 capturing parenthe-
+ ses are supported, and -o0 is equivalent to -o without a num-
+ ber. Because these options can be given without an argument
+ (see above), if an argument is present, it must be given in
+ the same shell item, for example, -o3 or --only-matching=2.
+ The comments given for the non-argument case above also apply
+ to this case. If the specified capturing parentheses do not
+ exist in the pattern, or were not set in the match, nothing
+ is output unless the file name or line number are being
+ printed.
+
+ If this option is given multiple times, multiple substrings
+ are output, in the order the options are given. For example,
+ -o3 -o1 -o3 causes the substrings matched by capturing paren-
+ theses 3 and 1 and then 3 again to be output. By default,
+ there is no separator (but see the next option).
+
+ --om-separator=text
+ Specify a separating string for multiple occurrences of -o.
+ The default is an empty string. Separating strings are never
+ coloured.
+
+ -q, --quiet
+ Work quietly, that is, display nothing except error messages.
+ The exit status indicates whether or not any matches were
+ found.
+
+ -r, --recursive
+ If any given path is a directory, recursively scan the files
+ it contains, taking note of any --include and --exclude set-
+ tings. By default, a directory is read as a normal file; in
+ some operating systems this gives an immediate end-of-file.
+ This option is a shorthand for setting the -d option to
+ "recurse".
+
+ --recursion-limit=number
+ See --match-limit above.
+
+ -s, --no-messages
+ Suppress error messages about non-existent or unreadable
+ files. Such files are quietly skipped. However, the return
+ code is still 2, even if matches were found in other files.
+
+ -u, --utf-8
+ Operate in UTF-8 mode. This option is available only if PCRE2
+ has been compiled with UTF-8 support. All patterns (including
+ those for any --exclude and --include options) and all sub-
+ ject lines that are scanned must be valid strings of UTF-8
+ characters.
+
+ -V, --version
+ Write the version numbers of pcre2grep and the PCRE2 library
+ to the standard output and then exit. Anything else on the
+ command line is ignored.
+
+ -v, --invert-match
+ Invert the sense of the match, so that lines which do not
+ match any of the patterns are the ones that are found.
+
+ -w, --word-regex, --word-regexp
+ Force the patterns to match only whole words. This is equiva-
+ lent to having \b at the start and end of the pattern. This
+ option applies only to the patterns that are matched against
+ the contents of files; it does not apply to patterns speci-
+ fied by any of the --include or --exclude options.
+
+ -x, --line-regex, --line-regexp
+ Force the patterns to be anchored (each must start matching
+ at the beginning of a line) and in addition, require them to
+ match entire lines. This is equivalent to having ^ and $
+ characters at the start and end of each alternative branch in
+ every pattern. This option applies only to the patterns that
+ are matched against the contents of files; it does not apply
+ to patterns specified by any of the --include or --exclude
+ options.
+
+
+ENVIRONMENT VARIABLES
+
+ The environment variables LC_ALL and LC_CTYPE are examined, in that
+ order, for a locale. The first one that is set is used. This can be
+ overridden by the --locale option. If no locale is set, the PCRE2
+ library's default (usually the "C" locale) is used.
+
+
+NEWLINES
+
+ The -N (--newline) option allows pcre2grep to scan files with different
+ newline conventions from the default. Any parts of the input files that
+ are written to the standard output are copied identically, with what-
+ ever newline sequences they have in the input. However, the setting of
+ this option does not affect the interpretation of files specified by
+ the -f, --exclude-from, or --include-from options, which are assumed to
+ use the operating system's standard newline sequence, nor does it
+ affect the way in which pcre2grep writes informational messages to the
+ standard error and output streams. For these it uses the string "\n" to
+ indicate newlines, relying on the C I/O library to convert this to an
+ appropriate sequence.
+
+
+OPTIONS COMPATIBILITY
+
+ Many of the short and long forms of pcre2grep's options are the same as
+ in the GNU grep program. Any long option of the form --xxx-regexp (GNU
+ terminology) is also available as --xxx-regex (PCRE2 terminology). How-
+ ever, the --file-list, --file-offsets, --include-dir, --line-offsets,
+ --locale, --match-limit, -M, --multiline, -N, --newline, --om-separa-
+ tor, --recursion-limit, -u, and --utf-8 options are specific to
+ pcre2grep, as is the use of the --only-matching option with a capturing
+ parentheses number.
+
+ Although most of the common options work the same way, a few are dif-
+ ferent in pcre2grep. For example, the --include option's argument is a
+ glob for GNU grep, but a regular expression for pcre2grep. If both the
+ -c and -l options are given, GNU grep lists only file names, without
+ counts, but pcre2grep gives the counts.
+
+
+OPTIONS WITH DATA
+
+ There are four different ways in which an option with data can be spec-
+ ified. If a short form option is used, the data may follow immedi-
+ ately, or (with one exception) in the next command line item. For exam-
+ ple:
+
+ -f/some/file
+ -f /some/file
+
+ The exception is the -o option, which may appear with or without data.
+ Because of this, if data is present, it must follow immediately in the
+ same item, for example -o3.
+
+ If a long form option is used, the data may appear in the same command
+ line item, separated by an equals character, or (with two exceptions)
+ it may appear in the next command line item. For example:
+
+ --file=/some/file
+ --file /some/file
+
+ Note, however, that if you want to supply a file name beginning with ~
+ as data in a shell command, and have the shell expand ~ to a home
+ directory, you must separate the file name from the option, because the
+ shell does not treat ~ specially unless it is at the start of an item.
+
+ The exceptions to the above are the --colour (or --color) and --only-
+ matching options, for which the data is optional. If one of these
+ options does have data, it must be given in the first form, using an
+ equals character. Otherwise pcre2grep will assume that it has no data.
+
+
+MATCHING ERRORS
+
+ It is possible to supply a regular expression that takes a very long
+ time to fail to match certain lines. Such patterns normally involve
+ nested indefinite repeats, for example: (a+)*\d when matched against a
+ line of a's with no final digit. The PCRE2 matching function has a
+ resource limit that causes it to abort in these circumstances. If this
+ happens, pcre2grep outputs an error message and the line that caused
+ the problem to the standard error stream. If there are more than 20
+ such errors, pcre2grep gives up.
+
+ The --match-limit option of pcre2grep can be used to set the overall
+ resource limit; there is a second option called --recursion-limit that
+ sets a limit on the amount of memory (usually stack) that is used (see
+ the discussion of these options above).
+
+
+DIAGNOSTICS
+
+ Exit status is 0 if any matches were found, 1 if no matches were found,
+ and 2 for syntax errors, overlong lines, non-existent or inaccessible
+ files (even if matches were found in other files) or too many matching
+ errors. Using the -s option to suppress error messages about inaccessi-
+ ble files does not affect the return code.
+
+
+SEE ALSO
+
+ pcre2pattern(3), pcre2syntax(3), pcre2test(1).
+
+
+AUTHOR
+
+ Philip Hazel
+ University Computing Service
+ Cambridge CB2 3QH, England.
+
+
+REVISION
+
+ Last updated: 28 September 2014
+ Copyright (c) 1997-2014 University of Cambridge.
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index f85f25e..71022ac 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -2997,7 +2997,7 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
}
if (do_colour)
{
- char *cs = getenv("pcre2grep_COLOUR");
+ char *cs = getenv("PCRE2GREP_COLOUR");
if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
if (cs != NULL) colour_string = cs;
}
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 19fadd1..1aa40b4 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -5094,9 +5094,9 @@ printf(" 32-bit support\n");
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
if (rc != 0)
- printf(" UTF support (Unicode version %s)\n", uversion);
+ printf(" UTF and UCP support (Unicode version %s)\n", uversion);
else
- printf(" No UTF support\n");
+ printf(" No UTF or UCP support\n");
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
if (rc != 0)
{