Documentation scripts

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@79 6239d852-aaf2-0410-a92c-79f79f948069
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2014-09-23 11:35:51 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2014-09-23 11:35:51 +0000
commit: fd8438eb9b6bec69a456b69a7dece77aadc06a36 (patch)
tree: b0f09f3d92934ea3ad0570599c861891cf360362
parent: cf3d2f48e3a1281a47cd544cfd2457b8342037f9 (diff)
download: pcre2-fd8438eb9b6bec69a456b69a7dece77aadc06a36.tar.gz
19 files changed, 10412 insertions, 6 deletions
diff --git a/132html b/132html
new file mode 100755
index 0000000..85baab9
--- /dev/null
+++ b/132html
@@ -0,0 +1,313 @@
+#! /usr/bin/perl -w
+
+# Script to turn PCRE2 man pages into HTML
+
+
+# Subroutine to handle font changes and other escapes. It takes one line of
+# man page text as its only argument and returns the HTML form of it: the
+# characters &, < and > become numeric character references, \fI...\fR (or
+# \fP) becomes <i>...</i>, \fB...\fR (or \fP) becomes <b>...</b>, \e becomes
+# a backslash, and "(c)" that follows "Copyright " becomes &copy;.
+
+sub do_line {
+my($s) = $_[0];
+
+# The ampersand must be handled first, so that the "&" in the references
+# generated by the following substitutions is not itself escaped.
+
+$s =~ s/&/&#38;/g;                   # Deal with &, < and >
+$s =~ s/</&#60;/g;
+$s =~ s/>/&#62;/g;
+$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
+$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
+$s =~ s"\\e"\\"g;
+$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
+$s;
+}
+
+# Subroutine to ensure not in a paragraph
+
+sub end_para {
+# Collect the closing tags that are needed: a literal block (if one is open)
+# is closed before the paragraph that contains it.
+my $closing = "";
+$closing .= "</PRE>\n" if ($inpara && $inpre);
+$closing .= "</P>\n" if ($inpara);
+print TEMP $closing if ($closing ne "");
+
+# In all cases we end up outside any paragraph, with no text written yet.
+$inpre = 0;
+$inpara = 0;
+$wrotetext = 0;
+}
+
+# Subroutine to start a new paragraph
+
+sub new_para {
+end_para();                # Close any paragraph that is still open
+print TEMP "<P>\n";        # Start the new one in the page body
+$inpara = 1;
+}
+
+
+# Main program
+
+$innf = 0;
+$inpara = 0;
+$inpre = 0;
+$wrotetext = 0;
+$toc = 0;
+$ref = 1;
+
+while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
+  {
+  $toc = 1 if $ARGV[0] eq "-toc";
+  shift;
+  }
+
+# Initial output to STDOUT
+
+print <<End ;
+<html>
+<head>
+<title>$ARGV[0] specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>$ARGV[0] man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+End
+
+print "<ul>\n" if ($toc);
+
+# The table of contents is written directly to STDOUT as section headings are
+# encountered, so the body of the page has to be held back until the whole
+# input has been read. It is accumulated in memory via the TEMP handle; this
+# avoids creating a file with a predictable name in /tmp.
+
+my $body = "";
+open(TEMP, ">", \$body) || die "Can't open in-memory buffer for output: $!\n";
+
+while (<STDIN>)
+  {
+  # Handle lines beginning with a dot
+
+  if (/^\./)
+    {
+    # Some of the PCRE2 man pages used to contain instances of .br. However,
+    # they should have all been removed because they cause trouble in some
+    # (other) automated systems that translate man pages to HTML. Complain if
+    # we find .br or .in (another macro that is deprecated).
+
+    if (/^\.br/ || /^\.in/)
+      {
+      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
+      print STDERR "*** $_\n";
+      die "*** Processing abandoned\n";
+      }
+
+    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
+
+    elsif (/^\.nf/)
+      {
+      $innf = 1;
+      }
+
+    elsif (/^\.fi/)
+      {
+      $innf = 0;
+      }
+
+    # Handling .sp is subtle. If it is inside a literal section, do nothing if
+    # the next line is a non literal text line; similarly, if not inside a
+    # literal section, do nothing if a literal follows, unless we are inside
+    # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
+    # literal sections will do the spacing. Always skip if no previous output.
+
+    elsif (/^\.sp/)
+      {
+      if ($wrotetext)
+        {
+        $_ = <STDIN>;
+        if ($inpre)
+          {
+          print TEMP "\n" if (/^[\s.]/);
+          }
+        else
+          {
+          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
+          }
+        redo;    # Now process the lookahead line we just read
+        }
+      }
+    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
+      {
+      &new_para();
+      }
+    elsif (/^\.SH\s*("?)(.*)\1/)
+      {
+      # Ignore the NAME section
+      if ($2 =~ /^NAME\b/)
+        {
+        <STDIN>;
+        next;
+        }
+
+      &end_para();
+      my($title) = &do_line($2);
+      if ($toc)
+        {
+        # The title is passed as a %s argument rather than being interpolated
+        # into the format, so that a "%" in a heading is output literally.
+        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
+          $ref, $ref, $title);
+        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
+          $ref, $title);
+        $ref++;
+        }
+      else
+        {
+        print TEMP "<br><b>\n$title\n</b><br>\n";
+        }
+      }
+    elsif (/^\.SS\s*("?)(.*)\1/)
+      {
+      &end_para();
+      my($title) = &do_line($2);
+      print TEMP "<br><b>\n$title\n</b><br>\n";
+      }
+    elsif (/^\.B\s*(.*)/)
+      {
+      &new_para() if (!$inpara);
+      $_ = &do_line($1);
+      s/"(.*?)"/$1/g;
+      print TEMP "<b>$_</b>\n";
+      $wrotetext = 1;
+      }
+    elsif (/^\.I\s*(.*)/)
+      {
+      &new_para() if (!$inpara);
+      $_ = &do_line($1);
+      s/"(.*?)"/$1/g;
+      print TEMP "<i>$_</i>\n";
+      $wrotetext = 1;
+      }
+
+    # A comment that starts "HREF" takes the next line as a name that
+    # is turned into a hyperlink, using the text given, which might be
+    # in a special font. If it ends in () or (digits) or punctuation, they
+    # aren't part of the link.
+
+    elsif (/^\.\\"\s*HREF/)
+      {
+      $_=<STDIN>;
+      chomp;
+      $_ = &do_line($_);
+      $_ =~ s/\s+$//;
+      $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
+      print TEMP "<a href=\"$1.html\">$_</a>\n";
+      }
+
+    # A comment that starts "HTML" inserts literal HTML
+
+    elsif (/^\.\\"\s*HTML\s*(.*)/)
+      {
+      print TEMP $1;
+      }
+
+    # A comment that starts < inserts that HTML at the end of the
+    # *next* input line - so as not to get a newline between them.
+
+    elsif (/^\.\\"\s*(<.*>)/)
+      {
+      my($markup) = $1;
+      $_=<STDIN>;
+      chomp;
+      $_ = &do_line($_);
+      $_ =~ s/\s+$//;
+      print TEMP "$_$markup\n";
+      }
+
+    # A comment that starts JOIN joins the next two lines together, with one
+    # space between them. Then that line is processed. This is used in some
+    # displays where two lines are needed for the "man" version. JOINSH works
+    # the same, except that it assumes this is a shell command, so removes
+    # continuation backslashes.
+
+    elsif (/^\.\\"\s*JOIN(SH)?/)
+      {
+      my($one,$two);
+      $one = <STDIN>;
+      $two = <STDIN>;
+      $one =~ s/\s*\\e\s*$// if (defined($1));
+      chomp($one);
+      $two =~ s/^\s+//;
+      $_ = "$one $two";
+      redo;            # Process the joined lines
+      }
+
+    # .EX/.EE are used in the pcredemo page to bracket the entire program,
+    # which is unmodified except for turning backslash into "\e". The literal
+    # block is closed again when .EE (or the end of the input) is reached.
+
+    elsif (/^\.EX\s*$/)
+      {
+      print TEMP "<PRE>\n";
+      while (<STDIN>)
+        {
+        last if /^\.EE\s*$/;
+        s/\\e/\\/g;
+        s/&/&amp;/g;
+        s/</&lt;/g;
+        s/>/&gt;/g;
+        print TEMP;
+        }
+      print TEMP "</PRE>\n";
+      }
+
+    # Ignore anything not recognized
+
+    next;
+    }
+
+  # Line does not begin with a dot. Replace blank lines with new paragraphs
+
+  if (/^\s*$/)
+    {
+    &end_para() if ($wrotetext);
+    next;
+    }
+
+  # Convert fonts changes and output an ordinary line. Ensure that indented
+  # lines are marked as literal.
+
+  $_ = &do_line($_);
+  &new_para() if (!$inpara);
+
+  if (/^\s/)
+    {
+    if (!$inpre)
+      {
+      print TEMP "<pre>\n";
+      $inpre = 1;
+      }
+    }
+  elsif ($inpre)
+    {
+    print TEMP "</pre>\n";
+    $inpre = 0;
+    }
+
+  # Add <br> to the end of a non-literal line if we are within .nf/.fi
+
+  $_ .= "<br>\n" if (!$inpre && $innf);
+
+  print TEMP;
+  $wrotetext = 1;
+  }
+
+# The TOC, if present, will have been written - terminate it
+
+print "</ul>\n" if ($toc);
+
+# Copy the accumulated body of the page to the standard output
+
+close(TEMP);
+print $body;
+
+print <<End ;
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+End
+
+
+# End
diff --git a/CheckMan b/CheckMan
new file mode 100755
index 0000000..5686746
--- /dev/null
+++ b/CheckMan
@@ -0,0 +1,67 @@
+#! /usr/bin/perl
+
+# A script to scan PCRE2's man pages to check for typos in the control
+# sequences. I use only a small set of the available repertoire, so it is 
+# straightforward to check that nothing else has slipped in by mistake. This
+# script should be called in the doc directory.
+
+$yield = 0;
+
+# Each command line argument is the name of a man page source file to check.
+# The exit code is 1 if any problem was reported, otherwise 0.
+
+while (scalar(@ARGV) > 0)
+  {
+  $line = 0;
+  $file = shift @ARGV;
+
+  # Three-argument open is used so that nothing in a file name can be taken
+  # as an open mode, and the reason for any failure is reported.
+
+  open (my $in, "<", $file) || die "Failed to open $file: $!\n";
+
+  while (<$in>)
+    {
+    $line++;
+
+    # Lines that are empty or contain only white space are reported. The
+    # messages are output with print, not printf, so that a "%" in a file
+    # name is not treated as a format directive.
+
+    if (/^\s*$/)
+      {
+      print "Empty line $line of $file\n";
+      $yield = 1;
+      }
+
+    # A line that starts with a dot must match one of the permitted forms.
+
+    elsif (/^\./)
+      {
+      if (!/^\.\s*$|
+            ^\.B\s+\S|
+            ^\.TH\s\S|
+            ^\.SH\s\S|
+            ^\.SS\s\S|
+            ^\.TP(?:\s?\d+)?\s*$|
+            ^\.SM\s*$|
+            ^\.br\s*$|
+            ^\.rs\s*$|
+            ^\.sp\s*$|
+            ^\.nf\s*$|
+            ^\.fi\s*$|
+            ^\.P\s*$|
+            ^\.PP\s*$|
+            ^\.\\"(?:\ HREF)?\s*$|
+            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
+            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
+            ^\.\\"\s<\/a>\s*$|
+            ^\.\\"\sJOINSH\s*$|
+            ^\.\\"\sJOIN\s*$/x
+         )
+        {
+        print "Bad control line $line of $file\n";
+        $yield = 1;
+        }
+      }
+
+    # In ordinary text the only backslash sequences accepted are \e and
+    # \f followed by I, B, or P.
+
+    else
+      {
+      if (/\\[^ef]|\\f[^IBP]/)
+        {
+        print "Bad backslash in line $line of $file\n";
+        $yield = 1;
+        }
+      }
+    }
+
+  close($in);
+  }
+
+exit $yield;
+# End  
diff --git a/CleanTxt b/CleanTxt
new file mode 100755
index 0000000..1f42519
--- /dev/null
+++ b/CleanTxt
@@ -0,0 +1,113 @@
+#! /usr/bin/perl -w
+
+# Script to take the output of nroff -man and remove all the backspacing and
+# the page footers and the screen commands etc so that it is more usefully
+# readable online. In fact, in the latest nroff, intermediate footers don't
+# seem to be generated any more.
+
+$blankcount = 0;
+$lastwascut = 0;
+$firstheader = 1;
+
+# Input on STDIN; output to STDOUT.
+
+while (<STDIN>)
+  {
+  s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
+  s/.\x8//g;         # Remove "char, backspace"
+
+  # Handle header lines. Retain only the first one we encounter, but remove
+  # the blank line that follows. Any others (e.g. at end of document) and the
+  # following blank line are dropped.
+
+  if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
+    {
+    if ($firstheader)
+      {
+      $firstheader = 0;
+      print;
+      $lastprinted = $_;
+      $lastwascut = 0;
+      }
+    $_=<STDIN>;       # Remove a blank that follows
+    next;
+    }
+
+  # Count runs of empty lines
+
+  if (/^\s*$/)
+    {
+    $blankcount++;
+    $lastwascut = 0;
+    next;
+    }
+
+  # If a chunk of lines has been cut out (page footer) and the next line
+  # has a different indentation, put back one blank line.
+
+  if ($lastwascut && $blankcount < 1 && defined($lastprinted))
+    {
+    ($a) = $lastprinted =~ /^(\s*)/;
+    ($b) = $_ =~ /^(\s*)/;
+    $blankcount++ if ($a ne $b);
+    }
+
+  # We get here only when we have a non-blank line in hand. If it was preceded
+  # by 3 or more blank lines, read the next 3 lines and see if they are blank.
+  # If so, remove all 7 lines, and remember that we have just done a cut.
+
+  if ($blankcount >= 3)
+    {
+    for ($i = 0; $i < 3; $i++)
+      {
+      $next[$i] = <STDIN>;
+      $next[$i] = "" if !defined $next[$i];
+      $next[$i] =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
+      $next[$i] =~ s/.\x8//g;         # Remove "char, backspace"
+      }
+
+    # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
+
+    if ($next[0] =~ /^\s*$/ &&
+        $next[1] =~ /^\s*$/ &&
+        $next[2] =~ /^\s*$/)
+      {
+      $blankcount -= 3;
+      $lastwascut = 1;
+      }
+
+    # Otherwise output the saved blanks, the current, and the next three
+    # lines. Remember the last printed line.
+
+    else
+      {
+      for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
+      print;
+      for ($i = 0; $i < 3; $i++)
+        {
+        $next[$i] =~ s/.\x8//g;
+        print $next[$i];
+        $lastprinted = $_;
+        }
+      $lastwascut = 0;
+      $blankcount = 0;
+      }
+    }
+
+  # This non-blank line is not preceded by 3 or more blank lines. Output
+  # any blanks there are, and the line. Remember it. Force two blank lines
+  # before headings.
+
+  else
+    {
+    $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
+      defined($lastprinted);
+    for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
+    print;
+    $lastprinted = $_;
+    $lastwascut = 0;
+    $blankcount = 0;
+    }
+  }
+
+# End
diff --git a/Detrail b/Detrail
new file mode 100755
index 0000000..1c5c7e9
--- /dev/null
+++ b/Detrail
@@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+# This is a script for removing trailing whitespace from lines in files that
+# are listed on the command line.
+
+# This subroutine does the work for one file. Its only argument is the name
+# of the file. The whole file is read into memory, trailing white space is
+# removed from each line that ends with a newline, and the file is rewritten
+# only if at least one line was changed. Any failure to open the file, or to
+# complete the rewrite, is fatal and the system error text is reported.
+
+sub detrail {
+my($file) = $_[0];
+my($changed) = 0;
+open(my $in, "<", $file) || die "Can't open $file for input: $!";
+my @lines = <$in>;
+close($in);
+foreach (@lines)
+  {
+  # The substitution succeeds only when there is white space to remove
+  $changed = 1 if s/\s+\n$/\n/;
+  }
+if ($changed)
+  {
+  open(my $out, ">", $file) || die "Can't open $file for output: $!";
+  print $out @lines;
+  # Buffered write errors (such as a full disk) show up only at close time
+  close($out) || die "Can't close $file after writing: $!";
+  }
+}
+
+# This is the main program
+
+$, = "";   # Output field separator
+for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
+
+# End
diff --git a/PrepareRelease b/PrepareRelease
new file mode 100755
index 0000000..c92d7f9
--- /dev/null
+++ b/PrepareRelease
@@ -0,0 +1,265 @@
+#!/bin/sh
+
+# Script to prepare the files for building a PCRE2 release. It does some
+# processing of the documentation, detrails files, and creates pcre2.h.generic
+# and config.h.generic (for use by builders who can't run ./configure).
+
+# You must run this script before running "make dist". If its first argument
+# is "doc", it stops after preparing the documentation. There are no other
+# arguments. The script makes use of the following files:
+
+# 132html     A Perl script that converts a .1 or .3 man page into HTML. It
+#             "knows" the relevant troff constructs that are used in the PCRE2
+#             man pages.
+
+# CheckMan    A Perl script that checks man pages for typos in the mark up.
+
+# CleanTxt    A Perl script that cleans up the output of "nroff -man" by
+#             removing backspaces and other redundant text so as to produce
+#             a readable .txt file.
+
+# Detrail     A Perl script that removes trailing spaces from files.
+
+# doc/index.html.src
+#             A file that is copied as index.html into the doc/html directory
+#             when the HTML documentation is built. It works like this so that
+#             doc/html can be deleted and re-created from scratch.
+
+# README & NON-AUTOTOOLS-BUILD
+#             These files are copied into the doc/html directory, with .txt
+#             extensions so that they can be hyperlinked from the HTML
+#             documentation, because some people just go to the HTML without
+#             looking for text files.
+
+
+# First, sort out the documentation. Remove pcre2demo.3 first because it won't
+# pass the markup check (it is created below, using markup that none of the
+# other pages use).
+
+cd doc
+echo Processing documentation
+
+/bin/rm -f pcre2demo.3
+
+# Check the remaining man pages
+
+perl ../CheckMan *.1 *.3
+if [ $? != 0 ] ; then exit 1; fi
+
+# Make Text form of the documentation. It needs some mangling to make it
+# tidy for online reading. Concatenate all the .3 stuff, but omit the
+# individual function pages.
+
+cat <<End >pcre2.txt
+-----------------------------------------------------------------------------
+This file contains a concatenation of the PCRE2 man pages, converted to plain
+text format for ease of searching with a text editor, or for use on systems
+that do not have a man page processor. The small individual files that give
+synopses of each function in the library have not been included. Neither has
+the pcre2demo program. There are separate text files for the pcre2grep and
+pcre2test commands.
+-----------------------------------------------------------------------------
+
+
+End
+
+echo "Making pcre2.txt"
+for file in pcre2api pcre2callout pcre2unicode ; do
+
+#for file in pcre pcre16 pcre32 pcrebuild pcrematching \
+#            pcrecompat pcrepattern pcresyntax pcrejit pcrepartial \
+#            pcreprecompile pcreperform pcreposix pcrecpp pcresample \
+#            pcrelimits pcrestack ; do
+ 
+  echo "  Processing $file.3"
+  nroff -c -man $file.3 >$file.rawtxt
+  perl ../CleanTxt <$file.rawtxt >>pcre2.txt
+  /bin/rm $file.rawtxt
+  echo "------------------------------------------------------------------------------" >>pcre2.txt
+  if [ "$file" != "pcre2sample" ] ; then
+    echo " " >>pcre2.txt
+    echo " " >>pcre2.txt
+  fi
+done
+
+# The three commands
+for file in pcre2test ; do
+# for file in pcre2test pcre2grep pcre-config ; do
+  echo Making $file.txt
+  nroff -c -man $file.1 >$file.rawtxt
+  perl ../CleanTxt <$file.rawtxt >$file.txt
+  /bin/rm $file.rawtxt
+done
+
+
+# Make pcre2demo.3 from the pcre2demo.c source file
+
+echo "Making pcre2demo.3"
+perl <<"END" >pcre2demo.3
+  open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
+  open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
+  print OUT ".\\\" Start example.\n" .
+            ".de EX\n" .
+            ".  nr mE \\\\n(.f\n" .
+            ".  nf\n" .
+            ".  nh\n" .
+            ".  ft CW\n" .
+            "..\n" .
+            ".\n" .
+            ".\n" .
+            ".\\\" End example.\n" .
+            ".de EE\n" .
+            ".  ft \\\\n(mE\n" .
+            ".  fi\n" .
+            ".  hy \\\\n(HY\n" .
+            "..\n" .
+            ".\n" .
+            ".EX\n" ;
+  while (<IN>)
+    {
+    s/\\/\\e/g;
+    print OUT;
+    }
+  print OUT ".EE\n";
+  close(IN);
+  close(OUT);
+END
+if [ $? != 0 ] ; then exit 1; fi
+
+
+# Make HTML form of the documentation.
+
+echo "Making HTML documentation"
+/bin/rm html/*
+cp index.html.src html/index.html
+cp ../README html/README.txt
+# cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
+
+for file in *.1 ; do
+  base=`basename $file .1`
+  echo "  Making $base.html"
+  perl ../132html -toc $base <$file >html/$base.html
+done
+
+# Exclude table of contents for function summaries. It seems that expr
+# forces an anchored regex. Also exclude them for small pages that have
+# only one section.
+
+for file in *.3 ; do
+  base=`basename $file .3`
+  toc=-toc
+  if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
+  if [ "$base" = "pcre2sample" ]  || \
+     [ "$base" = "pcre2stack" ]   || \
+     [ "$base" = "pcre2compat" ]  || \
+     [ "$base" = "pcre2limits" ]  || \
+     [ "$base" = "pcre2perform" ] || \
+     [ "$base" = "pcre2unicode" ] ; then
+    toc=""
+  fi
+  echo "  Making $base.html"
+  perl ../132html $toc $base <$file >html/$base.html
+  if [ $? != 0 ] ; then exit 1; fi
+done
+
+# End of documentation processing; stop if only documentation required.
+
+cd ..
+echo Documentation done
+if [ "$1" = "doc" ] ; then exit; fi
+
+# FIXME pro tem only do docs
+exit
+
+# These files are detrailed; do not detrail the test data because there may be
+# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
+# line endings and the detrail script removes all trailing white space. The
+# configure files are also omitted from the detrailing. We don't bother with
+# those pcre[16|32]_xx files that just define COMPILE_PCRE16 and then #include the
+# common file, because they aren't going to change.
+
+files="\
+  Makefile.am \
+  Makefile.in \
+  configure.ac \
+  README \
+  LICENCE \
+  COPYING \
+  AUTHORS \
+  NEWS \
+  NON-UNIX-USE \
+  NON-AUTOTOOLS-BUILD \
+  INSTALL \
+  132html \
+  CleanTxt \
+  Detrail \
+  ChangeLog \
+  CMakeLists.txt \
+  RunGrepTest \
+  RunTest \
+  pcre-config.in \
+  libpcre.pc.in \
+  libpcre16.pc.in \
+  libpcre32.pc.in \
+  libpcreposix.pc.in \
+  libpcrecpp.pc.in \
+  config.h.in \
+  pcre_chartables.c.dist \
+  pcredemo.c \
+  pcregrep.c \
+  pcretest.c \
+  dftables.c \
+  pcreposix.c \
+  pcreposix.h \
+  pcre.h.in \
+  pcre_internal.h \
+  pcre_byte_order.c \
+  pcre_compile.c \
+  pcre_config.c \
+  pcre_dfa_exec.c \
+  pcre_exec.c \
+  pcre_fullinfo.c \
+  pcre_get.c \
+  pcre_globals.c \
+  pcre_jit_compile.c \
+  pcre_jit_test.c \
+  pcre_maketables.c \
+  pcre_newline.c \
+  pcre_ord2utf8.c \
+  pcre16_ord2utf16.c \
+  pcre32_ord2utf32.c \
+  pcre_printint.c \
+  pcre_refcount.c \
+  pcre_string_utils.c \
+  pcre_study.c \
+  pcre_tables.c \
+  pcre_valid_utf8.c \
+  pcre_version.c \
+  pcre_xclass.c \
+  pcre16_utf16_utils.c \
+  pcre32_utf32_utils.c \
+  pcre16_valid_utf16.c \
+  pcre32_valid_utf32.c \
+  pcre_scanner.cc \
+  pcre_scanner.h \
+  pcre_scanner_unittest.cc \
+  pcrecpp.cc \
+  pcrecpp.h \
+  pcrecpparg.h.in \
+  pcrecpp_unittest.cc \
+  pcre_stringpiece.cc \
+  pcre_stringpiece.h.in \
+  pcre_stringpiece_unittest.cc \
+  perltest.pl \
+  ucp.h \
+  makevp.bat \
+  pcre.def \
+  libpcre.def \
+  libpcreposix.def"
+
+echo Detrailing
+perl ./Detrail $files doc/p* doc/html/*
+
+echo Done
+
+#End
diff --git a/doc/html/README.txt b/doc/html/README.txt
new file mode 100644
index 0000000..7ad597a
--- /dev/null
+++ b/doc/html/README.txt
@@ -0,0 +1 @@
+This is a placeholder README file for a work in progress.
diff --git a/doc/html/index.html b/doc/html/index.html
new file mode 100644
index 0000000..4e264ec
--- /dev/null
+++ b/doc/html/index.html
@@ -0,0 +1,177 @@
+<html>
+<!-- This is a manually maintained file that is the root of the HTML version of 
+     the PCRE2 documentation. When the HTML documents are built from the man 
+     page versions, the entire doc/html directory is emptied, this file is then 
+     copied into doc/html/index.html, and the remaining files therein are 
+     created by the 132html script.
+-->      
+<head>
+<title>PCRE2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
+<p>
+The HTML documentation for PCRE2 consists of a number of pages that are listed
+below in alphabetical order. If you are new to PCRE2, please read the first one
+first.
+</p>
+
+<table>
+<tr><td><a href="pcre2.html">pcre2</a></td>
+    <td>&nbsp;&nbsp;Introductory page</td></tr>
+
+<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
+    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
+
+<tr><td><a href="pcre2api.html">pcre2api</a></td>
+    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
+
+<tr><td><a href="pcre2build.html">pcre2build</a></td>
+    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
+
+<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
+    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
+
+<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
+    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
+
+<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
+    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
+
+<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
+
+<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
+<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
+    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
+
+<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
+    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
+
+<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
+    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
+
+<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
+    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
+
+<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
+    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
+
+<tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
+    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
+
+<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
+    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
+
+<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
+    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
+
+<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
+    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
+
+<tr><td><a href="pcre2test.html">pcre2test</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
+
+<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
+</table>
+
+<p>
+There are also individual pages that summarize the interface for each function
+in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
+functions.
+</p>
+
+<table>    
+
+<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
+    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
+
+<tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
+
+<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
+    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
+
+<tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
+    <td>&nbsp;&nbsp;Free study data</td></tr>
+
+<tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
+    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
+
+<tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
+    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
+
+<tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
+    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
+
+<tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
+    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
+
+<tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
+    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
+
+<tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
+    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
+
+<tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
+    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
+    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
+    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
+
+<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
+    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
+    
+<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
+    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_study.html">pcre2_study</a></td>
+    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_version.html">pcre2_version</a></td>
+    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
+</table>
+
+</body>
+</html>
+
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
new file mode 100644
index 0000000..dd95b4c
--- /dev/null
+++ b/doc/html/pcre2api.html
@@ -0,0 +1,2659 @@
+<html>
+<head>
+<title>pcre2api specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2api man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">PCRE2 NATIVE API BASIC FUNCTIONS</a>
+<li><a name="TOC2" href="#SEC2">PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS</a>
+<li><a name="TOC3" href="#SEC3">PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS</a>
+<li><a name="TOC4" href="#SEC4">PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS</a>
+<li><a name="TOC5" href="#SEC5">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a>
+<li><a name="TOC6" href="#SEC6">PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS</a>
+<li><a name="TOC7" href="#SEC7">PCRE2 NATIVE API JIT FUNCTIONS</a>
+<li><a name="TOC8" href="#SEC8">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
+<li><a name="TOC9" href="#SEC9">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
+<li><a name="TOC10" href="#SEC10">PCRE2 API OVERVIEW</a>
+<li><a name="TOC11" href="#SEC11">NEWLINES</a>
+<li><a name="TOC12" href="#SEC12">MULTITHREADING</a>
+<li><a name="TOC13" href="#SEC13">PCRE2 CONTEXTS</a>
+<li><a name="TOC14" href="#SEC14">CHECKING BUILD-TIME OPTIONS</a>
+<li><a name="TOC15" href="#SEC15">COMPILING A PATTERN</a>
+<li><a name="TOC16" href="#SEC16">COMPILATION ERROR CODES</a>
+<li><a name="TOC17" href="#SEC17">JUST-IN-TIME (JIT) COMPILATION</a>
+<li><a name="TOC18" href="#SEC18">LOCALE SUPPORT</a>
+<li><a name="TOC19" href="#SEC19">INFORMATION ABOUT A COMPILED PATTERN</a>
+<li><a name="TOC20" href="#SEC20">THE MATCH DATA BLOCK</a>
+<li><a name="TOC21" href="#SEC21">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
+<li><a name="TOC22" href="#SEC22">NEWLINE HANDLING WHEN MATCHING</a>
+<li><a name="TOC23" href="#SEC23">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
+<li><a name="TOC24" href="#SEC24">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
+<li><a name="TOC25" href="#SEC25">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
+<li><a name="TOC26" href="#SEC26">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
+<li><a name="TOC27" href="#SEC27">DUPLICATE SUBPATTERN NAMES</a>
+<li><a name="TOC28" href="#SEC28">FINDING ALL POSSIBLE MATCHES</a>
+<li><a name="TOC29" href="#SEC29">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
+<li><a name="TOC30" href="#SEC30">SEE ALSO</a>
+<li><a name="TOC31" href="#SEC31">AUTHOR</a>
+<li><a name="TOC32" href="#SEC32">REVISION</a>
+</ul>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+<br>
+<br>
+PCRE2 is a new API for PCRE. This document contains a description of all its
+functions. See the
+<a href="pcre2.html"><b>pcre2</b></a>
+document for an overview of all the PCRE2 documentation.
+</P>
+<br><a name="SEC1" href="#TOC1">PCRE2 NATIVE API BASIC FUNCTIONS</a><br>
+<P>
+<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
+<b>  uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
+<b>  pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
+<br>
+<br>
+<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *<i>code</i>,</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>,</b>
+<b>  int *<i>workspace</i>, PCRE2_SIZE <i>wscount</i>);</b>
+<br>
+<br>
+<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<br><a name="SEC2" href="#TOC1">PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS</a><br>
+<P>
+<b>PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<br><a name="SEC3" href="#TOC1">PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS</a><br>
+<P>
+<b>pcre2_general_context *pcre2_general_context_create(</b>
+<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
+<br>
+<br>
+<b>pcre2_general_context *pcre2_general_context_copy(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
+</P>
+<br><a name="SEC4" href="#TOC1">PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS</a><br>
+<P>
+<b>pcre2_compile_context *pcre2_compile_context_create(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_compile_context *pcre2_compile_context_copy(</b>
+<b>  pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_bsr_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  const unsigned char *<i>tables</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_newline_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  int (*<i>guard_function</i>)(uint32_t));</b>
+</P>
+<br><a name="SEC5" href="#TOC1">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a><br>
+<P>
+<b>pcre2_match_context *pcre2_match_context_create(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_match_context *pcre2_match_context_copy(</b>
+<b>  pcre2_match_context *<i>mcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_bsr_match(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
+<b>  void *<i>callout_data</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_newline_match(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_recursion_limit(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_recursion_memory_management(</b>
+<b>  pcre2_match_context *<i>mcontext</i>,</b>
+<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
+</P>
+<br><a name="SEC6" href="#TOC1">PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
+<P>
+<b>int pcre2_substring_copy_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_UCHAR *<i>buffer</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_copy_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
+<b>  PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>void pcre2_substring_free(PCRE2_UCHAR *<i>buffer</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_get_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_get_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_UCHAR **<i>bufferptr</i>,</b>
+<b>  PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_length_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_SIZE *<i>length</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>);</b>
+<br>
+<br>
+<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
+</P>
+<br><a name="SEC7" href="#TOC1">PCRE2 NATIVE API JIT FUNCTIONS</a><br>
+<P>
+<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
+<br>
+<br>
+<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>, pcre2_jit_stack *<i>jit_stack</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
+<b>  PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_stack_assign(const pcre2_code *<i>code</i>,</b>
+<b>  pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
+</P>
+<br><a name="SEC8" href="#TOC1">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a><br>
+<P>
+<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
+<b>  PCRE2_SIZE <i>bufflen</i>);</b>
+<br>
+<br>
+<b>const unsigned char *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
+<br>
+<br>
+<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>, PCRE2_SIZE <i>length</i>);</b>
+</P>
+<br><a name="SEC9" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
+<P>
+There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
+units, respectively. However, there is just one header file, <b>pcre2.h</b>.
+This contains the function prototypes and other definitions for all three
+libraries. One, two, or all three can be installed simultaneously. On Unix-like
+systems the libraries are called <b>libpcre2-8</b>, <b>libpcre2-16</b>, and
+<b>libpcre2-32</b>, and they can also co-exist with the original PCRE libraries.
+</P>
+<P>
+Character strings are passed to and from a PCRE2 library as a sequence of
+unsigned integers in code units of the appropriate width. Every PCRE2 function
+comes in three different forms, one for each library, for example:
+<pre>
+  <b>pcre2_compile_8()</b>
+  <b>pcre2_compile_16()</b>
+  <b>pcre2_compile_32()</b>
+</pre>
+There are also three different sets of data types:
+<pre>
+  <b>PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32</b>
+  <b>PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32</b>
+</pre>
+The UCHAR types define unsigned code units of the appropriate widths. For
+example, PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR types are
+constant pointers to the equivalent UCHAR types, that is, they are pointers to
+vectors of unsigned code units.
+</P>
+<P>
+Many applications use only one code unit width. For their convenience, macros
+are defined whose names are the generic forms such as <b>pcre2_compile()</b> and
+PCRE2_SPTR. These macros use the value of the macro PCRE2_CODE_UNIT_WIDTH to
+generate the appropriate width-specific function and macro names.
+PCRE2_CODE_UNIT_WIDTH is not defined by default. An application must define it
+to be 8, 16, or 32 before including <b>pcre2.h</b> in order to make use of the
+generic names.
+</P>
+<P>
+Applications that use more than one code unit width can be linked with more
+than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to be 0 before
+including <b>pcre2.h</b>, and then use the real function names. Any code that is
+to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is
+unknown should also use the real function names. (Unfortunately, it is not
+possible in C code to save and restore the value of a macro.)
+</P>
+<P>
+If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a 
+compiler error occurs.
+</P>
+<P>
+When using multiple libraries in an application, you must take care when
+processing any particular pattern to use only functions from a single library.
+For example, if you want to run a match using a pattern that was compiled with
+<b>pcre2_compile_16()</b>, you must do so with <b>pcre2_match_16()</b>, not
+<b>pcre2_match_8()</b>.
+</P>
+<P>
+In the function summaries above, and in the rest of this document and other
+PCRE2 documents, functions and data types are described using their generic
+names, without the 8, 16, or 32 suffix.
+</P>
+<br><a name="SEC10" href="#TOC1">PCRE2 API OVERVIEW</a><br>
+<P>
+PCRE2 has its own native API, which is described in this document. There are
+also some wrapper functions for the 8-bit library that correspond to the
+POSIX regular expression API, but they do not give access to all the
+functionality. They are described in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+documentation. Both these APIs define a set of C function calls.
+</P>
+<P>
+The native API C data types, function prototypes, option values, and error
+codes are defined in the header file <b>pcre2.h</b>, which contains definitions
+of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers for the
+library. Applications can use these to include support for different releases
+of PCRE2.
+</P>
+<P>
+In a Windows environment, if you want to statically link an application program
+against a non-dll PCRE2 library, you must define PCRE2_STATIC before including
+<b>pcre2.h</b>.
+</P>
+<P>
+The functions <b>pcre2_compile()</b> and <b>pcre2_match()</b> are used for
+compiling and matching regular expressions in a Perl-compatible manner. A
+sample program that demonstrates the simplest way of using them is provided in
+the file called <i>pcre2demo.c</i> in the PCRE2 source distribution. A listing
+of this program is given in the
+<a href="pcre2demo.html"><b>pcre2demo</b></a>
+documentation, and the
+<a href="pcre2sample.html"><b>pcre2sample</b></a>
+documentation describes how to compile and run it.
+</P>
+<P>
+Just-in-time compiler support is an optional feature of PCRE2 that can be built
+in appropriate hardware environments. It greatly speeds up the matching
+performance of many patterns. Programs can request that it be used if
+available, by calling <b>pcre2_jit_compile()</b> after a pattern has been
+successfully compiled by <b>pcre2_compile()</b>. This does nothing if JIT
+support is not available.
+</P>
+<P>
+More complicated programs might need to make use of the specialist functions
+<b>pcre2_jit_stack_alloc()</b>, <b>pcre2_jit_stack_free()</b>, and
+<b>pcre2_jit_stack_assign()</b> in order to control the JIT code's memory usage.
+</P>
+<P>
+JIT matching is automatically used by <b>pcre2_match()</b> if it is available.
+There is also a direct interface for JIT matching, which gives improved
+performance. The JIT-specific functions are discussed in the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation.
+</P>
+<P>
+A second matching function, <b>pcre2_dfa_match()</b>, which is not
+Perl-compatible, is also provided. This uses a different algorithm for the
+matching. The alternative algorithm finds all possible matches (at a given
+point in the subject), and scans the subject just once (unless there are
+lookbehind assertions). However, this algorithm does not return captured
+substrings. A description of the two matching algorithms and their advantages
+and disadvantages is given in the
+<a href="pcre2matching.html"><b>pcre2matching</b></a>
+documentation. There is no JIT support for <b>pcre2_dfa_match()</b>.
+</P>
+<P>
+In addition to the main compiling and matching functions, there are convenience
+functions for extracting captured substrings from a subject string that is
+matched by <b>pcre2_match()</b>. They are:
+<pre>
+  <b>pcre2_substring_copy_byname()</b>
+  <b>pcre2_substring_copy_bynumber()</b>
+  <b>pcre2_substring_get_byname()</b>
+  <b>pcre2_substring_get_bynumber()</b>
+  <b>pcre2_substring_list_get()</b>
+  <b>pcre2_substring_length_byname()</b>
+  <b>pcre2_substring_length_bynumber()</b>
+  <b>pcre2_substring_nametable_scan()</b>
+  <b>pcre2_substring_number_from_name()</b>
+</pre>
+<b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
+provided, to free the memory used for extracted strings.
+</P>
+<P>
+There are functions for finding out information about a compiled pattern
+(<b>pcre2_pattern_info()</b>) and about the configuration with which PCRE2 was
+built (<b>pcre2_config()</b>).
+<a name="newlines"></a></P>
+<br><a name="SEC11" href="#TOC1">NEWLINES</a><br>
+<P>
+PCRE2 supports five different conventions for indicating line breaks in
+strings: a single CR (carriage return) character, a single LF (linefeed)
+character, the two-character sequence CRLF, any of the three preceding, or any
+Unicode newline sequence. The Unicode newline sequences are the three just
+mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
+</P>
+<P>
+Each of the first three conventions is used by at least one operating system as
+its standard newline sequence. When PCRE2 is built, a default can be specified.
+The default default is LF, which is the Unix standard. When PCRE2 is run, the
+default can be overridden, either when a pattern is compiled, or when it is
+matched.
+</P>
+<P>
+The newline convention can be changed when calling <b>pcre2_compile()</b>, or it
+can be specified by special text at the start of the pattern itself; this
+overrides any other settings. See the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+page for details of the special character sequences.
+</P>
+<P>
+In the PCRE2 documentation the word "newline" is used to mean "the character or
+pair of characters that indicate a line break". The choice of newline
+convention affects the handling of the dot, circumflex, and dollar
+metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
+recognized line ending sequence, the match position advancement for a
+non-anchored pattern. There is more detail about this in the
+<a href="#matchoptions">section on <b>pcre2_match()</b> options</a>
+below.
+</P>
+<P>
+The choice of newline convention does not affect the interpretation of
+the \n or \r escape sequences, nor does it affect what \R matches, which has 
+its own separate control.
+</P>
+<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
+<P>
+In a multithreaded application it is important to keep thread-specific data
+separate from data that can be shared between threads. The PCRE2 library code
+itself is thread-safe: it contains no static or global variables. The API is
+designed to be fairly simple for non-threaded applications while at the same
+time ensuring that multithreaded applications can use it.
+</P>
+<P>
+There are several different blocks of data that are used to pass information
+between the application and the PCRE2 libraries.
+</P>
+<P>
+(1) A pointer to the compiled form of a pattern is returned to the user when
+<b>pcre2_compile()</b> is successful. The data in the compiled pattern is fixed,
+and does not change when the pattern is matched. Therefore, it is thread-safe,
+that is, the same compiled pattern can be used by more than one thread
+simultaneously. An application can compile all its patterns at the start,
+before forking off multiple threads that use them. However, if the just-in-time
+optimization feature is being used, it needs separate memory stack areas for
+each thread. See the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation for more details.
+</P>
+<P>
+(2) The next section below introduces the idea of "contexts" in which PCRE2
+functions are called. A context is nothing more than a collection of parameters
+that control the way PCRE2 operates. Grouping a number of parameters together
+in a context is a convenient way of passing them to a PCRE2 function without
+using lots of arguments. The parameters that are stored in contexts are in some
+sense "advanced features" of the API. Many straightforward applications will
+not need to use contexts.
+</P>
+<P>
+In a multithreaded application, if the parameters in a context are values that
+are never changed, the same context can be used by all the threads. However, if
+any thread needs to change any value in a context, it must make its own
+thread-specific copy.
+</P>
+<P>
+(3) The matching functions need a block of memory for working space and for
+storing the results of a match. This includes details of what was matched, as
+well as additional information such as the name of a (*MARK) setting. Each
+thread must provide its own version of this memory.
+</P>
+<br><a name="SEC13" href="#TOC1">PCRE2 CONTEXTS</a><br>
+<P>
+Some PCRE2 functions have a lot of parameters, many of which are used only by
+specialist applications, for example, those that use custom memory management
+or non-standard character tables. To keep function argument lists at a
+reasonable size, and at the same time to keep the API extensible, "uncommon"
+parameters are passed to certain functions in a <b>context</b> instead of
+directly. A context is just a block of memory that holds the parameter values.
+Applications that do not need to adjust any of the context parameters can pass
+NULL when a context pointer is required.
+</P>
+<P>
+There are three different types of context: a general context that is relevant
+for several PCRE2 operations, a compile-time context, and a match-time context.
+</P>
+<br><b>
+The general context
+</b><br>
+<P>
+At present, this context just contains pointers to (and data for) external
+memory management functions that are called from several places in the PCRE2
+library. The context is named `general' rather than specifically `memory'
+because in future other fields may be added. If you do not want to supply your
+own custom memory management functions, you do not need to bother with a
+general context. A general context is created by:
+<b>pcre2_general_context *pcre2_general_context_create(</b>
+<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
+<br>
+<br>
+The two function pointers specify custom memory management functions, whose
+prototypes are:
+<pre>
+  <b>void *private_malloc(PCRE2_SIZE, void *);</b>
+  <b>void  private_free(void *, void *);</b>
+</pre>
+Whenever code in PCRE2 calls these functions, the final argument is the value
+of <i>memory_data</i>. Either of the first two arguments of the creation
+function may be NULL, in which case the system memory management functions
+<i>malloc()</i> and <i>free()</i> are used. (This is not currently useful, as
+there are no other fields in a general context, but in future there might be.)
+The <i>private_malloc()</i> function is used (if supplied) to obtain memory for
+storing the context, and all three values are saved as part of the context.
+</P>
+<P>
+Whenever PCRE2 creates a data block of any kind, the block contains a pointer
+to the <i>free()</i> function that matches the <i>malloc()</i> function that was
+used. When the time comes to free the block, this function is called.
+</P>
+<P>
+A general context can be copied by calling:
+<b>pcre2_general_context *pcre2_general_context_copy(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+The memory used for a general context should be freed by calling:
+<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
+<a name="compilecontext"></a></P>
+<br><b>
+The compile context
+</b><br>
+<P>
+A compile context is required if you want to change the default values of any 
+of the following compile-time parameters:
+<pre>
+  What \R matches (Unicode newlines or CR, LF, CRLF only);
+  PCRE2's character tables;
+  The newline character sequence;
+  The compile time nested parentheses limit;
+  An external function for stack checking.
+</pre>
+A compile context is also required if you are using custom memory management. 
+If none of these apply, just pass NULL as the context argument of
+<i>pcre2_compile()</i>.
+</P>
+<P>
+A compile context is created, copied, and freed by the following functions:
+<b>pcre2_compile_context *pcre2_compile_context_create(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_compile_context *pcre2_compile_context_copy(</b>
+<b>  pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+A compile context is created with default values for its parameters. These can 
+be changed by calling the following functions, which return 0 on success, or 
+PCRE2_ERROR_BADDATA if invalid data is detected.
+<b>int pcre2_set_bsr_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, 
+or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line 
+ending sequence. The value of this parameter does not affect what is compiled; 
+it is just saved with the compiled pattern. The value is used by the JIT
+compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and 
+<i>pcre2_dfa_match()</i>. You can change the value when calling these functions, 
+but doing so disables the use of JIT.
+<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  const unsigned char *<i>tables</i>);</b>
+<br>
+<br>
+The value must be the result of a call to <i>pcre2_maketables()</i>, whose only 
+argument is a general context. This function builds a set of character tables
+in the current locale.
+<b>int pcre2_set_newline_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+This specifies which characters or character sequences are to be recognized as 
+newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
+PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character 
+sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
+PCRE2_NEWLINE_ANY (any Unicode newline sequence).
+</P>
+<P>
+When a pattern is compiled with the PCRE2_EXTENDED option, the value of this
+parameter affects the recognition of white space and the end of internal
+comments starting with #. The value is saved with the compiled pattern for
+subsequent use by the JIT compiler and by the two interpreted matching
+functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>. You can change the
+value when calling these functions, but doing so disables the use of JIT.
+<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
+depth of parenthesis nesting in a pattern. This limit stops rogue patterns
+using up too much system stack when being compiled.
+<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  int (*<i>guard_function</i>)(uint32_t));</b>
+<br>
+<br>
+There is at least one application that runs PCRE2 in threads with very limited
+system stack, where running out of stack is to be avoided at all costs. The 
+parenthesis limit above cannot take account of how much stack is actually
+available. For a finer control, you can supply a function that is called
+whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
+pattern. The argument to the function gives the current depth of nesting. The
+function should return zero if all is well, or non-zero to force an error.
+<a name="matchcontext"></a></P>
+<br><b>
+The match context
+</b><br>
+<P>
+A match context is required if you want to change the default values of any 
+of the following match-time parameters:
+<pre>
+  What \R matches (Unicode newlines or CR, LF, CRLF only);
+  A callout function;
+  The limit for calling <i>match()</i>;  
+  The limit for calling <i>match()</i> recursively;
+  The newline character sequence;
+</pre>
+A match context is also required if you are using custom memory management. 
+If none of these apply, just pass NULL as the context argument of 
+<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
+Changing the newline value or what \R matches at match time disables the use 
+of JIT via <b>pcre2_match()</b>.  
+</P>
+<P>
+A match context is created, copied, and freed by the following functions:
+<b>pcre2_match_context *pcre2_match_context_create(</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_match_context *pcre2_match_context_copy(</b>
+<b>  pcre2_match_context *<i>mcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
+<br>
+<br>
+A match context is created with default values for its parameters. These can 
+be changed by calling the following functions, which return 0 on success, or 
+PCRE2_ERROR_BADDATA if invalid data is detected.
+<b>int pcre2_set_bsr_match(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, 
+or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line 
+ending sequence. If you want to make use of JIT matching, you should not use 
+this function, but instead set the value in a compile context.
+<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
+<b>  void *<i>callout_data</i>);</b>
+<br>
+<br>
+This sets up a "callout" function, which PCRE2 will call at specified points
+during a matching operation. Details are given in the
+<a href="pcre2callout.html"><b>pcre2callout</b></a>
+documentation.
+<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
+up too many resources when processing patterns that are not going to match, but
+which have a very large number of possibilities in their search trees. The
+classic example is a pattern that uses nested unlimited repeats.
+</P>
+<P>
+Internally, <b>pcre2_match()</b> uses a function called <b>match()</b>, which it
+calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
+imposed on the number of times this function is called during a match, which
+has the effect of limiting the amount of backtracking that can take place. For
+patterns that are not anchored, the count restarts from zero for each position
+in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>, 
+which ignores it.
+</P>
+<P>
+When <b>pcre2_match()</b> is called with a pattern that was successfully studied
+with <b>pcre2_jit_compile()</b>, the way that the matching is executed is
+entirely different. However, there is still the possibility of runaway matching
+that goes on for a very long time, and so the <i>match_limit</i> value is also
+used in this case (but in a different way) to limit how long the matching can
+continue.
+</P>
+<P>
+The default value for the limit can be set when PCRE2 is built; the default
+default is 10 million, which handles all but the most extreme cases. If the
+limit is exceeded, <b>pcre2_match()</b> returns PCRE2_ERROR_MATCHLIMIT. A value
+for the match limit may also be supplied by an item at the start of a pattern
+of the form
+<pre>
+  (*LIMIT_MATCH=ddd)
+</pre>
+where ddd is a decimal number. However, such a setting is ignored unless ddd is
+less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
+limit is set, less than the default.
+<b>int pcre2_set_recursion_limit(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+The <i>recursion_limit</i> parameter is similar to <i>match_limit</i>, but
+instead of limiting the total number of times that <b>match()</b> is called, it
+limits the depth of recursion. The recursion depth is a smaller number than the
+total number of calls, because not all calls to <b>match()</b> are recursive.
+This limit is of use only if it is set smaller than <i>match_limit</i>.
+</P>
+<P>
+Limiting the recursion depth limits the amount of system stack that can be
+used, or, when PCRE2 has been compiled to use memory on the heap instead of the
+stack, the amount of heap memory that can be used. This limit is not relevant,
+and is ignored, when matching is done using JIT compiled code or by the 
+<b>pcre2_dfa_match()</b> function.
+</P>
+<P>
+The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
+default default is the same value as the default for <i>match_limit</i>. If the
+limit is exceeded, <b>pcre2_match()</b> returns PCRE2_ERROR_RECURSIONLIMIT. A
+value for the recursion limit may also be supplied by an item at the start of a
+pattern of the form
+<pre>
+  (*LIMIT_RECURSION=ddd)
+</pre>
+where ddd is a decimal number. However, such a setting is ignored unless ddd is
+less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
+limit is set, less than the default.
+<b>int pcre2_set_newline_match(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+This specifies which characters or character sequences are to be recognized as
+newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
+PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
+sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
+PCRE2_NEWLINE_ANY (any Unicode newline sequence). If you want to make use of
+JIT matching, you should not use this function, but instead set the value in a
+compile context.
+<b>int pcre2_set_recursion_memory_management(</b>
+<b>  pcre2_match_context *<i>mcontext</i>,</b>
+<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
+<br>
+<br>
+This function sets up two additional custom memory management functions for use 
+by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
+backtracking data, instead of recursive function calls that use the system 
+stack. There is a discussion about PCRE2's stack usage in the
+<a href="pcre2stack.html"><b>pcre2stack</b></a>
+documentation. See the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+documentation for details of how to build PCRE2. Using the heap for recursion
+is a non-standard way of building PCRE2, for use in environments that have
+limited stacks. Because of the greater use of memory management,
+<b>pcre2_match()</b> runs more slowly. Functions that are different to the
+general custom memory functions are provided so that special-purpose external
+code can be used for this case, because the memory blocks are all the same
+size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
+so that they can be re-used when possible during the match. In the absence of 
+these functions, the normal custom memory management functions are used, if
+supplied, otherwise the system functions.
+</P>
+<br><a name="SEC14" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
+<P>
+<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>, PCRE2_SIZE <i>length</i>);</b>
+</P>
+<P>
+The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to
+discover which optional features have been compiled into the PCRE2 library. The
+<a href="pcre2build.html"><b>pcre2build</b></a>
+documentation has more details about these optional features.
+</P>
+<P>
+The first argument for <b>pcre2_config()</b> specifies which information is
+required. The second argument is a pointer to memory into which the information
+is placed, with the final argument giving the length of this memory in bytes.
+For calls that return numerical values, <i>where</i> should point to
+appropriately aligned memory, with <i>length</i> set to at least the "sizeof"
+the data type.
+</P>
+<P>
+The returned value from <b>pcre2_config()</b> is zero on success, or the
+negative error code PCRE2_ERROR_BADOPTION if the value in the first argument is
+not recognized. The following information is available:
+<pre>
+  PCRE2_CONFIG_BSR
+</pre>
+The output is an integer whose value indicates what character sequences the \R
+escape sequence matches by default. A value of 0 means that \R matches any
+Unicode line ending sequence; a value of 1 means that \R matches only CR, LF,
+or CRLF. The default can be overridden when a pattern is compiled or matched.
+<pre>
+  PCRE2_CONFIG_JIT
+</pre>
+The output is an integer that is set to one if support for just-in-time
+compiling is available; otherwise it is set to zero.
+<pre>
+  PCRE2_CONFIG_JITTARGET
+</pre>
+FIXME: this needs sorting out once JIT is implemented.
+If JIT support is available, the string contains the name of the architecture
+for which the JIT compiler is configured, for example "x86 32bit (little endian
++ unaligned)". If JIT support is not available, FIXME.
+<pre>
+  PCRE2_CONFIG_LINKSIZE
+</pre>
+The output is an integer that contains the number of bytes used for internal
+linkage in compiled regular expressions. When PCRE2 is configured, the value
+can be set to 2, 3, or 4, with the default being 2. This is the value that is 
+returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled, 
+a value of 3 is rounded up to 4, and when the 32-bit library is compiled, 
+internal linkages always use 4 bytes, so the configured value is not relevant.
+</P>
+<P>
+The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
+but the most massive patterns, since it allows the size of the compiled pattern
+to be up to 64K code units. Larger values allow larger regular expressions to
+be compiled by those two libraries, but at the expense of slower matching.
+<pre>
+  PCRE2_CONFIG_MATCHLIMIT
+</pre>
+The output is an unsigned long integer that gives the default limit for the
+number of internal matching function calls in a <b>pcre2_match()</b> execution.
+Further details are given with <b>pcre2_match()</b> below.
+<pre>
+  PCRE2_CONFIG_NEWLINE
+</pre>
+The output is an integer whose value specifies the default character sequence
+that is recognized as meaning "newline". The values are:
+<pre>
+  1  Carriage return (CR)
+  2  Linefeed (LF)
+  3  Carriage return, linefeed (CRLF)
+  4  Any Unicode line ending
+  5  Any of CR, LF, or CRLF
+</pre>
+The default should normally correspond to the standard sequence for your
+operating system.
+<pre>
+  PCRE2_CONFIG_PARENSLIMIT
+</pre>
+The output is an unsigned long integer that gives the maximum depth of nesting
+of parentheses (of any kind) in a pattern. This limit is imposed to cap the
+amount of system stack used when a pattern is compiled. It is specified when
+PCRE2 is built; the default is 250. This limit does not take into account the
+stack that may already be used by the calling application. For finer control
+over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
+<pre>
+  PCRE2_CONFIG_RECURSIONLIMIT
+</pre>
+The output is an unsigned long integer that gives the default limit for the
+depth of recursion when calling the internal matching function in a
+<b>pcre2_match()</b> execution. Further details are given with
+<b>pcre2_match()</b> below.
+<pre>
+  PCRE2_CONFIG_STACKRECURSE
+</pre>
+The output is an integer that is set to one if internal recursion when running
+<b>pcre2_match()</b> is implemented by recursive function calls that use the
+system stack to remember their state. This is the usual way that PCRE2 is
+compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
+heap instead of recursive function calls.
+<pre>
+  PCRE2_CONFIG_UNICODE_VERSION
+</pre>
+The <i>where</i> argument should point to a buffer that is at least 24 code
+units long. If PCRE2 has been compiled without Unicode support, this is filled
+with the text "Unicode not supported". Otherwise, the Unicode version string
+(for example, "7.0.0") is returned. The string is zero-terminated.
+<pre>
+  PCRE2_CONFIG_UNICODE
+</pre>
+The output is an integer that is set to one if Unicode support is available;
+otherwise it is set to zero. Unicode support implies UTF support.
+<pre>
+  PCRE2_CONFIG_VERSION
+</pre>
+The <i>where</i> argument should point to a buffer that is at least 12 code 
+units long. It is filled with the PCRE2 version string, zero-terminated.   
+</P>
+<br><a name="SEC15" href="#TOC1">COMPILING A PATTERN</a><br>
+<P>
+<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
+<b>  uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
+<b>  pcre2_compile_context *<i>ccontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
+</P>
+<P>
+This function compiles a pattern, defined by a pointer to a string of code 
+units and a length, into an internal form. If the pattern is zero-terminated, 
+the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a 
+pointer to a block of memory that contains the compiled pattern and related 
+data. The caller must free the memory by calling <b>pcre2_code_free()</b> when 
+it is no longer needed.
+</P>
+<P>
+If the compile context argument <i>ccontext</i> is NULL, the memory is obtained 
+by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory 
+function that was used for the compile context.
+</P>
+<P>
+The <i>options</i> argument contains various bit settings that affect the
+compilation. It should be zero if no options are required. The available
+options are described below. Some of them (in particular, those that are
+compatible with Perl, but some others as well) can also be set and unset from
+within the pattern (see the detailed description in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation). 
+</P>
+<P>
+For those options that can be different in different parts of the pattern, the
+contents of the <i>options</i> argument specifies their settings at the start of
+compilation. The PCRE2_ANCHORED, PCRE2_NO_UTF_CHECK, and
+PCRE2_NO_START_OPTIMIZE options can be set at the time of matching as well as
+at compile time.
+</P>
+<P>
+Other, less frequently required compile-time parameters (for example, the 
+newline setting) can be provided in a compile context (as described
+<a href="#compilecontext">above).</a>
+</P>
+<P>
+If <i>errorcode</i> or <i>erroroffset</i> is NULL, <b>pcre2_compile()</b> returns
+NULL immediately. Otherwise, if compilation of a pattern fails,
+<b>pcre2_compile()</b> returns NULL, having set these variables to an error code
+and an offset (number of code units) within the pattern, respectively. The
+<b>pcre2_get_error_message()</b> function provides a textual message for each
+error code. Compilation errors are positive numbers, but UTF formatting errors
+are negative numbers. For an invalid UTF-8 or UTF-16 string, the offset is that
+of the first code unit of the failing character.
+</P>
+<P>
+Some errors are not detected until the whole pattern has been scanned; in these
+cases, the offset passed back is the length of the pattern. Note that the
+offset is in code units, not characters, even in a UTF mode. It may sometimes
+point into the middle of a UTF-8 or UTF-16 character.
+</P>
+<P>
+This code fragment shows a typical straightforward call to
+<b>pcre2_compile()</b>:
+<pre>
+  pcre2_code *re;
+  PCRE2_SIZE erroffset;
+  int errorcode; 
+  re = pcre2_compile(
+    "^A.*Z",                /* the pattern */
+    PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */ 
+    0,                      /* default options */
+    &errorcode,             /* for error code */
+    &erroffset,             /* for error offset */
+    NULL);                  /* no compile context */
+</pre>
+The following names for option bits are defined in the <b>pcre2.h</b> header
+file:
+<pre>
+  PCRE2_ANCHORED
+</pre>
+If this bit is set, the pattern is forced to be "anchored", that is, it is
+constrained to match only at the first matching point in the string that is
+being searched (the "subject string"). This effect can also be achieved by
+appropriate constructs in the pattern itself, which is the only way to do it in
+Perl.
+<pre>
+  PCRE2_ALLOW_EMPTY_CLASS
+</pre>
+By default, for compatibility with Perl, a closing square bracket that 
+immediately follows an opening one is treated as a data character for the 
+class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which 
+therefore contains no characters and so can never match. 
+<pre>
+  PCRE2_ALT_BSUX
+</pre>
+This option requests alternative handling of three escape sequences, which 
+makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
+</P>
+<P>
+(1) \U matches an upper case "U" character; by default \U causes a compile
+time error (Perl uses \U to upper case subsequent characters).
+</P>
+<P>
+(2) \u matches a lower case "u" character unless it is followed by four
+hexadecimal digits, in which case the hexadecimal number defines the code point
+to match. By default, \u causes a compile time error (Perl uses it to upper
+case the following character).
+</P>
+<P>
+(3) \x matches a lower case "x" character unless it is followed by two
+hexadecimal digits, in which case the hexadecimal number defines the code point
+to match. By default, as in Perl, a hexadecimal number is always expected after
+\x, but it may have zero, one, or two digits (so, for example, \xz matches a
+binary zero character followed by z).
+<pre>
+  PCRE2_AUTO_CALLOUT
+</pre>
+If this bit is set, <b>pcre2_compile()</b> automatically inserts callout items,
+all with number 255, before each pattern item. For discussion of the callout
+facility, see the
+<a href="pcre2callout.html"><b>pcre2callout</b></a>
+documentation.
+<pre>
+  PCRE2_CASELESS
+</pre>
+If this bit is set, letters in the pattern match both upper and lower case
+letters in the subject. It is equivalent to Perl's /i option, and it can be
+changed within a pattern by a (?i) option setting. 
+<pre>
+  PCRE2_DOLLAR_ENDONLY
+</pre>
+If this bit is set, a dollar metacharacter in the pattern matches only at the
+end of the subject string. Without this option, a dollar also matches
+immediately before a newline at the end of the string (but not before any other
+newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is
+set. There is no equivalent to this option in Perl, and no way to set it within
+a pattern.
+<pre>
+  PCRE2_DOTALL
+</pre>
+If this bit is set, a dot metacharacter in the pattern matches any character,
+including one that indicates a newline. However, it only ever matches one
+character, even if newlines are coded as CRLF. Without this option, a dot does
+not match when the current position in the subject is at a newline. This option
+is equivalent to Perl's /s option, and it can be changed within a pattern by a
+(?s) option setting. A negative class such as [^a] always matches newline
+characters, independent of the setting of this option.
+<pre>
+  PCRE2_DUPNAMES
+</pre>
+If this bit is set, names used to identify capturing subpatterns need not be
+unique. This can be helpful for certain types of pattern when it is known that
+only one instance of the named subpattern can ever be matched. There are more
+details of named subpatterns below; see also the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation.
+<pre>
+  PCRE2_EXTENDED
+</pre>
+If this bit is set, most white space characters in the pattern are totally
+ignored except when escaped or inside a character class. However, white space
+is not allowed within sequences such as (?&#62; that introduce various
+parenthesized subpatterns, nor within numerical quantifiers such as {1,3}.
+Ignorable white space is permitted between an item and a following quantifier
+and between a quantifier and a following + that indicates possessiveness.
+</P>
+<P>
+PCRE2_EXTENDED also causes characters between an unescaped # outside a
+character class and the next newline, inclusive, to be ignored, which makes it
+possible to include comments inside complicated patterns. Note that the end of
+this type of comment is a literal newline sequence in the pattern; escape
+sequences that happen to represent a newline do not count. PCRE2_EXTENDED is
+equivalent to Perl's /x option, and it can be changed within a pattern by a
+(?x) option setting.
+</P>
+<P>
+Which characters are interpreted as newlines can be specified by a setting in
+the compile context that is passed to <b>pcre2_compile()</b> or by a special
+sequence at the start of the pattern, as described in the section entitled
+<a href="pcre2pattern.html#newlines">"Newline conventions"</a>
+in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is 
+built.
+<pre>
+  PCRE2_FIRSTLINE
+</pre>
+If this option is set, an unanchored pattern is required to match before or at
+the first newline in the subject string, though the matched text may continue
+over the newline.
+<pre>
+  PCRE2_MATCH_UNSET_BACKREF
+</pre>
+If this option is set, a back reference to an unset subpattern group matches an
+empty string (by default this causes the current matching alternative to fail).
+A pattern such as (\1)(a) succeeds when this option is set (assuming it can
+find an "a" in the subject), whereas it fails by default, for Perl
+compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka 
+JavaScript).
+<pre>
+  PCRE2_MULTILINE
+</pre>
+By default, for the purposes of matching "start of line" and "end of line",
+PCRE2 treats the subject string as consisting of a single line of characters,
+even if it actually contains newlines. The "start of line" metacharacter (^)
+matches only at the start of the string, and the "end of line" metacharacter
+($) matches only at the end of the string, or before a terminating newline
+(except when PCRE2_DOLLAR_ENDONLY is set). Note, however, that unless
+PCRE2_DOTALL is set, the "any character" metacharacter (.) does not match at a
+newline. This behaviour (for ^, $, and dot) is the same as Perl.
+</P>
+<P>
+When PCRE2_MULTILINE is set, the "start of line" and "end of line"
+constructs match immediately following or immediately before internal newlines
+in the subject string, respectively, as well as at the very start and end. This
+is equivalent to Perl's /m option, and it can be changed within a pattern by a
+(?m) option setting. If there are no newlines in a subject string, or no
+occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
+<pre>
+  PCRE2_NEVER_UCP
+</pre>
+This option locks out the use of Unicode properties for handling \B, \b, \D,
+\d, \S, \s, \W, \w, and some of the POSIX character classes, as described 
+for the PCRE2_UCP option below. In particular, it prevents the creator of the 
+pattern from enabling this facility by starting the pattern with (*UCP). This 
+may be useful in applications that process patterns from external sources. The 
+option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
+<pre>
+  PCRE2_NEVER_UTF
+</pre>
+This option locks out interpretation of the pattern as UTF-8, UTF-16, or
+UTF-32, depending on which library is in use. In particular, it prevents the
+creator of the pattern from switching to UTF interpretation by starting the
+pattern with (*UTF). This may be useful in applications that process patterns
+from external sources. The combination of PCRE2_UTF and PCRE2_NEVER_UTF causes
+an error.
+<pre>
+  PCRE2_NO_AUTO_CAPTURE
+</pre>
+If this option is set, it disables the use of numbered capturing parentheses in
+the pattern. Any opening parenthesis that is not followed by ? behaves as if it
+were followed by ?: but named parentheses can still be used for capturing (and
+they acquire numbers in the usual way). There is no equivalent of this option
+in Perl.
+<pre>
+  PCRE2_NO_AUTO_POSSESS
+</pre>
+If this option is set, it disables "auto-possessification", which is an
+optimization that, for example, turns a+b into a++b in order to avoid
+backtracks into a+ that can never be successful. However, if callouts are in
+use, auto-possessification means that some callouts are never taken. You can
+set this option if you want the matching functions to do a full unoptimized
+search and run all the callouts, but it is mainly provided for testing
+purposes.
+<pre>
+  PCRE2_NO_START_OPTIMIZE
+</pre>
+This is an option that acts at matching time; that is, it is really an option
+for <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. If it is set at compile
+time, it is remembered with the compiled pattern and assumed at matching time.
+This is necessary if you want to use JIT execution, because the JIT compiler
+needs to know whether or not this option is set. For details, see the
+discussion of PCRE2_NO_START_OPTIMIZE in the section on <b>pcre2_match()</b> 
+options
+<a href="#matchoptions">below.</a>
+<pre>
+  PCRE2_NO_UTF_CHECK
+</pre>
+When PCRE2_UTF is set, the validity of the pattern as a UTF string is
+automatically checked. There are discussions about the validity of
+<a href="pcre2unicode.html#utf8strings">UTF-8 strings,</a>
+<a href="pcre2unicode.html#utf16strings">UTF-16 strings,</a>
+and
+<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
+in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+document. 
+If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
+error code.
+</P>
+<P>
+If you know that your pattern is valid, and you want to skip this check for
+performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it is set,
+the effect of passing an invalid UTF string as a pattern is undefined. It may
+cause your program to crash or loop. Note that this option can also be passed
+to <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, to suppress validity
+checking of the subject string.
+<pre>
+  PCRE2_UCP
+</pre>
+This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
+\w, and some of the POSIX character classes. By default, only ASCII characters
+are recognized, but if PCRE2_UCP is set, Unicode properties are used instead to
+classify characters. More details are given in the section on
+<a href="pcre2pattern.html#genericchartypes">generic character types</a>
+in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+page. If you set PCRE2_UCP, matching one of the items it affects takes much
+longer. The option is available only if PCRE2 has been compiled with UTF
+support.
+<pre>
+  PCRE2_UNGREEDY
+</pre>
+This option inverts the "greediness" of the quantifiers so that they are not
+greedy by default, but become greedy if followed by "?". It is not compatible
+with Perl. It can also be set by a (?U) option setting within the pattern.
+<pre>
+  PCRE2_UTF
+</pre>
+This option causes PCRE2 to regard both the pattern and the subject strings
+that are subsequently processed as strings of UTF characters instead of
+single-code-unit strings. However, it is available only when PCRE2 is built to
+include UTF support. If not, the use of this option provokes an error. Details
+of how this option changes the behaviour of PCRE2 are given in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+page.
+</P>
+<br><a name="SEC16" href="#TOC1">COMPILATION ERROR CODES</a><br>
+<P>
+There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
+it finds an error in the pattern. There are also some negative error codes that
+are used for invalid UTF strings. These are the same as given by
+<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and are described in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
+textual error message from any error code.
+</P>
+<br><a name="SEC17" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
+<P>
+<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
+<br>
+<br>
+<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>, pcre2_jit_stack *<i>jit_stack</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
+<b>  PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_stack_assign(const pcre2_code *<i>code</i>,</b>
+<b>  pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
+<br>
+<br>
+<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
+</P>
+<P>
+These functions provide support for JIT compilation, which, if the just-in-time
+compiler is available, further processes a compiled pattern into machine code
+that executes much faster than the <b>pcre2_match()</b> interpretive matching
+function. Full details are given in the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation.
+</P>
+<P>
+JIT compilation is a heavyweight optimization. It can take some time for
+patterns to be analyzed, and for one-off matches and simple patterns the
+benefit of faster execution might be offset by a much slower compilation time.
+Most, but not all patterns can be optimized by the JIT compiler.
+<a name="localesupport"></a></P>
+<br><a name="SEC18" href="#TOC1">LOCALE SUPPORT</a><br>
+<P>
+PCRE2 handles caseless matching, and determines whether characters are letters,
+digits, or whatever, by reference to a set of tables, indexed by character code
+point. When running in UTF-8 mode, or using the 16-bit or 32-bit libraries,
+this applies only to characters with code points less than 256. By default,
+higher-valued code points never match escapes such as \w or \d. However, if
+PCRE2 is built with UTF support, all characters can be tested with \p and \P,
+or, alternatively, the PCRE2_UCP option can be set when a pattern is compiled;
+this causes \w and friends to use Unicode property support instead of the
+built-in tables.
+</P>
+<P>
+The use of locales with Unicode is discouraged. If you are handling characters
+with code points greater than 128, you should either use Unicode support, or
+use locales, but not try to mix the two.
+</P>
+<P>
+PCRE2 contains an internal set of character tables that are used by default.
+These are sufficient for many applications. Normally, the internal tables
+recognize only ASCII characters. However, when PCRE2 is built, it is possible
+to cause the internal tables to be rebuilt in the default "C" locale of the
+local system, which may cause them to be different.
+</P>
+<P>
+The internal tables can be overridden by tables supplied by the application
+that calls PCRE2. These may be created in a different locale from the default.
+As more and more applications change to using Unicode, the need for this locale
+support is expected to die away.
+</P>
+<P>
+External tables are built by calling the <b>pcre2_maketables()</b> function, in
+the relevant locale. The result can be passed to <b>pcre2_compile()</b> as often
+as necessary, by creating a compile context and calling
+<b>pcre2_set_character_tables()</b> to set the tables pointer therein. For
+example, to build and use tables that are appropriate for the French locale
+(where accented characters with values greater than 128 are treated as
+letters), the following code could be used:
+<pre>
+  setlocale(LC_CTYPE, "fr_FR");
+  tables = pcre2_maketables(NULL);
+  ccontext = pcre2_compile_context_create(NULL);
+  pcre2_set_character_tables(ccontext, tables);
+  re = pcre2_compile(..., ccontext);
+</pre>
+The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
+are using Windows, the name for the French locale is "french". It is the
+caller's responsibility to ensure that the memory containing the tables remains
+available for as long as it is needed.
+</P>
+<P>
+The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
+is saved with the compiled pattern, and the same tables are used by
+<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>. Thus, for any single pattern,
+compilation and matching both happen in the same locale, but different patterns
+can be processed in different locales.
+<a name="infoaboutpattern"></a></P>
+<br><a name="SEC19" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
+<P>
+<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
+</P>
+<P>
+The <b>pcre2_pattern_info()</b> function returns information about a compiled
+pattern. The first argument is a pointer to the compiled pattern. The second
+argument specifies which piece of information is required, and the third
+argument is a pointer to a variable to receive the data. The yield of the
+function is zero for success, or one of the following negative numbers:
+<pre>
+  PCRE2_ERROR_NULL           the argument <i>code</i> was NULL
+                             the argument <i>where</i> was NULL
+  PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+  PCRE2_ERROR_BADOPTION      the value of <i>what</i> was invalid
+  PCRE2_ERROR_UNSET          the requested field is not set
+</pre>
+The "magic number" is placed at the start of each compiled pattern as a simple
+check against passing an arbitrary memory pointer.
+Here is
+a typical call of <b>pcre2_pattern_info()</b>, to obtain the length of the compiled
+pattern:
+<pre>
+  int rc;
+  size_t length;
+  rc = pcre2_pattern_info(
+    re,               /* result of pcre2_compile() */
+    PCRE2_INFO_SIZE,  /* what is required */
+    &length);         /* where to put the data */
+</pre>
+The possible values for the second argument are defined in <b>pcre2.h</b>, and
+are as follows:
+<pre>
+  PCRE2_INFO_ALLOPTIONS
+  PCRE2_INFO_ARGOPTIONS 
+</pre>
+Return a copy of the pattern's options. The third argument should point to a 
+<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
+were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
+the compile options as modified by any top-level option settings at the start
+of the pattern itself. In other words, they are the options that will be in
+force when matching starts. For example, if the pattern /(?im)abc(?-i)d/ is
+compiled with the PCRE2_EXTENDED option, the result is PCRE2_CASELESS,
+PCRE2_MULTILINE, and PCRE2_EXTENDED.
+</P>
+<P>
+A pattern is automatically anchored by PCRE2 if all of its top-level
+alternatives begin with one of the following:
+<pre>
+  ^     unless PCRE2_MULTILINE is set
+  \A    always
+  \G    always
+  .*    if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
+</pre>
+For such patterns, the PCRE2_ANCHORED bit is set in the options returned for 
+PCRE2_INFO_ALLOPTIONS.
+<pre>
+  PCRE2_INFO_BACKREFMAX
+</pre>
+Return the number of the highest back reference in the pattern. The third
+argument should point to a <b>uint32_t</b> variable. Zero is returned if there
+are no back references.
+<pre>
+  PCRE2_INFO_BSR
+</pre>
+The output is a uint32_t whose value indicates what character sequences the \R
+escape sequence matches by default. A value of 0 means that \R matches any
+Unicode line ending sequence; a value of 1 means that \R matches only CR, LF,
+or CRLF. The default can be overridden when a pattern is matched.
+<pre>
+  PCRE2_INFO_CAPTURECOUNT
+</pre>
+Return the number of capturing subpatterns in the pattern. The third argument
+should point to a <b>uint32_t</b> variable.
+<pre>
+  PCRE2_INFO_FIRSTCODETYPE
+</pre>
+Return information about the first code unit of any matched string, for a
+non-anchored pattern. The third argument should point to a <b>uint32_t</b>
+variable.
+</P>
+<P>
+If there is a fixed first value, for example, the letter "c" from a pattern
+such as (cat|cow|coyote), 1 is returned, and the character value can be
+retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, and
+if either
+<br>
+<br>
+(a) the pattern was compiled with the PCRE2_MULTILINE option, and every branch
+starts with "^", or
+<br>
+<br>
+(b) every branch of the pattern starts with ".*" and PCRE2_DOTALL is not set
+(if it were set, the pattern would be anchored),
+<br>
+<br>
+2 is returned, indicating that the pattern matches only at the start of a
+subject string or after any newline within the string. Otherwise 0 is
+returned. For anchored patterns, 0 is returned.
+<pre>
+  PCRE2_INFO_FIRSTCODEUNIT
+</pre>
+Return the value of the first code unit of any matched string in the situation
+where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third
+argument should point to a <b>uint32_t</b> variable. In the 8-bit library, the
+value is always less than 256. In the 16-bit library the value can be up to
+0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff,
+and up to 0xffffffff when not using UTF-32 mode.
+<pre>
+  PCRE2_INFO_FIRSTBITMAP
+</pre>
+In the absence of a single first code unit for a non-anchored pattern,
+<b>pcre2_compile()</b> may construct a 256-bit table that defines a fixed set of
+values for the first code unit in any match. For example, a pattern that starts
+with [abc] results in a table with three bits set. When code unit values
+greater than 255 are supported, the flag bit for 255 means "any code unit of
+value 255 or above". If such a table was constructed, a pointer to it is
+returned. Otherwise NULL is returned. The third argument should point to a
+<b>const uint8_t *</b> variable.
+<pre>
+  PCRE2_INFO_HASCRORLF
+</pre>
+Return 1 if the pattern contains any explicit matches for CR or LF characters,
+otherwise 0. The third argument should point to a <b>uint32_t</b> variable. An
+explicit match is either a literal CR or LF character, or \r or \n.
+<pre>
+  PCRE2_INFO_JCHANGED
+</pre>
+Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
+0. The third argument should point to a <b>uint32_t</b> variable. (?J) and
+(?-J) set and unset the local PCRE2_DUPNAMES option, respectively.
+<pre>
+  PCRE2_INFO_JITSIZE
+</pre>
+If the compiled pattern was successfully processed by
+<b>pcre2_jit_compile()</b>, return the size of the JIT compiled code, otherwise
+return zero. The third argument should point to a <b>size_t</b> variable.
+<pre>
+  PCRE2_INFO_LASTCODETYPE
+</pre>
+Return 1 if there is a rightmost literal code unit that must exist in any
+matched string, other than at its start. The third argument should point to a
+<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
+returned, the code unit value itself can be retrieved using
+PCRE2_INFO_LASTCODEUNIT.
+</P>
+<P>
+For anchored patterns, a last literal value is recorded only if it follows
+something of variable length. For example, for the pattern /^a\d+z\d+/ the
+returned value is 1 (with "z" returned from PCRE2_INFO_LASTCODEUNIT), but for
+/^a\dz\d/ the returned value is 0.
+<pre>
+  PCRE2_INFO_LASTCODEUNIT
+</pre>
+Return the value of the rightmost literal code unit that must exist in any
+matched string, other than at its start, if such a value has been recorded. The
+third argument should point to a <b>uint32_t</b> variable. If there is no such
+value, 0 is returned.
+<pre>
+  PCRE2_INFO_MATCHEMPTY
+</pre>
+Return 1 if the pattern can match an empty string, otherwise 0. The third
+argument should point to a <b>uint32_t</b> variable.
+<pre>
+  PCRE2_INFO_MATCHLIMIT
+</pre>
+If the pattern set a match limit by including an item of the form
+(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
+should point to an unsigned 32-bit integer. If no such value has been set, the
+call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
+<pre>
+  PCRE2_INFO_MAXLOOKBEHIND
+</pre>
+Return the number of characters (not code units) in the longest lookbehind
+assertion in the pattern. The third argument should point to an unsigned 32-bit
+integer. This information is useful when doing multi-segment matching using the
+partial matching facilities. Note that the simple assertions \b and \B
+require a one-character lookbehind. \A also registers a one-character
+lookbehind, though it does not actually inspect the previous character. This is
+to ensure that at least one character from the old segment is retained when a
+new segment is processed. Otherwise, if there are no lookbehinds in the
+pattern, \A might match incorrectly at the start of a new segment.
+<pre>
+  PCRE2_INFO_MINLENGTH
+</pre>
+If a minimum length for matching subject strings was computed, its value is
+returned. Otherwise the returned value is 0. The value is a number of
+characters, which in UTF mode may be different from the number of code units.
+The third argument should point to a <b>uint32_t</b> variable. The value is a
+lower bound to the length of any matching string. There may not be any strings
+of that length that do actually match, but every string that does match is at
+least that long.
+<pre>
+  PCRE2_INFO_NAMECOUNT
+  PCRE2_INFO_NAMEENTRYSIZE
+  PCRE2_INFO_NAMETABLE
+</pre>
+PCRE2 supports the use of named as well as numbered capturing parentheses. The
+names are just an additional way of identifying the parentheses, which still
+acquire numbers. Several convenience functions such as
+<b>pcre2_substring_get_byname()</b> are provided for extracting captured
+substrings by name. It is also possible to extract the data directly, by first
+converting the name to a number in order to access the correct pointers in the
+output vector (described with <b>pcre2_match()</b> below). To do the conversion,
+you need to use the name-to-number map, which is described by these three
+values.
+</P>
+<P>
+The map consists of a number of fixed-size entries. PCRE2_INFO_NAMECOUNT gives
+the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives the size of each
+entry; both of these return a <b>uint32_t</b> value. The entry size depends on
+the length of the longest name. PCRE2_INFO_NAMETABLE returns a pointer to the
+first entry of the table. This is a PCRE2_SPTR pointer to a block of code
+units. In the 8-bit library, the first two bytes of each entry are the number
+of the capturing parenthesis, most significant byte first. In the 16-bit
+library, the pointer points to 16-bit data units, the first of which contains
+the parenthesis number. In the 32-bit library, the pointer points to 32-bit
+data units, the first of which contains the parenthesis number. The rest of the
+entry is the corresponding name, zero terminated.
+</P>
+<P>
+The names are in alphabetical order. If (?| is used to create multiple groups
+with the same number, as described in the
+<a href="pcre2pattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
+in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+page, the groups may be given the same name, but there is only one entry in the
+table. Different names for groups of the same number are not permitted.
+</P>
+<P>
+Duplicate names for subpatterns with different numbers are permitted, but only
+if PCRE2_DUPNAMES is set. They appear in the table in the order in which they
+were found in the pattern. In the absence of (?| this is the order of
+increasing number; when (?| is used this is not necessarily the case because
+later subpatterns may have lower numbers.
+</P>
+<P>
+As a simple example of the name/number table, consider the following pattern
+after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white
+space - including newlines - is ignored):
+<pre>
+  (?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )
+</pre>
+There are four named subpatterns, so the table has four entries, and each entry
+in the table is eight bytes long. The table is as follows, with non-printing
+bytes shown in hexadecimal, and undefined bytes shown as ??:
+<pre>
+  00 01 d  a  t  e  00 ??
+  00 05 d  a  y  00 ?? ??
+  00 04 m  o  n  t  h  00
+  00 02 y  e  a  r  00 ??
+</pre>
+When writing code to extract data from named subpatterns using the
+name-to-number map, remember that the length of the entries is likely to be
+different for each compiled pattern.
+<pre>
+  PCRE2_INFO_NEWLINE
+</pre>
+The output is a <b>uint32_t</b> whose value specifies the default character
+sequence that will be recognized as meaning "newline" while matching. The
+values are:
+<pre>
+  1  Carriage return (CR)
+  2  Linefeed (LF)
+  3  Carriage return, linefeed (CRLF)
+  4  Any Unicode line ending
+  5  Any of CR, LF, or CRLF
+</pre>
+The default can be overridden when a pattern is matched.
+<pre>
+  PCRE2_INFO_RECURSIONLIMIT
+</pre>
+If the pattern set a recursion limit by including an item of the form
+(*LIMIT_RECURSION=nnnn) at the start, the value is returned. The third
+argument should point to an unsigned 32-bit integer. If no such value has been
+set, the call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
+<pre>
+  PCRE2_INFO_SIZE
+</pre>
+Return the size of the compiled pattern in bytes (for all three libraries). The
+third argument should point to a <b>size_t</b> variable. This value does not
+include the size of the <b>pcre2_code</b> structure that is returned by
+<b>pcre2_compile()</b>. The value that is used when <b>pcre2_compile()</b> is
+getting memory in which to place the compiled data is the value returned by
+this option plus the size of the <b>pcre2_code</b> structure. Processing a
+pattern with the JIT compiler does not alter the value returned by this option.
+<a name="matchdatablock"></a></P>
+<br><a name="SEC20" href="#TOC1">THE MATCH DATA BLOCK</a><br>
+<P>
+<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *<i>code</i>,</b>
+<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<P>
+Information about successful and unsuccessful matches is placed in a match 
+data block, which is an opaque structure that is accessed by function calls. In
+particular, the match data block contains a vector of offsets into the subject
+string that define the matched part of the subject and any substrings that were
+captured. This is known as the <i>ovector</i>.
+</P>
+<P>
+Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a 
+match data block by calling one of the creation functions above. For
+<b>pcre2_match_data_create()</b>, the first argument is the number of pairs of
+offsets in the <i>ovector</i>. One pair of offsets is required to identify the
+string that matched the whole pattern, with another pair for each captured
+substring. For example, a value of 4 creates enough space to record the
+matched portion of the subject plus three captured substrings. 
+</P>
+<P>
+For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
+pointer to a compiled pattern. In this case the ovector is created to be 
+exactly the right size to hold all the substrings a pattern might capture.
+</P>
+<P>
+The second argument of both these functions is a pointer to a general context,
+which can specify custom memory management for obtaining the memory for the 
+match data block. If you are not using custom memory management, pass NULL.
+</P>
+<P>
+A match data block can be used many times, with the same or different compiled
+patterns. When it is no longer needed, it should be freed by calling
+<b>pcre2_match_data_free()</b>. How to extract information from a match data
+block after a match operation is described in the sections on
+<a href="#matchedstrings">matched strings</a>
+and
+<a href="#matchotherdata">other match data</a>
+below.
+</P>
+<br><a name="SEC21" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
+<P>
+<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>);</b>
+</P>
+<P>
+The function <b>pcre2_match()</b> is called to match a subject string against a
+compiled pattern, which is passed in the <i>code</i> argument. You can call
+<b>pcre2_match()</b> with the same <i>code</i> argument as many times as you
+like, in order to find multiple matches in the subject string or to match
+different subject strings with the same pattern.
+</P>
+<P>
+This function is the main matching facility of the library, and it operates in
+a Perl-like manner. For specialist use there is also an alternative matching
+function, which is described
+<a href="#dfamatch">below</a>
+in the section about the <b>pcre2_dfa_match()</b> function.
+</P>
+<P>
+Here is an example of a simple call to <b>pcre2_match()</b>:
+<pre>
+  pcre2_match_data *match_data = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    match_data,     /* the match data block */
+    NULL);          /* a match context; NULL means use defaults */
+</pre>
+If the subject string is zero-terminated, the length can be given as 
+PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common 
+matching parameters are to be changed. For details, see the section on
+<a href="#matchcontext">the match context</a>
+above.
+</P>
+<br><b>
+The string to be matched by <b>pcre2_match()</b>
+</b><br>
+<P>
+The subject string is passed to <b>pcre2_match()</b> as a pointer in
+<i>subject</i>, a length in <i>length</i>, and a starting offset in
+<i>startoffset</i>. The length and offset are in code units, not characters.
+That is, they are in bytes for the 8-bit library, 16-bit code units for the
+16-bit library, and 32-bit code units for the 32-bit library, whether or not 
+UTF processing is enabled.
+</P>
+<P>
+If <i>startoffset</i> is greater than the length of the subject,
+<b>pcre2_match()</b> returns PCRE2_ERROR_BADOFFSET. When the starting offset is
+zero, the search for a match starts at the beginning of the subject, and this
+is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
+must point to the start of a character, or to the end of the subject (in UTF-32
+mode, one code unit equals one character, so all offsets are valid). Like the
+pattern string, the subject may contain binary zeroes. 
+</P>
+<P>
+A non-zero starting offset is useful when searching for another match in the
+same subject by calling <b>pcre2_match()</b> again after a previous success.
+Setting <i>startoffset</i> differs from passing over a shortened string and
+setting PCRE2_NOTBOL in the case of a pattern that begins with any kind of
+lookbehind. For example, consider the pattern
+<pre>
+  \Biss\B
+</pre>
+which finds occurrences of "iss" in the middle of words. (\B matches only if
+the current position in the subject is not a word boundary.) When applied to
+the string "Mississippi" the first call to <b>pcre2_match()</b> finds the first
+occurrence. If <b>pcre2_match()</b> is called again with just the remainder of
+the subject, namely "issippi", it does not match, because \B is always false at
+the start of the subject, which is deemed to be a word boundary. However, if
+<b>pcre2_match()</b> is passed the entire string again, but with
+<i>startoffset</i> set to 4, it finds the second occurrence of "iss" because it
+is able to look behind the starting point to discover that it is preceded by a
+letter.
+</P>
+<P>
+Finding all the matches in a subject is tricky when the pattern can match an
+empty string. It is possible to emulate Perl's /g behaviour by first trying the
+match again at the same offset, with the PCRE2_NOTEMPTY_ATSTART and
+PCRE2_ANCHORED options, and then if that fails, advancing the starting offset
+and trying an ordinary match again. There is some code that demonstrates how to
+do this in the
+<a href="pcre2demo.html"><b>pcre2demo</b></a>
+sample program. In the most general case, you have to check to see if the
+newline convention recognizes CRLF as a newline, and if so, and the current
+character is CR followed by LF, advance the starting offset by two characters
+instead of one.
+</P>
+<P>
+If a non-zero starting offset is passed when the pattern is anchored, one
+attempt to match at the given offset is made. This can only succeed if the
+pattern does not require the match to be at the start of the subject.
+<a name="matchoptions"></a></P>
+<br><b>
+Option bits for <b>pcre2_match()</b>
+</b><br>
+<P>
+The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
+zero. The only bits that may be set are PCRE2_ANCHORED, 
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
+PCRE2_PARTIAL_SOFT. Their action is described below.
+</P>
+<P>
+If the pattern was successfully processed by the just-in-time (JIT) compiler,
+the only supported options for matching using the JIT code are PCRE2_NOTBOL,
+PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
+PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. If an unsupported option is used,
+JIT matching is disabled and the normal interpretive code in
+<b>pcre2_match()</b> is run.
+<pre>
+  PCRE2_ANCHORED
+</pre>
+The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
+matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
+to be anchored by virtue of its contents, it cannot be made unanchored at
+matching time. Note that setting the option at match time disables JIT 
+matching.
+<pre>
+  PCRE2_NOTBOL
+</pre>
+This option specifies that the first character of the subject string is not the
+beginning of a line, so the circumflex metacharacter should not match before
+it. Setting this without PCRE2_MULTILINE (at compile time) causes circumflex
+never to match. This option affects only the behaviour of the circumflex
+metacharacter. It does not affect \A.
+<pre>
+  PCRE2_NOTEOL
+</pre>
+This option specifies that the end of the subject string is not the end of a
+line, so the dollar metacharacter should not match it nor (except in multiline
+mode) a newline immediately before it. Setting this without PCRE2_MULTILINE (at
+compile time) causes dollar never to match. This option affects only the
+behaviour of the dollar metacharacter. It does not affect \Z or \z.
+<pre>
+  PCRE2_NOTEMPTY
+</pre>
+An empty string is not considered to be a valid match if this option is set. If
+there are alternatives in the pattern, they are tried. If all the alternatives
+match the empty string, the entire match fails. For example, if the pattern
+<pre>
+  a?b?
+</pre>
+is applied to a string not beginning with "a" or "b", it matches an empty
+string at the start of the subject. With PCRE2_NOTEMPTY set, this match is not
+valid, so PCRE2 searches further into the string for occurrences of "a" or "b".
+<pre>
+  PCRE2_NOTEMPTY_ATSTART
+</pre>
+This is like PCRE2_NOTEMPTY, except that an empty string match that is not at
+the start of the subject is permitted. If the pattern is anchored, such a match
+can occur only if the pattern contains \K.
+<pre>
+  PCRE2_NO_START_OPTIMIZE
+</pre>
+There are a number of optimizations that <b>pcre2_match()</b> uses at the start
+of a match, in order to speed up the process. For example, if it is known that
+an unanchored match must start with a specific character, it searches the
+subject for that character, and fails immediately if it cannot find it, without
+actually running the main matching function. This means that a special item
+such as (*COMMIT) at the start of a pattern is not considered until after a
+suitable starting point for the match has been found. Also, when callouts or
+(*MARK) items are in use, these "start-up" optimizations can cause them to be
+skipped if the pattern is never actually used. The start-up optimizations are
+in effect a pre-scan of the subject that takes place before the pattern is run.
+</P>
+<P>
+The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
+possibly causing performance to suffer, but ensuring that in cases where the
+result is "no match", the callouts do occur, and that items such as (*COMMIT)
+and (*MARK) are considered at every possible starting position in the subject
+string. If PCRE2_NO_START_OPTIMIZE is set at compile time, it cannot be unset
+at matching time. The use of PCRE2_NO_START_OPTIMIZE at matching time (that is,
+passing it to <b>pcre2_match()</b>) disables JIT execution; in this situation,
+matching is always done using interpretive code.
+</P>
+<P>
+Setting PCRE2_NO_START_OPTIMIZE can change the outcome of a matching operation.
+Consider the pattern
+<pre>
+  (*COMMIT)ABC
+</pre>
+When this is compiled, PCRE2 records the fact that a match must start with the
+character "A". Suppose the subject string is "DEFABC". The start-up
+optimization scans along the subject, finds "A" and runs the first match
+attempt from there. The (*COMMIT) item means that the pattern must match the
+current starting position, which in this case, it does. However, if the same
+match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
+subject string does not happen. The first match attempt is run starting from
+"D" and when this fails, (*COMMIT) prevents any further matches being tried, so
+the overall result is "no match". There are also other start-up optimizations.
+For example, a minimum length for the subject may be recorded. Consider the
+pattern
+<pre>
+  (*MARK:A)(X|Y)
+</pre>
+The minimum length for a match is one character. If the subject is "ABC", there
+will be attempts to match "ABC", "BC", and "C". An attempt to match an empty 
+string at the end of the subject does not take place, because PCRE2 knows that
+the subject is now too short, and so the (*MARK) is never encountered. In this
+case, the optimization does not affect the overall match result, which is still
+"no match", but it does affect the auxiliary information that is returned.
+<pre>
+  PCRE2_NO_UTF_CHECK
+</pre>
+When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
+string is checked by default when <b>pcre2_match()</b> is subsequently called.
+The entire string is checked before any other processing takes place, and a
+negative error code is returned if the check fails. There are several UTF error
+codes for each code unit width, corresponding to different problems with the
+code unit sequence. The value of <i>startoffset</i> is also checked, to ensure
+that it points to the start of a character or to the end of the subject. There
+are discussions about the validity of
+<a href="pcre2unicode.html#utf8strings">UTF-8 strings,</a>
+<a href="pcre2unicode.html#utf16strings">UTF-16 strings,</a>
+and
+<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
+in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+page. 
+</P>
+<P>
+If you know that your subject is valid, and you want to skip these checks for
+performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
+<b>pcre2_match()</b>. You might want to do this for the second and subsequent
+calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
+matches in a single subject string. 
+</P>
+<P>
+NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
+as a subject, or an invalid value of <i>startoffset</i>, is undefined. Your
+program may crash or loop indefinitely.
+<pre>
+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+</pre>
+These options turn on the partial matching feature. A partial match occurs if
+the end of the subject string is reached successfully, but there are not enough
+subject characters to complete the match. If this happens when
+PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) is set, matching continues by
+testing any remaining alternatives. Only if no complete match can be found is
+PCRE2_ERROR_PARTIAL returned instead of PCRE2_ERROR_NOMATCH. In other words,
+PCRE2_PARTIAL_SOFT says that the caller is prepared to handle a partial match,
+but only if no complete match can be found.
+</P>
+<P>
+If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this case, if
+a partial match is found, <b>pcre2_match()</b> immediately returns
+PCRE2_ERROR_PARTIAL, without considering any other alternatives. In other
+words, when PCRE2_PARTIAL_HARD is set, a partial match is considered to be more
+important than an alternative complete match.
+</P>
+<P>
+There is a more detailed discussion of partial and multi-segment matching, with
+examples, in the
+<a href="pcre2partial.html"><b>pcre2partial</b></a>
+documentation.
+</P>
+<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
+<P>
+When PCRE2 is built, a default newline convention is set; this is usually the 
+standard convention for the operating system. The default can be overridden in 
+either a 
+<a href="#compilecontext">compile context</a>
+or a
+<a href="#matchcontext">match context.</a>
+However, changing the newline convention at match time disables JIT matching.
+During matching, the newline choice affects the behaviour of the dot,
+circumflex, and dollar metacharacters. It may also alter the way the match
+position is advanced after a match failure for an unanchored pattern.
+</P>
+<P>
+When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is set,
+and a match attempt for an unanchored pattern fails when the current position
+is at a CRLF sequence, and the pattern contains no explicit matches for CR or
+LF characters, the match position is advanced by two characters instead of one,
+in other words, to after the CRLF.
+</P>
+<P>
+The above rule is a compromise that makes the most common cases work as
+expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is
+not set), it does not match the string "\r\nA" because, after failing at the
+start, it skips both the CR and the LF before retrying. However, the pattern
+[\r\n]A does match that string, because it contains an explicit CR or LF
+reference, and so advances only by one character after the first failure.
+</P>
+<P>
+An explicit match for CR or LF is either a literal appearance of one of those
+characters in the pattern, or one of the \r or \n escape sequences. Implicit
+matches such as [^X] do not count, nor does \s (which includes CR and LF in
+the characters that it matches).
+</P>
+<P>
+Notwithstanding the above, anomalous effects may still occur when CRLF is a
+valid newline sequence and explicit \r or \n escapes appear in the pattern.
+<a name="matchedstrings"></a></P>
+<br><a name="SEC23" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
+<P>
+<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<P>
+In general, a pattern matches a certain portion of the subject, and in
+addition, further substrings from the subject may be picked out by 
+parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
+book, this is called "capturing" in what follows, and the phrase "capturing
+subpattern" is used for a fragment of a pattern that picks out a substring.
+PCRE2 supports several other kinds of parenthesized subpattern that do not
+cause substrings to be captured. The <b>pcre2_pattern_info()</b> function can be
+used to find out how many capturing subpatterns there are in a compiled
+pattern.
+</P>
+<P>
+The overall matched string and any captured substrings are returned to the
+caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is 
+contained within the
+<a href="#matchdatablock">match data block.</a>
+You can obtain direct access to the ovector by calling 
+<b>pcre2_get_ovector_pointer()</b> to find its address, and 
+<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
+contains. Alternatively, you can use the auxiliary functions for accessing
+captured substrings
+<a href="#extractbynumber">by number</a>
+or
+<a href="#extractbyname">by name</a>
+(see below).
+</P>
+<P>
+Within the ovector, the first in each pair of values is set to the offset of
+the first code unit of a substring, and the second is set to the offset of the
+first code unit after the end of a substring. These values are always code unit
+offsets, not character offsets. That is, they are byte offsets in the 8-bit
+library, 16-bit offsets in the 16-bit library, and 32-bit offsets in the 32-bit
+library.
+</P>
+<P>
+The first pair of offsets (that is, <i>ovector[0]</i> and <i>ovector[1]</i>)
+identifies the portion of the subject string that was matched by the entire
+pattern. The next pair is used for the first capturing subpattern, and so on.
+The value returned by <b>pcre2_match()</b> is one more than the highest numbered
+pair that has been set. For example, if two substrings have been captured, the
+returned value is 3. If there are no capturing subpatterns, the return value
+from a successful match is 1, indicating that just the first pair of offsets
+has been set.
+</P>
+<P>
+If a capturing subpattern is matched repeatedly within a single match
+operation, it is the last portion of the string that it matched that is
+returned.
+</P>
+<P>
+If the ovector is too small to hold all the captured substring offsets, as much
+as possible is filled in, and the function returns a value of zero. If neither
+the actual string matched nor any captured substrings are of interest,
+<b>pcre2_match()</b> may be called with a match data block whose ovector is of
+zero length. However, if the pattern contains back references and the
+<i>ovector</i> is not big enough to remember the related substrings, PCRE2 has
+to get additional memory for use during matching. Thus it is usually advisable
+to set up a match data block containing an ovector of reasonable size.
+</P>
+<P>
+It is possible for capturing subpattern number <i>n+1</i> to match some part of
+the subject when subpattern <i>n</i> has not been used at all. For example, if
+the string "abc" is matched against the pattern (a|(z))(bc) the return from the
+function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
+happens, both values in the offset pairs corresponding to unused subpatterns
+are set to PCRE2_UNSET.
+</P>
+<P>
+Offset values that correspond to unused subpatterns at the end of the
+expression are also set to PCRE2_UNSET. For example, if the string "abc" is
+matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched.
+The return from the function is 2, because the highest used capturing
+subpattern number is 1. The offsets for the second and third capturing
+subpatterns (assuming the vector is large enough, of course) are set to
+PCRE2_UNSET.
+</P>
+<P>
+Elements in the ovector that do not correspond to capturing parentheses in the
+pattern are never changed. That is, if a pattern contains <i>n</i> capturing
+parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
+<b>pcre2_match()</b>. The other elements retain whatever values they previously
+had.
+<a name="matchotherdata"></a></P>
+<br><b>
+Other information about the match
+</b><br>
+<P>
+<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<P>
+In addition to the offsets in the ovector, other information about a match is 
+retained in the match data block and can be retrieved by the above functions.
+</P>
+<P>
+When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
+pointer to the zero-terminated name, which is within the compiled pattern. 
+Otherwise NULL is returned. A (*MARK) name may be available after a failed 
+match or a partial match, as well as after a successful one.
+</P>
+<P>
+The other three functions yield values that give information about the part of 
+the subject string that was inspected during a successful match or a partial 
+match. Their results are undefined after a failed match. They return the 
+following values, respectively:
+<br>
+<br>
+(1) The offset of the leftmost character that was inspected during the match.
+This can be earlier than the point at which the match started if the pattern
+contains lookbehind assertions or \b or \B at the start.
+<br>
+<br>
+(2) The offset of the character that follows the rightmost character that was
+inspected during the match. This can be after the end of the match if the 
+pattern contains lookahead assertions.
+<br>
+<br>
+(3) The offset of the character at which the successful or partial match 
+started. This can be different to the value of <i>ovector[0]</i> if the pattern 
+contains the \K escape sequence.
+</P>
+<P>
+For example, if the pattern (?&#60;=abc)xx\Kyy(?=def) is matched against the
+string "123abcxxyydef123", the resulting offsets are:
+<pre>
+  ovector[0]   8
+  ovector[1]  10
+  leftchar     3
+  rightchar   13
+  startchar    6
+</pre>
+The <b>allusedtext</b> modifier in <b>pcre2test</b> can be used to display a
+longer string that shows the leftmost and rightmost characters in a match
+instead of just the matched string.
+<a name="errorlist"></a></P>
+<br><b>
+Error return values from <b>pcre2_match()</b>
+</b><br>
+<P>
+If <b>pcre2_match()</b> fails, it returns a negative number. This can be 
+converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
+error codes are also returned by other functions, and are documented with them.
+The codes are given names in the header file. If UTF checking is in force and
+an invalid UTF subject string is detected, one of a number of UTF-specific
+negative error codes is returned. Details are given in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+page. The following are the other errors that may be returned by
+<b>pcre2_match()</b>:
+<pre>
+  PCRE2_ERROR_NOMATCH
+</pre>
+The subject string did not match the pattern.
+<pre>
+  PCRE2_ERROR_PARTIAL
+</pre>
+The subject string did not match, but it did match partially. See the
+<a href="pcre2partial.html"><b>pcre2partial</b></a>
+documentation for details of partial matching.
+<pre>
+  PCRE2_ERROR_BADMAGIC
+</pre>
+PCRE2 stores a 4-byte "magic number" at the start of the compiled code, to
+catch the case when it is passed a junk pointer. This is the error that is
+returned when the magic number is not present.
+<pre>
+  PCRE2_ERROR_BADMODE
+</pre>
+This error is given when a pattern that was compiled by the 8-bit library is
+passed to a 16-bit or 32-bit library function, or vice versa.
+<pre>
+  PCRE2_ERROR_BADOFFSET
+</pre>
+The value of <i>startoffset</i> was greater than the length of the subject.
+<pre>
+  PCRE2_ERROR_BADOPTION
+</pre>
+An unrecognized bit was set in the <i>options</i> argument.
+<pre>
+  PCRE2_ERROR_BADUTFOFFSET
+</pre>
+The UTF code unit sequence that was passed as a subject was checked and found
+to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the value of
+<i>startoffset</i> did not point to the beginning of a UTF character or the end
+of the subject.
+<pre>
+  PCRE2_ERROR_CALLOUT
+</pre>
+This error is never generated by <b>pcre2_match()</b> itself. It is provided for
+use by callout functions that want to cause <b>pcre2_match()</b> to return a
+distinctive error code. See the
+<a href="pcre2callout.html"><b>pcre2callout</b></a>
+documentation for details.
+<pre>
+  PCRE2_ERROR_INTERNAL
+</pre>
+An unexpected internal error has occurred. This error could be caused by a bug
+in PCRE2 or by overwriting of the compiled pattern.
+<pre>
+  PCRE2_ERROR_JIT_BADOPTION
+</pre>
+This error is returned when a pattern that was successfully studied using JIT
+is being matched, but the matching mode (partial or complete match) does not
+correspond to any JIT compilation mode. When the JIT fast path function is
+used, this error may be also given for invalid options. See the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation for more details.
+<pre>
+  PCRE2_ERROR_JIT_STACKLIMIT
+</pre>
+This error is returned when a pattern that was successfully studied using JIT
+is being matched, but the memory available for the just-in-time processing
+stack is not large enough. See the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation for more details.
+<pre>
+  PCRE2_ERROR_MATCHLIMIT
+</pre>
+The backtracking limit was reached.
+<pre>
+  PCRE2_ERROR_NOMEMORY
+</pre>
+If a pattern contains back references, but the ovector is not big enough to
+remember the referenced substrings, PCRE2 gets a block of memory at the start
+of matching to use for this purpose. There are some other special cases where
+extra memory is needed during matching. This error is given when memory cannot
+be obtained.
+<pre>
+  PCRE2_ERROR_NULL
+</pre>
+Either the <i>code</i>, <i>subject</i>, or <i>match_data</i> argument was passed
+as NULL.
+<pre>
+  PCRE2_ERROR_RECURSELOOP
+</pre>
+This error is returned when <b>pcre2_match()</b> detects a recursion loop within
+the pattern. Specifically, it means that either the whole pattern or a
+subpattern has been called recursively for the second time at the same position
+in the subject string. Some simple patterns that might do this are detected and
+faulted at compile time, but more complicated cases, in particular mutual
+recursions between two different subpatterns, cannot be detected until run
+time.
+<pre>
+  PCRE2_ERROR_RECURSIONLIMIT
+</pre>
+The internal recursion limit was reached.
+<a name="extractbynumber"></a></P>
+<br><a name="SEC24" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
+<P>
+<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_copy_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
+<b>  PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_get_bynumber(pcre2_match_data *<i>match_data</i>,</b>
+<b>  unsigned int <i>number</i>, PCRE2_UCHAR **<i>bufferptr</i>,</b>
+<b>  PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>void pcre2_substring_free(PCRE2_UCHAR *<i>buffer</i>);</b>
+</P>
+<P>
+Captured substrings can be accessed directly by using the ovector as described
+<a href="#matchedstrings">above.</a>
+For convenience, auxiliary functions are provided for extracting captured
+substrings as new, separate, zero-terminated strings. The functions in this
+section identify substrings by number. The next section describes similar
+functions for extracting substrings by name. A substring that contains a binary
+zero is correctly extracted and has a further zero added on the end, but the
+result is not, of course, a C string.
+</P>
+<P>
+You can find the length in code units of a captured substring without
+extracting it by calling <b>pcre2_substring_length_bynumber()</b>. The first
+argument is a pointer to the match data block, the second is the group number,
+and the third is a pointer to a variable into which the length is placed.
+</P>
+<P>
+The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a 
+supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
+new memory, obtained using the same memory allocation function that was used
+for the match data block. The first two arguments of these functions are a
+pointer to the match data block and a capturing group number. A group number of
+zero extracts the substring that matched the entire pattern, and higher values
+extract the captured substrings.
+</P>
+<P>
+The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
+the buffer and a pointer to a variable that contains its length in code units.
+This is updated to contain the actual number of code units used, excluding the
+terminating zero.
+</P>
+<P>
+For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point 
+to variables that are updated with a pointer to the new memory and the number 
+of code units that comprise the substring, again excluding the terminating 
+zero. When the substring is no longer needed, the memory should be freed by 
+calling <b>pcre2_substring_free()</b>.
+</P>
+<P>
+The return value from these functions is zero for success, or one of these
+error codes:
+<pre>
+  PCRE2_ERROR_NOMEMORY
+</pre>
+The buffer was too small for <b>pcre2_substring_copy_bynumber()</b>, or the
+attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
+<pre>
+  PCRE2_ERROR_NOSUBSTRING
+</pre>
+No substring with the given number was captured. This could be because there is 
+no capturing group of that number in the pattern, or because the group with 
+that number did not participate in the match, or because the ovector was too 
+small to capture that group.
+</P>
+<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
+<P>
+<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
+<br>
+<br>
+<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
+</P>
+<P>
+The <b>pcre2_substring_list_get()</b> function extracts all available substrings
+and builds a list of pointers to them, and a second list that contains their
+lengths (in code units), excluding a terminating zero that is added to each of 
+them. All this is done in a single block of memory that is obtained using the
+same memory allocation function that was used to get the match data block.
+</P>
+<P>
+The address of the memory block is returned via <i>listptr</i>, which is also
+the start of the list of string pointers. The end of the list is marked by a
+NULL pointer. The address of the list of lengths is returned via
+<i>lengthsptr</i>. If your strings do not contain binary zeros and you do not
+therefore need the lengths, you may supply NULL as the <i>lengthsptr</i>
+argument to disable the creation of a list of lengths. The yield of the
+function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
+could not be obtained. When the list is no longer needed, it should be freed by 
+calling <b>pcre2_substring_list_free()</b>.
+</P>
+<P>
+If this function encounters a substring that is unset, which can happen when
+capturing subpattern number <i>n+1</i> matches some part of the subject, but
+subpattern <i>n</i> has not been used at all, it returns an empty string. This
+can be distinguished from a genuine zero-length substring by inspecting the
+appropriate offset in the ovector, which contains PCRE2_UNSET for unset
+substrings.
+<a name="extractbyname"></a></P>
+<br><a name="SEC26" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
+<P>
+<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_length_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_SIZE *<i>length</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_copy_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_UCHAR *<i>buffer</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>int pcre2_substring_get_byname(pcre2_match_data *<i>match_data</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_UCHAR **<i>bufferptr</i>, PCRE2_SIZE *<i>bufflen</i>);</b>
+<br>
+<br>
+<b>void pcre2_substring_free(PCRE2_UCHAR *<i>buffer</i>);</b>
+</P>
+<P>
+To extract a substring by name, you first have to find the associated number.
+For example, for this pattern:
+<pre>
+  (a+)b(?&#60;xxx&#62;\d+)...
+</pre>
+the number of the subpattern called "xxx" is 2. If the name is known to be
+unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
+calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
+compiled pattern, and the second is the name. The yield of the function is the
+subpattern number, or PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
+name.
+</P>
+<P>
+Given the number, you can extract the substring directly, or use one of the
+functions described in the previous section. For convenience, there are also
+"byname" functions that correspond to the "bynumber" functions, the only 
+difference being that the second argument is a name instead of a number.
+However, if PCRE2_DUPNAMES is set and there are duplicate names,
+the behaviour may not be what you want (see the next section).
+</P>
+<P>
+<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
+subpatterns with the same number, as described in the
+<a href="pcre2pattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
+in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+page, you cannot use names to distinguish the different subpatterns, because
+names are not included in the compiled code. The matching process uses only
+numbers. For this reason, the use of different names for subpatterns of the
+same number causes an error at compile time.
+</P>
+<br><a name="SEC27" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
+<P>
+<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
+<b>  PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
+</P>
+<P>
+When a pattern is compiled with the PCRE2_DUPNAMES option, names for
+subpatterns are not required to be unique. Duplicate names are always allowed
+for subpatterns with the same number, created by using the (?| feature. Indeed,
+if such subpatterns are named, they are required to use the same names.
+</P>
+<P>
+Normally, patterns with duplicate names are such that in any one match, only
+one of the named subpatterns participates. An example is shown in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation.
+</P>
+<P>
+When duplicates are present, <b>pcre2_substring_copy_byname()</b> and
+<b>pcre2_substring_get_byname()</b> return the first substring corresponding to
+the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING is
+returned. The <b>pcre2_substring_number_from_name()</b> function returns one of
+the numbers that are associated with the name, but it is not defined which it
+is.
+</P>
+<P>
+If you want to get full details of all captured substrings for a given name,
+you must use the <b>pcre2_substring_nametable_scan()</b> function. The first
+argument is the compiled pattern, and the second is the name. If the third and
+fourth arguments are NULL, the function returns a group number (it is not
+defined which). Otherwise, the third and fourth arguments must be pointers to
+variables that are updated by the function. After it has run, they point to the
+first and last entries in the name-to-number table for the given name, and the
+function returns the length of each entry. In both cases,
+PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name.
+</P>
+<P>
+The format of the name table is described in the section entitled
+<i>Information about a pattern</i>
+<a href="#infoaboutpattern">above.</a>
+Given all the relevant entries for the name, you can extract each of their
+numbers, and hence the captured data.
+</P>
+<br><a name="SEC28" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
+<P>
+The traditional matching function uses a similar algorithm to Perl, which stops
+when it finds the first match, starting at a given point in the subject. If you
+want to find all possible matches, or the longest possible match at a given 
+position, consider using the alternative matching function (see below) instead.
+If you cannot use the alternative function, you can kludge it up by making use
+of the callout facility, which is described in the
+<a href="pcre2callout.html"><b>pcre2callout</b></a>
+documentation.
+</P>
+<P>
+What you have to do is to insert a callout right at the end of the pattern.
+When your callout function is called, extract and save the current matched
+substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
+other alternatives. Ultimately, when it runs out of matches,
+<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
+<a name="dfamatch"></a></P>
+<br><a name="SEC29" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
+<P>
+<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
+<b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
+<b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
+<b>  pcre2_match_context *<i>mcontext</i>,</b>
+<b>  int *<i>workspace</i>, PCRE2_SIZE <i>wscount</i>);</b>
+</P>
+<P>
+The function <b>pcre2_dfa_match()</b> is called to match a subject string
+against a compiled pattern, using a matching algorithm that scans the subject
+string just once, and does not backtrack. This has different characteristics to
+the normal algorithm, and is not compatible with Perl. Some of the features of
+PCRE2 patterns are not supported. Nevertheless, there are times when this kind
+of matching can be useful. For a discussion of the two matching algorithms, and
+a list of features that <b>pcre2_dfa_match()</b> does not support, see the
+<a href="pcre2matching.html"><b>pcre2matching</b></a>
+documentation.
+</P>
+<P>
+The arguments for the <b>pcre2_dfa_match()</b> function are the same as for
+<b>pcre2_match()</b>, plus two extras. The ovector within the match data block
+is used in a different way, and this is described below. The other common
+arguments are used in the same way as for <b>pcre2_match()</b>, so their
+description is not repeated here.
+</P>
+<P>
+The two additional arguments provide workspace for the function. The workspace
+vector should contain at least 20 elements. It is used for keeping track of
+multiple paths through the pattern tree. More workspace is needed for patterns
+and subjects where there are a lot of potential matches.
+</P>
+<P>
+Here is an example of a simple call to <b>pcre2_dfa_match()</b>:
+<pre>
+  int wspace[20];
+  pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_dfa_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    md,             /* the match data block */
+    NULL,           /* a match context; NULL means use defaults */
+    wspace,         /* working space vector */
+    20);            /* number of elements (NOT size in bytes) */
+</PRE>
+</P>
+<br><b>
+Option bits for <b>pcre2_dfa_match()</b>
+</b><br>
+<P>
+The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
+be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
+PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
+PCRE2_NO_START_OPTIMIZE, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
+PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of these are
+exactly the same as for <b>pcre2_match()</b>, so their description is not
+repeated here.
+<pre>
+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+</pre>
+These have the same general effect as they do for <b>pcre2_match()</b>, but the
+details are slightly different. When PCRE2_PARTIAL_HARD is set for
+<b>pcre2_dfa_match()</b>, it returns PCRE2_ERROR_PARTIAL if the end of the
+subject is reached and there is still at least one matching possibility that
+requires additional characters. This happens even if some complete matches have
+already been found. When PCRE2_PARTIAL_SOFT is set, the return code
+PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL if the end of the
+subject is reached, there have been no complete matches, but there is still at
+least one matching possibility. The portion of the string that was inspected
+when the longest partial match was found is set as the first matching string in
+both cases. There is a more detailed discussion of partial and multi-segment
+matching, with examples, in the
+<a href="pcre2partial.html"><b>pcre2partial</b></a>
+documentation.
+<pre>
+  PCRE2_DFA_SHORTEST
+</pre>
+Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as
+soon as it has found one match. Because of the way the alternative algorithm
+works, this is necessarily the shortest possible match at the first possible
+matching point in the subject string.
+<pre>
+  PCRE2_DFA_RESTART
+</pre>
+When <b>pcre2_dfa_match()</b> returns a partial match, it is possible to call it
+again, with additional subject characters, and have it continue with the same
+match. The PCRE2_DFA_RESTART option requests this action; when it is set, the
+<i>workspace</i> and <i>wscount</i> arguments must reference the same vector as
+before because data about the match so far is left in them after a partial
+match. There is more discussion of this facility in the
+<a href="pcre2partial.html"><b>pcre2partial</b></a>
+documentation.
+</P>
+<br><b>
+Successful returns from <b>pcre2_dfa_match()</b>
+</b><br>
+<P>
+When <b>pcre2_dfa_match()</b> succeeds, it may have matched more than one
+substring in the subject. Note, however, that all the matches from one run of
+the function start at the same point in the subject. The shorter matches are
+all initial substrings of the longer matches. For example, if the pattern
+<pre>
+  &#60;.*&#62;
+</pre>
+is matched against the string
+<pre>
+  This is &#60;something&#62; &#60;something else&#62; &#60;something further&#62; no more
+</pre>
+the three matched strings are
+<pre>
+  &#60;something&#62;
+  &#60;something&#62; &#60;something else&#62;
+  &#60;something&#62; &#60;something else&#62; &#60;something further&#62;
+</pre>
+On success, the yield of the function is a number greater than zero, which is
+the number of matched substrings. The offsets of the substrings are returned in
+the ovector, and can be extracted in the same way as for <b>pcre2_match()</b>.
+They are returned in reverse order of length; that is, the longest
+matching string is given first. If there were too many matches to fit into
+the ovector, the yield of the function is zero, and the vector is filled with
+the longest matches.
+</P>
+<P>
+NOTE: PCRE2's "auto-possessification" optimization usually applies to character
+repeats at the end of a pattern (as well as internally). For example, the
+pattern "a\d+" is compiled as if it were "a\d++" because there is no point in
+backtracking into the repeated digits. For DFA matching, this means that only
+one possible match is found. If you really do want multiple matches in such
+cases, either use an ungreedy repeat ("a\d+?") or set the
+PCRE2_NO_AUTO_POSSESS option when compiling.
+</P>
+<br><b>
+Error returns from <b>pcre2_dfa_match()</b>
+</b><br>
+<P>
+The <b>pcre2_dfa_match()</b> function returns a negative number when it fails.
+Many of the errors are the same as for <b>pcre2_match()</b>, as described
+<a href="#errorlist">above.</a>
+There are in addition the following errors that are specific to
+<b>pcre2_dfa_match()</b>:
+<pre>
+  PCRE2_ERROR_DFA_UITEM
+</pre>
+This return is given if <b>pcre2_dfa_match()</b> encounters an item in the
+pattern that it does not support, for instance, the use of \C or a back
+reference.
+<pre>
+  PCRE2_ERROR_DFA_UCOND
+</pre>
+This return is given if <b>pcre2_dfa_match()</b> encounters a condition item
+that uses a back reference for the condition, or a test for recursion in a
+specific group. These are not supported.
+<pre>
+  PCRE2_ERROR_DFA_WSSIZE
+</pre>
+This return is given if <b>pcre2_dfa_match()</b> runs out of space in the
+<i>workspace</i> vector.
+<pre>
+  PCRE2_ERROR_DFA_RECURSE
+</pre>
+When a recursive subpattern is processed, the matching function calls itself
+recursively, using private memory for the ovector and <i>workspace</i>. This
+error is given if the internal ovector is not large enough. This should be
+extremely rare, as a vector of size 1000 is used.
+<pre>
+  PCRE2_ERROR_DFA_BADRESTART
+</pre>
+When <b>pcre2_dfa_match()</b> is called with the <b>PCRE2_DFA_RESTART</b> option,
+some plausibility checks are made on the contents of the workspace, which
+should contain data about the previous partial match. If any of these checks
+fail, this error is given.
+</P>
+<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3), 
+<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3), 
+<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
+</P>
+<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC32" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 16 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2callout.html b/doc/html/pcre2callout.html
new file mode 100644
index 0000000..c742f90
--- /dev/null
+++ b/doc/html/pcre2callout.html
@@ -0,0 +1,270 @@
+<html>
+<head>
+<title>pcre2callout specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2callout man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
+<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
+<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
+<li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
+<li><a name="TOC6" href="#SEC6">AUTHOR</a>
+<li><a name="TOC7" href="#SEC7">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int (*pcre2_callout)(pcre2_callout_block *);</b>
+</P>
+<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
+<P>
+PCRE2 provides a feature called "callout", which is a means of temporarily
+passing control to the caller of PCRE2 in the middle of pattern matching. The
+caller of PCRE2 provides an external function by putting its entry point in
+a match context (see <b>pcre2_set_callout()</b> in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation).
+</P>
+<P>
+Within a regular expression, (?C) indicates the points at which the external
+function is to be called. Different callout points can be identified by putting
+a number less than 256 after the letter C. The default value is zero.
+For example, this pattern has two callout points:
+<pre>
+  (?C1)abc(?C2)def
+</pre>
+If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
+automatically inserts callouts, all with number 255, before each item in the
+pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
+<pre>
+  A(\d{2}|--)
+</pre>
+it is processed as if it were
+<br>
+<br>
+(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+<br>
+<br>
+Notice that there is a callout before and after each parenthesis and
+alternation bar. If the pattern contains a conditional group whose condition is
+an assertion, an automatic callout is inserted immediately before the
+condition. Such a callout may also be inserted explicitly, for example:
+<pre>
+  (?(?C9)(?=a)ab|de)
+</pre>
+This applies only to assertion conditions (because they are themselves
+independent groups).
+</P>
+<P>
+Automatic callouts can be used for tracking the progress of pattern matching.
+The
+<a href="pcre2test.html"><b>pcre2test</b></a>
+program has a pattern qualifier (/auto_callout) that sets automatic callouts;
+when it is used, the output indicates how the pattern is being matched. This is
+useful information when you are trying to optimize the performance of a
+particular pattern.
+</P>
+<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
+<P>
+You should be aware that, because of optimizations in the way PCRE2 compiles
+and matches patterns, callouts sometimes do not happen exactly as you might
+expect.
+</P>
+<P>
+At compile time, PCRE2 "auto-possessifies" repeated items when it knows that
+what follows cannot be part of the repeat. For example, a+[bc] is compiled as
+if it were a++[bc]. The <b>pcre2test</b> output when this pattern is anchored
+and then applied with automatic callouts to the string "aaaa" is:
+<pre>
+  ---&#62;aaaa
+   +0 ^        ^
+   +1 ^        a+
+   +3 ^   ^    [bc]
+  No match
+</pre>
+This indicates that when matching [bc] fails, there is no backtracking into a+
+and therefore the callouts that would be taken for the backtracks do not occur.
+You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS
+to <b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
+this is done in <b>pcre2test</b> (using the /no_auto_possess qualifier), the
+output changes to this:
+<pre>
+  ---&#62;aaaa
+   +0 ^        ^
+   +1 ^        a+
+   +3 ^   ^    [bc]
+   +3 ^  ^     [bc]
+   +3 ^ ^      [bc]
+   +3 ^^       [bc]
+  No match
+</pre>
+This time, when matching [bc] fails, the matcher backtracks into a+ and tries
+again, repeatedly, until a+ itself fails.
+</P>
+<P>
+Other optimizations that provide fast "no match" results also affect callouts.
+For example, if the pattern is
+<pre>
+  ab(?C4)cd
+</pre>
+PCRE2 knows that any matching string must contain the letter "d". If the
+subject string is "abyz", the lack of "d" means that matching doesn't ever
+start, and the callout is never reached. However, with "abyd", though the
+result is still no match, the callout is obeyed.
+</P>
+<P>
+PCRE2 also knows the minimum length of a matching string, and will immediately
+give a "no match" return without actually running a match if the subject is not
+long enough, or, for unanchored patterns, if it has been scanned far enough.
+</P>
+<P>
+You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
+option to the matching function, or by starting the pattern with
+(*NO_START_OPT). This slows down the matching process, but does ensure that
+callouts such as the example above are obeyed.
+</P>
+<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
+<P>
+During matching, when PCRE2 reaches a callout point, the external function that
+is set in the match context is called (if it is set). This applies to both
+normal and DFA matching. The only argument to the callout function is a pointer
+to a <b>pcre2_callout_block</b> structure. This structure contains the following fields:
+<pre>
+  uint32_t      <i>version</i>;
+  uint32_t      <i>callout_number</i>;
+  uint32_t      <i>capture_top</i>;
+  uint32_t      <i>capture_last</i>;
+  void         *<i>callout_data</i>;
+  PCRE2_SIZE   *<i>offset_vector</i>;
+  PCRE2_SPTR    <i>mark</i>;
+  PCRE2_SPTR    <i>subject</i>;
+  PCRE2_SIZE    <i>subject_length</i>;
+  PCRE2_SIZE    <i>start_match</i>;
+  PCRE2_SIZE    <i>current_position</i>;
+  PCRE2_SIZE    <i>pattern_position</i>;
+  PCRE2_SIZE    <i>next_item_length</i>;
+</pre>
+The <i>version</i> field contains the version number of the block format. The
+current version is 0. The version number will change in future if additional
+fields are added, but the intention is never to remove any of the existing
+fields.
+</P>
+<P>
+The <i>callout_number</i> field contains the number of the callout, as compiled
+into the pattern (that is, the number after ?C for manual callouts, and 255 for
+automatically generated callouts).
+</P>
+<P>
+The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
+(the "ovector") that was passed to the matching function in the match data
+block. When <b>pcre2_match()</b> is used, the contents can be inspected, in
+order to extract substrings that have been matched so far, in the same way as
+for extracting substrings after a match has completed. For the DFA matching
+function, this field is not useful.
+</P>
+<P>
+The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
+that were passed to the matching function.
+</P>
+<P>
+The <i>start_match</i> field normally contains the offset within the subject at
+which the current match attempt started. However, if the escape sequence \K
+has been encountered, this value is changed to reflect the modified starting
+point. If the pattern is not anchored, the callout function may be called
+several times from the same point in the pattern for different starting points
+in the subject.
+</P>
+<P>
+The <i>current_position</i> field contains the offset within the subject of the
+current match pointer.
+</P>
+<P>
+When <b>pcre2_match()</b> is used, the <i>capture_top</i> field contains one
+more than the number of the highest numbered captured substring so far. If no
+substrings have been captured, the value of <i>capture_top</i> is one. This is
+always the case when the DFA functions are used, because they do not support
+captured substrings.
+</P>
+<P>
+The <i>capture_last</i> field contains the number of the most recently captured
+substring. However, when a recursion exits, the value reverts to what it was
+outside the recursion, as do the values of all captured substrings. If no
+substrings have been captured, the value of <i>capture_last</i> is 0. This is
+always the case for the DFA matching functions.
+</P>
+<P>
+The <i>callout_data</i> field contains a value that is passed to a matching
+function specifically so that it can be passed back in callouts. It is set in
+the match context when the callout is set up by calling
+<b>pcre2_set_callout()</b> (see the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation).
+</P>
+<P>
+The <i>pattern_position</i> field contains the offset to the next item to be
+matched in the pattern string.
+</P>
+<P>
+The <i>next_item_length</i> field contains the length of the next item to be
+matched in the pattern string. When the callout immediately precedes an
+alternation bar, a closing parenthesis, or the end of the pattern, the length
+is zero. When the callout precedes an opening parenthesis, the length is that
+of the entire subpattern.
+</P>
+<P>
+The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
+help in distinguishing between different automatic callouts, which all have the
+same callout number. However, they are set for all callouts.
+</P>
+<P>
+In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
+the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
+(*THEN) item in the match, or NULL if no such items have been passed. Instances
+of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
+callouts from the DFA matching function this field always contains NULL.
+</P>
+<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
+<P>
+The external callout function returns an integer to PCRE2. If the value is
+zero, matching proceeds as normal. If the value is greater than zero, matching
+fails at the current point, but the testing of other matching possibilities
+goes ahead, just as if a lookahead assertion had failed. If the value is less
+than zero, the match is abandoned, and the matching function returns the
+negative value.
+</P>
+<P>
+Negative values should normally be chosen from the set of PCRE2_ERROR_xxx
+values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
+failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
+functions; it will never be used by PCRE2 itself.
+</P>
+<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC7" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 19 October 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2demo.html b/doc/html/pcre2demo.html
new file mode 100644
index 0000000..2d1d92b
--- /dev/null
+++ b/doc/html/pcre2demo.html
@@ -0,0 +1,443 @@
+<html>
+<head>
+<title>pcre2demo specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2demo man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+</ul>
+<PRE>
+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+calling the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit 
+width. This demonstration program uses the 8-bit library.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* This macro must be defined before including pcre2.h. For a program that uses 
+only one code unit width, it makes it possible to use generic function names 
+such as pcre2_compile(). */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include &lt;stdio.h&gt;
+#include &lt;string.h&gt;
+#include &lt;pcre2.h&gt;
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+int main(int argc, char **argv)
+{
+pcre2_code *re;
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR name_table;
+
+int crlf_is_newline;
+int errornumber;
+int find_all;
+int i;
+int namecount;
+int name_entry_size;
+int rc;
+int utf8;
+
+uint32_t option_bits;
+uint32_t newline;
+
+PCRE2_SIZE erroroffset;
+PCRE2_SIZE *ovector;
+
+size_t subject_length;
+pcre2_match_data *match_data;
+
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present. Apart from that, there must be exactly two *
+* arguments.                                                              *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i &lt; argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+    else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Two arguments required: a regex and a subject string\n");
+  return 1;
+  }
+
+/* As pattern and subject are char arguments, they can be straightforwardly
+cast to PCRE2_SPTR as we are working in 8-bit code units. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,              /* the pattern */
+  -1,                   /* indicates pattern is zero-terminated */ 
+  0,                    /* default options */
+  &amp;errornumber,         /* for error number */
+  &amp;erroroffset,         /* for error offset */
+  NULL);                /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256]; 
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, 
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result.        *
+*************************************************************************/
+
+/* Using this function ensures that the block is exactly the right size for
+the number of capturing parentheses in the pattern. */
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc &lt; 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\n", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /* data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets are 
+stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used 
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\n");
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i &lt; rc; i++)
+  {
+  PCRE2_SPTR substring_start = subject + ovector[2*i];
+  size_t substring_length = ovector[2*i+1] - ovector[2*i];
+  printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &amp;namecount);          /* where to put the answer */
+
+if (namecount &lt;= 0) printf("No named substrings\n"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\n");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &amp;name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &amp;name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two 
+  bytes, most significant first. */
+
+  tabptr = name_table;
+  for (i = 0; i &lt; namecount; i++)
+    {
+    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
+    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+    tabptr += name_entry_size;
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &amp;option_bits);
+utf8 = (option_bits &amp; PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF; 
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                    /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
+        start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
+        subject[start_offset] == '\r' &amp;&amp;
+        subject[start_offset + 1] == '\n')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] &lt; subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc &lt; 0)
+    {
+    printf("Matching error %d\n", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\n");
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. */
+
+  for (i = 0; i &lt; rc; i++)
+    {
+    PCRE2_SPTR substring_start = subject + ovector[2*i];
+    size_t substring_length = ovector[2*i+1] - ovector[2*i];
+    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+    }
+
+  if (namecount &lt;= 0) printf("No named substrings\n"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\n");
+    for (i = 0; i &lt; namecount; i++)
+      {
+      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
+      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+      tabptr += name_entry_size;
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\n");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
new file mode 100644
index 0000000..30b527d
--- /dev/null
+++ b/doc/html/pcre2test.html
@@ -0,0 +1,1199 @@
+<html>
+<head>
+<title>pcre2test specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2test man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
+<li><a name="TOC2" href="#SEC2">PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
+<li><a name="TOC3" href="#SEC3">INPUT ENCODING</a>
+<li><a name="TOC4" href="#SEC4">COMMAND LINE OPTIONS</a>
+<li><a name="TOC5" href="#SEC5">DESCRIPTION</a>
+<li><a name="TOC6" href="#SEC6">COMMAND LINES</a>
+<li><a name="TOC7" href="#SEC7">MODIFIER SYNTAX</a>
+<li><a name="TOC8" href="#SEC8">PATTERN SYNTAX</a>
+<li><a name="TOC9" href="#SEC9">SUBJECT LINE SYNTAX</a>
+<li><a name="TOC10" href="#SEC10">PATTERN MODIFIERS</a>
+<li><a name="TOC11" href="#SEC11">SUBJECT MODIFIERS</a>
+<li><a name="TOC12" href="#SEC12">THE ALTERNATIVE MATCHING FUNCTION</a>
+<li><a name="TOC13" href="#SEC13">DEFAULT OUTPUT FROM pcre2test</a>
+<li><a name="TOC14" href="#SEC14">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
+<li><a name="TOC15" href="#SEC15">RESTARTING AFTER A PARTIAL MATCH</a>
+<li><a name="TOC16" href="#SEC16">CALLOUTS</a>
+<li><a name="TOC17" href="#SEC17">NON-PRINTING CHARACTERS</a>
+<li><a name="TOC18" href="#SEC18">SEE ALSO</a>
+<li><a name="TOC19" href="#SEC19">AUTHOR</a>
+<li><a name="TOC20" href="#SEC20">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
+<P>
+<b>pcre2test [options] [input file [output file]]</b>
+<br>
+<br>
+<b>pcre2test</b> is a test program for the PCRE2 regular expression libraries,
+but it can also be used for experimenting with regular expressions. This
+document describes the features of the test program; for details of the regular
+expressions themselves, see the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation. For details of the PCRE2 library function calls and their
+options, see the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation.
+</P>
+<P>
+The input for <b>pcre2test</b> is a sequence of regular expression patterns and
+subject strings to be matched. The output shows the result of each match
+attempt. Modifiers on the command line, the patterns, and the subject lines
+specify PCRE2 function options, control how the subject is processed, and what
+output is produced.
+</P>
+<P>
+As the original fairly simple PCRE library evolved, it acquired many different
+features, and as a result, the original <b>pcretest</b> program ended up with a
+lot of options in a messy, arcane syntax, for testing all the features. The
+move to the new PCRE2 API provided an opportunity to re-implement the test
+program as <b>pcre2test</b>, with a cleaner modifier syntax. Nevertheless, there
+are still many obscure modifiers, some of which are specifically designed for
+use in conjunction with the test script and data files that are distributed as
+part of PCRE2. All the modifiers are documented here, some without much
+justification, but many of them are unlikely to be of use except when testing
+the libraries.
+</P>
+<br><a name="SEC2" href="#TOC1">PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
+<P>
+Different versions of the PCRE2 library can be built to support character
+strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or
+all three of these libraries may be simultaneously installed. The
+<b>pcre2test</b> program can be used to test all the libraries. However, its own
+input and output are always in 8-bit format. When testing the 16-bit or 32-bit
+libraries, patterns and subject strings are converted to 16- or 32-bit format
+before being passed to the library functions. Results are converted back to
+8-bit code units for output.
+</P>
+<P>
+In the rest of this document, the names of library functions and structures
+are given in generic form, for example, <b>pcre2_compile()</b>. The actual
+names used in the libraries have a suffix _8, _16, or _32, as appropriate.
+</P>
+<br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br>
+<P>
+Input to <b>pcre2test</b> is processed line by line, either by calling the C
+library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
+below). In Unix-like environments, <b>fgets()</b> treats any bytes other than
+newline as data characters. However, in some Windows environments character 26
+(hex 1A) causes an immediate end of file, and no further data is read. For
+maximum portability, therefore, it is safest to avoid non-printing characters
+in <b>pcre2test</b> input files.
+</P>
+<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
+<P>
+<b>-8</b>
+If the 8-bit library has been built, this option causes it to be used (this is
+the default). If the 8-bit library has not been built, this option causes an
+error.
+</P>
+<P>
+<b>-16</b>
+If the 16-bit library has been built, this option causes it to be used. If only
+the 16-bit library has been built, this is the default. If the 16-bit library
+has not been built, this option causes an error.
+</P>
+<P>
+<b>-32</b>
+If the 32-bit library has been built, this option causes it to be used. If only
+the 32-bit library has been built, this is the default. If the 32-bit library
+has not been built, this option causes an error.
+</P>
+<P>
+<b>-b</b>
+Behave as if each pattern has the <b>/fullbincode</b> modifier; the full
+internal binary form of the pattern is output after compilation.
+</P>
+<P>
+<b>-C</b>
+Output the version number of the PCRE2 library, and all available information
+about the optional features that are included, and then exit with zero exit
+code. All other options are ignored.
+</P>
+<P>
+<b>-C</b> <i>option</i>
+Output information about a specific build-time option, then exit. This
+functionality is intended for use in scripts such as <b>RunTest</b>. The
+following options output the value and set the exit code as indicated:
+<pre>
+  ebcdic-nl  the code for LF (= NL) in an EBCDIC environment:
+               0x15 or 0x25
+               0 if used in an ASCII environment
+               exit code is always 0
+  linksize   the configured internal link size (2, 3, or 4)
+               exit code is set to the link size
+  newline    the default newline setting:
+               CR, LF, CRLF, ANYCRLF, or ANY
+               exit code is always 0
+  bsr        the default setting for what \R matches:
+               ANYCRLF or ANY
+               exit code is always 0
+</pre>
+The following options output 1 for true or 0 for false, and set the exit code
+to the same value:
+<pre>
+  ebcdic     compiled for an EBCDIC environment
+  jit        just-in-time support is available
+  pcre16     the 16-bit library was built
+  pcre32     the 32-bit library was built
+  pcre8      the 8-bit library was built
+  unicode    Unicode support is available
+</pre>
+If an unknown option is given, an error message is output; the exit code is 0.
+</P>
+<P>
+<b>-d</b>
+Behave as if each pattern has the <b>debug</b> modifier; the internal
+form and information about the compiled pattern is output after compilation;
+<b>-d</b> is equivalent to <b>-b -i</b>.
+</P>
+<P>
+<b>-dfa</b>
+Behave as if each subject line has the <b>dfa</b> modifier; matching is done
+using the <b>pcre2_dfa_match()</b> function instead of the default
+<b>pcre2_match()</b>.
+</P>
+<P>
+<b>-help</b>
+Output a brief summary of these options and then exit.
+</P>
+<P>
+<b>-i</b>
+Behave as if each pattern has the <b>/info</b> modifier; information about the
+compiled pattern is given after compilation.
+</P>
+<P>
+<b>-jit</b>
+Behave as if each pattern line has the <b>jit</b> modifier; after successful
+compilation, each pattern is passed to the just-in-time compiler, if available.
+</P>
+<P>
+<b>-pattern</b> <i>modifier-list</i>
+Behave as if each pattern line contains the given modifiers.
+</P>
+<P>
+<b>-q</b>
+Do not output the version number of <b>pcre2test</b> at the start of execution.
+</P>
+<P>
+<b>-S</b> <i>size</i>
+On Unix-like systems, set the size of the run-time stack to <i>size</i>
+megabytes.
+</P>
+<P>
+<b>-subject</b> <i>modifier-list</i>
+Behave as if each subject line contains the given modifiers.
+</P>
+<P>
+<b>-t</b>
+Run each compile and match many times with a timer, and output the resulting
+times per compile or match. You can control the number of iterations that are
+used for timing by following <b>-t</b> with a number (as a separate item on the
+command line). For example, "-t 1000" iterates 1000 times. The default is to
+iterate 500,000 times.
+</P>
+<P>
+<b>-tm</b>
+This is like <b>-t</b> except that it times only the matching phase, not the
+compile phase.
+</P>
+<P>
+<b>-T</b> <b>-TM</b>
+These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
+the total times for all compiles and matches are output.
+</P>
+<P>
+<b>-version</b>
+Output the PCRE2 version number and then exit.
+</P>
+<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
+<P>
+If <b>pcre2test</b> is given two filename arguments, it reads from the first and
+writes to the second. If it is given only one filename argument, it reads from
+that file and writes to stdout. Otherwise, it reads from stdin and writes to
+stdout, and prompts for each line of input, using "re&#62;" to prompt for regular
+expression patterns, and "data&#62;" to prompt for subject lines.
+</P>
+<P>
+When <b>pcre2test</b> is built, a configuration option can specify that it
+should be linked with the <b>libreadline</b> or <b>libedit</b> library. When this
+is done, if the input is from a terminal, it is read using the <b>readline()</b>
+function. This provides line-editing and history facilities. The output from
+the <b>-help</b> option states whether or not <b>readline()</b> will be used.
+</P>
+<P>
+The program handles any number of tests, each of which consists of a set of
+input lines. Each set starts with a regular expression pattern, followed by any
+number of subject lines to be matched against that pattern. In between sets of
+test data, command lines that begin with a hash (#) character may appear. This
+file format, with some restrictions, can also be processed by the
+<b>perltest.pl</b> script that is distributed with PCRE2 as a means of checking
+that the behaviour of PCRE2 and Perl is the same.
+</P>
+<P>
+Each subject line is matched separately and independently. If you want to do
+multi-line matches, you have to use the \n escape sequence (or \r or \r\n,
+etc., depending on the newline setting) in a single line of input to encode the
+newline sequences. There is no limit on the length of subject lines; the input
+buffer is automatically extended if it is too small. There is a replication
+feature that makes it possible to generate long subject lines without having to
+supply them explicitly.
+</P>
+<P>
+An empty line or the end of the file signals the end of the subject lines for a
+test, at which point a new pattern or command line is expected if there is
+still input to be read.
+</P>
+<br><a name="SEC6" href="#TOC1">COMMAND LINES</a><br>
+<P>
+In between sets of test data, a line that begins with a hash (#) character is
+interpreted as a command line. If the first character is followed by white
+space or an exclamation mark, the line is treated as a comment, and ignored.
+Otherwise, the following commands are recognized:
+<pre>
+  #forbid_utf
+</pre>
+Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
+options set, which locks out the use of UTF and Unicode property features. This
+is a trigger guard that is used in test files to ensure that UTF/Unicode tests
+are not accidentally added to files that are used when UTF support is not
+included in the library. This effect can also be obtained by the use of
+<b>#pattern</b>; the difference is that <b>#forbid_utf</b> cannot be unset, and
+the automatic options are not displayed in pattern information, to avoid
+cluttering up test output.
+<pre>
+  #pattern &#60;modifier-list&#62;
+</pre>
+This command sets a default modifier list that applies to all subsequent
+patterns. Modifiers on a pattern can change these settings.
+<pre>
+  #perltest
+</pre>
+The appearance of this line causes all subsequent modifier settings to be
+checked for compatibility with the <b>perltest.pl</b> script, which is used to
+confirm that Perl gives the same results as PCRE2. Also, apart from comment
+lines, none of the other command lines are permitted, because they and many
+of the modifiers are specific to <b>pcre2test</b>, and should not be used in
+test files that are also processed by <b>perltest.pl</b>. The <b>#perltest</b>
+command helps detect tests that are accidentally put in the wrong file.
+<pre>
+  #subject &#60;modifier-list&#62;
+</pre>
+This command sets a default modifier list that applies to all subsequent
+subject lines. Modifiers on a subject line can change these settings.
+</P>
+<br><a name="SEC7" href="#TOC1">MODIFIER SYNTAX</a><br>
+<P>
+Modifier lists are used with both pattern and subject lines. Items in a list
+are separated by commas and optional white space. Some modifiers may be given
+for both patterns and subject lines, whereas others are valid for one or the
+other only. Each modifier has a long name, for example "anchored", and some of
+them must be followed by an equals sign and a value, for example, "offset=12".
+Modifiers that do not take values may be preceded by a minus sign to turn off a
+previous default setting.
+</P>
+<P>
+A few of the more common modifiers can also be specified as single letters, for
+example "i" for "caseless". In documentation, following the Perl convention,
+these are written with a slash ("the /i modifier") for clarity. Abbreviated
+modifiers must all be concatenated in the first item of a modifier list. If the
+first item is not recognized as a long modifier name, it is interpreted as a
+sequence of these abbreviations. For example:
+<pre>
+  /abc/ig,newline=cr,jit=3
+</pre>
+This is a pattern line whose modifier list starts with two one-letter modifiers
+(/i and /g). The lower-case abbreviated modifiers are the same as used in Perl.
+</P>
+<br><a name="SEC8" href="#TOC1">PATTERN SYNTAX</a><br>
+<P>
+A pattern line must start with one of the following characters (common symbols,
+excluding pattern meta-characters):
+<pre>
+  / ! " ' ` - = _ : ; , % & @ ~
+</pre>
+This is interpreted as the pattern's delimiter. A regular expression may be
+continued over several input lines, in which case the newline characters are
+included within it. It is possible to include the delimiter within the pattern
+by escaping it with a backslash, for example
+<pre>
+  /abc\/def/
+</pre>
+If you do this, the escape and the delimiter form part of the pattern, but
+since the delimiters are all non-alphanumeric, this does not affect its
+interpretation. If the terminating delimiter is immediately followed by a
+backslash, for example,
+<pre>
+  /abc/\
+</pre>
+then a backslash is added to the end of the pattern. This is done to provide a
+way of testing the error condition that arises if a pattern finishes with a
+backslash, because
+<pre>
+  /abc\/
+</pre>
+is interpreted as the first line of a pattern that starts with "abc/", causing
+pcre2test to read the next line as a continuation of the regular expression.
+</P>
+<P>
+A pattern can be followed by a modifier list (details below).
+</P>
+<br><a name="SEC9" href="#TOC1">SUBJECT LINE SYNTAX</a><br>
+<P>
+Before each subject line is passed to <b>pcre2_match()</b> or
+<b>pcre2_dfa_match()</b>, leading and trailing white space is removed, and the
+line is scanned for backslash escapes. The following provide a means of
+encoding non-printing characters in a visible way:
+<pre>
+  \a         alarm (BEL, \x07)
+  \b         backspace (\x08)
+  \e         escape (\x1b)
+  \f         form feed (\x0c)
+  \n         newline (\x0a)
+  \r         carriage return (\x0d)
+  \t         tab (\x09)
+  \v         vertical tab (\x0b)
+  \nnn       octal character (up to 3 octal digits); always
+               a byte unless &#62; 255 in UTF-8 or 16-bit or 32-bit mode
+  \o{dd...}  octal character (any number of octal digits)
+  \xhh       hexadecimal byte (up to 2 hex digits)
+  \x{hh...}  hexadecimal character (any number of hex digits)
+</pre>
+The use of \x{hh...} is not dependent on the use of the utf modifier on
+the pattern. It is recognized always. There may be any number of hexadecimal
+digits inside the braces; invalid values provoke error messages.
+</P>
+<P>
+Note that \xhh specifies one byte rather than one character in UTF-8 mode;
+this makes it possible to construct invalid UTF-8 sequences for testing
+purposes. On the other hand, \x{hh} is interpreted as a UTF-8 character in
+UTF-8 mode, generating more than one byte if the value is greater than 127.
+When testing the 8-bit library not in UTF-8 mode, \x{hh} generates one byte
+for values less than 256, and causes an error for greater values.
+</P>
+<P>
+In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
+possible to construct invalid UTF-16 sequences for testing purposes.
+</P>
+<P>
+In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This makes it
+possible to construct invalid UTF-32 sequences for testing purposes.
+</P>
+<P>
+There is a special backslash sequence that specifies replication of one or more
+characters:
+<pre>
+  \[&#60;characters&#62;]{&#60;count&#62;}
+</pre>
+This makes it possible to test long strings without having to provide them as
+part of the file. For example:
+<pre>
+  \[abc]{4}
+</pre>
+is converted to "abcabcabcabc". This feature does not support nesting. To
+include a closing square bracket in the characters, code it as \x5D.
+</P>
+<P>
+A backslash followed by an equals sign marks the end of the subject string and
+the start of a modifier list. For example:
+<pre>
+  abc\=notbol,notempty
+</pre>
+A backslash followed by any other non-alphanumeric character just escapes that
+character. A backslash followed by anything else causes an error. However, if
+the very last character in the line is a backslash (and there is no modifier
+list), it is ignored. This gives a way of passing an empty line as data, since
+a real empty line terminates the data input.
+</P>
+<br><a name="SEC10" href="#TOC1">PATTERN MODIFIERS</a><br>
+<P>
+There are three types of modifier that can appear in pattern lines, two of
+which may also be used in a <b>#pattern</b> command. A pattern's modifier list
+can add to or override default modifiers that were set by a previous
+<b>#pattern</b> command.
+</P>
+<br><b>
+Setting compilation options
+</b><br>
+<P>
+The following modifiers set options for <b>pcre2_compile()</b>. The most common
+ones have single-letter abbreviations. See
+<a href="pcre2api.html"><b>pcre2api</b></a>
+for a description of their effects.
+<pre>
+      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      alt_bsux                  set PCRE2_ALT_BSUX
+      anchored                  set PCRE2_ANCHORED
+      auto_callout              set PCRE2_AUTO_CALLOUT
+  /i  caseless                  set PCRE2_CASELESS
+      dollar_endonly            set PCRE2_DOLLAR_ENDONLY
+  /s  dotall                    set PCRE2_DOTALL
+      dupnames                  set PCRE2_DUPNAMES
+  /x  extended                  set PCRE2_EXTENDED
+      firstline                 set PCRE2_FIRSTLINE
+      match_unset_backref       set PCRE2_MATCH_UNSET_BACKREF
+  /m  multiline                 set PCRE2_MULTILINE
+      never_ucp                 set PCRE2_NEVER_UCP
+      never_utf                 set PCRE2_NEVER_UTF
+      no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+      no_auto_possess           set PCRE2_NO_AUTO_POSSESS
+      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+      no_utf_check              set PCRE2_NO_UTF_CHECK
+      ucp                       set PCRE2_UCP
+      ungreedy                  set PCRE2_UNGREEDY
+      utf                       set PCRE2_UTF
+</pre>
+As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
+non-printing characters in output strings to be printed using the \x{hh...}
+notation. Otherwise, those less than 0x100 are output in hex without the curly
+brackets.
+</P>
+<br><b>
+Setting compilation controls
+</b><br>
+<P>
+The following modifiers affect the compilation process or request information
+about the pattern:
+<pre>
+      bsr=[anycrlf|unicode]     specify \R handling
+  /B  bincode                   show binary code without lengths
+      debug                     same as info,fullbincode
+      fullbincode               show binary code with lengths
+  /I  info                      show info about compiled pattern
+      hex                       pattern is coded in hexadecimal
+      jit[=&#60;number&#62;]            use JIT
+      locale=&#60;name&#62;             use this locale
+      memory                    show memory used
+      newline=&#60;type&#62;            set newline type
+      parens_nest_limit=&#60;n&#62;     set maximum parentheses depth
+      perlcompat                lock out non-Perl modifiers
+      posix                     use the POSIX API
+      stackguard=&#60;number&#62;       test the stackguard feature
+      tables=[0|1|2]            select internal tables
+      use_length                use the pattern's length
+</pre>
+The effects of these modifiers are described in the following sections.
+FIXME: Give more examples.
+</P>
+<br><b>
+Newline and \R handling
+</b><br>
+<P>
+The <b>bsr</b> modifier specifies what \R in a pattern should match. If it is
+set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to "unicode",
+\R matches any Unicode newline sequence. The default is specified when PCRE2
+is built, with the default default being Unicode.
+</P>
+<P>
+The <b>newline</b> modifier specifies which characters are to be interpreted as
+newlines, both in the pattern and (by default) in subject lines. The type must
+be one of CR, LF, CRLF, ANYCRLF, or ANY.
+</P>
+<P>
+Both the \R and newline settings can be changed at match time, but if this is
+done, JIT matching is disabled.
+</P>
+<br><b>
+Information about a pattern
+</b><br>
+<P>
+The <b>debug</b> modifier is a shorthand for <b>info,fullbincode</b>, requesting
+all available information.
+</P>
+<P>
+The <b>bincode</b> modifier causes a representation of the compiled code to be
+output after compilation. This information does not contain length and offset
+values, which ensures that the same output is generated for different internal
+link sizes and different code unit widths. By using <b>bincode</b>, the same
+regression tests can be used in different environments.
+</P>
+<P>
+The <b>fullbincode</b> modifier, by contrast, <i>does</i> include length and
+offset values. This is used in a few special tests and is also useful for
+one-off tests.
+</P>
+<P>
+The <b>info</b> modifier requests information about the compiled pattern
+(whether it is anchored, has a fixed first character, and so on). The
+information is obtained from the <b>pcre2_pattern_info()</b> function.
+</P>
+<br><b>
+Specifying a pattern in hex
+</b><br>
+<P>
+The <b>hex</b> modifier specifies that the characters of the pattern are to be
+interpreted as pairs of hexadecimal digits. White space is permitted between
+pairs. For example:
+<pre>
+  /ab 32 59/hex
+</pre>
+This feature is provided as a way of creating patterns that contain binary zero
+characters. When <b>hex</b> is set, it implies <b>use_length</b>.
+</P>
+<br><b>
+Using the pattern's length
+</b><br>
+<P>
+By default, <b>pcre2test</b> passes patterns as zero-terminated strings to
+<b>pcre2_compile()</b>, giving the length as -1. If <b>use_length</b> is set, the
+length of the pattern is passed. This is implied if <b>hex</b> is set.
+</P>
+<br><b>
+JIT compilation
+</b><br>
+<P>
+The <b>/jit</b> modifier may optionally be followed by a number in the range 0
+to 7:
+<pre>
+  0  disable JIT
+  1  normal match only
+  2  soft partial match only
+  3  normal match and soft partial match
+  4  hard partial match only
+  6  soft and hard partial match
+  7  all three modes
+</pre>
+If no number is given, 7 is assumed. If JIT compilation is successful, the
+compiled JIT code will automatically be used when <b>pcre2_match()</b> is run,
+except when incompatible run-time options are specified. For more details, see
+the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation. See also the <b>jitstack</b> modifier below for a way of
+setting the size of the JIT stack.
+</P>
+<P>
+If the <b>jitverify</b> modifier is specified, the text "(JIT)" is added to the
+first output line after a match or non match when JIT-compiled code was
+actually used. This modifier can also be set on a subject line.
+</P>
+<br><b>
+Setting a locale
+</b><br>
+<P>
+The <b>/locale</b> modifier must specify the name of a locale, for example:
+<pre>
+  /pattern/locale=fr_FR
+</pre>
+The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
+character tables for the locale, and this is then passed to
+<b>pcre2_compile()</b> when compiling the regular expression. The same tables
+are used when matching the following subject lines. The <b>/locale</b> modifier
+applies only to the pattern on which it appears, but can be given in a
+<b>#pattern</b> command if a default is needed. Setting a locale and alternate
+character tables are mutually exclusive.
+</P>
+<br><b>
+Showing pattern memory
+</b><br>
+<P>
+The <b>/memory</b> modifier causes the size in bytes of the memory block used to
+hold the compiled pattern to be output. This does not include the size of the
+<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
+subsequently passed to the JIT compiler, the size of the JIT compiled code is
+also output.
+</P>
+<br><b>
+Limiting nested parentheses
+</b><br>
+<P>
+The <b>parens_nest_limit</b> modifier sets a limit on the depth of nested
+parentheses in a pattern. Breaching the limit causes a compilation error.
+</P>
+<br><b>
+Using the POSIX wrapper API
+</b><br>
+<P>
+The <b>/posix</b> modifier causes <b>pcre2test</b> to call PCRE2 via the POSIX
+wrapper API rather than its native API. This supports only the 8-bit library.
+When the POSIX API is being used, the following pattern modifiers set options
+for the <b>regcomp()</b> function:
+<pre>
+  caseless           REG_ICASE
+  multiline          REG_NEWLINE
+  no_auto_capture    REG_NOSUB
+  dotall             REG_DOTALL     )
+  ungreedy           REG_UNGREEDY   ) These options are not part of
+  ucp                REG_UCP        )   the POSIX standard
+  utf                REG_UTF8       )
+</pre>
+The <b>aftertext</b> and <b>allaftertext</b> subject modifiers work as described
+below. All other modifiers cause an error.
+</P>
+<br><b>
+Testing the stack guard feature
+</b><br>
+<P>
+The <b>/stackguard</b> modifier is used to test the use of
+<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
+enable stack availability to be checked during compilation (see the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation for details). If the number specified by the modifier is greater
+than zero, <b>pcre2_set_compile_recursion_guard()</b> is called to set up
+a callback from <b>pcre2_compile()</b> to a local function. The argument it is
+passed is the current nesting parenthesis depth; if this is greater than the
+value given by the modifier, non-zero is returned, causing the compilation to
+be aborted.
+</P>
+<br><b>
+Using alternative character tables
+</b><br>
+<P>
+The <b>/tables</b> modifier must be followed by a single digit. It causes a
+specific set of built-in character tables to be passed to
+<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
+different character tables. The digit specifies the tables as follows:
+<pre>
+  0   do not pass any special character tables
+  1   the default ASCII tables, as distributed in
+        pcre2_chartables.c.dist
+  2   a set of tables defining ISO 8859 characters
+</pre>
+In table 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Setting alternate character tables and a locale
+are mutually exclusive.
+</P>
+<br><b>
+Setting certain match controls
+</b><br>
+<P>
+The following modifiers are really subject modifiers, and are described below.
+However, they may be included in a pattern's modifier list, in which case they
+are applied to every subject line that is processed with that pattern. They do
+not affect the compilation process.
+<pre>
+      aftertext                 show text after match
+      allaftertext              show text after captures
+      allcaptures               show all captures
+      allusedtext               show all consulted text 
+  /g  global                    global matching
+      jitverify                 verify JIT usage
+      mark                      show mark values
+</pre>
+These modifiers may not appear in a <b>#pattern</b> command. If you want them as
+defaults, set them in a <b>#subject</b> command.
+</P>
+<br><a name="SEC11" href="#TOC1">SUBJECT MODIFIERS</a><br>
+<P>
+The modifiers that can appear in subject lines and the <b>#subject</b>
+command are of two types.
+</P>
+<br><b>
+Setting match options
+</b><br>
+<P>
+The following modifiers set options for <b>pcre2_match()</b> or
+<b>pcre2_dfa_match()</b>. See
+<a href="pcre2api.html"><b>pcre2api</b></a>
+for a description of their effects.
+<pre>
+      anchored                  set PCRE2_ANCHORED
+      dfa_restart               set PCRE2_DFA_RESTART
+      dfa_shortest              set PCRE2_DFA_SHORTEST
+      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+      no_utf_check              set PCRE2_NO_UTF_CHECK
+      notbol                    set PCRE2_NOTBOL
+      notempty                  set PCRE2_NOTEMPTY
+      notempty_atstart          set PCRE2_NOTEMPTY_ATSTART
+      noteol                    set PCRE2_NOTEOL
+      partial_hard (or ph)      set PCRE2_PARTIAL_HARD
+      partial_soft (or ps)      set PCRE2_PARTIAL_SOFT
+</pre>
+The partial matching modifiers are provided with abbreviations because they 
+appear frequently in tests.
+</P>
+<P>
+If the <b>/posix</b> modifier was present on the pattern, causing the POSIX
+wrapper API to be used, the only option-setting modifiers that have any effect
+are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
+REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
+Any other modifiers cause an error.
+</P>
+<br><b>
+Setting match controls
+</b><br>
+<P>
+The following modifiers affect the matching process or request additional
+information. Some of them may also be specified on a pattern line (see above),
+in which case they apply to every subject line that is matched against that
+pattern.
+<pre>
+      aftertext                 show text after match
+      allaftertext              show text after captures
+      allcaptures               show all captures
+      allusedtext               show all consulted text 
+      altglobal                 alternative global matching
+      bsr=[anycrlf|unicode]     specify \R handling
+      callout_capture           show captures at callout time
+      callout_data=&#60;n&#62;          set a value to pass via callouts
+      callout_fail=&#60;n&#62;[:&#60;m&#62;]    control callout failure
+      callout_none              do not supply a callout function
+      copy=&#60;number or name&#62;     copy captured substring
+      dfa                       use <b>pcre2_dfa_match()</b>
+      find_limits               find match and recursion limits
+      get=&#60;number or name&#62;      extract captured substring
+      getall                    extract all captured substrings
+  /g  global                    global matching
+      jitstack=&#60;n&#62;              set size of JIT stack
+      jitverify                 verify JIT usage
+      mark                      show mark values
+      match_limit=&#60;n&#62;           set a match limit
+      memory                    show memory usage
+      newline=&#60;type&#62;            set newline type
+      offset=&#60;n&#62;                set starting offset
+      ovector=&#60;n&#62;               set size of output vector
+      recursion_limit=&#60;n&#62;       set a recursion limit
+</pre>
+The effects of these modifiers are described in the following sections.
+FIXME: Give more examples.
+</P>
+<br><b>
+Newline and \R handling
+</b><br>
+<P>
+These modifiers set the newline and \R processing conventions for the subject
+line, overriding any values that were set at compile time (as described above).
+JIT matching is disabled if these settings are changed at match time.
+</P>
+<br><b>
+Showing more text
+</b><br>
+<P>
+The <b>aftertext</b> modifier requests that as well as outputting the substring
+that matched the entire pattern, <b>pcre2test</b> should in addition output the
+remainder of the subject string. This is useful for tests where the subject
+contains multiple copies of the same substring. The <b>allaftertext</b> modifier
+requests the same action for captured substrings as well as the main matched
+substring. In each case the remainder is output on the following line with a
+plus character following the capture number.
+</P>
+<P>
+The <b>allusedtext</b> modifier requests that all the text that was consulted 
+during a successful pattern match be shown. This affects the output if there 
+is a lookbehind at the start of a match, or a lookahead at the end, or if \K 
+is used in the pattern. Characters that precede or follow the start and end of 
+the actual match are indicated in the output by '&#60;' or '&#62;' characters 
+underneath them. Here is an example:
+<pre>
+  /(?&#60;=pqr)abc(?=xyz)/
+      123pqrabcxyz456\=allusedtext
+   0: pqrabcxyz
+      &#60;&#60;&#60;   &#62;&#62;&#62;
+</pre>
+This shows that the matched string is "abc", with the preceding and following
+strings "pqr" and "xyz" also consulted during the match.
+</P>
+<br><b>
+Showing the value of all capture groups
+</b><br>
+<P>
+The <b>allcaptures</b> modifier requests that the values of all potential
+captured parentheses be output after a match. By default, only those up to the
+highest one actually used in the match are output (corresponding to the return
+code from <b>pcre2_match()</b>). Groups that did not take part in the match
+are output as "&#60;unset&#62;".
+</P>
+<br><b>
+Testing callouts
+</b><br>
+<P>
+A callout function is supplied when <b>pcre2test</b> calls the library matching
+functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
+set, the current captured groups are output when a callout occurs.
+</P>
+<P>
+The <b>callout_fail</b> modifier can be given one or two numbers. If there is
+only one number, 1 is returned instead of 0 when a callout of that number is
+reached. If two numbers are given, 1 is returned when callout &#60;n&#62; is reached
+for the &#60;m&#62;th time.
+</P>
+<P>
+The <b>callout_data</b> modifier can be given an unsigned or a negative number.
+Any value other than zero is used as a return from <b>pcre2test</b>'s callout
+function.
+</P>
+<br><b>
+Testing substring extraction functions
+</b><br>
+<P>
+The <b>copy</b> and <b>get</b> modifiers can be used to test the
+<b>pcre2_substring_copy_xxx()</b> and <b>pcre2_substring_get_xxx()</b> functions.
+They can be given more than once, and each can specify a group name or number,
+for example:
+<pre>
+   abcd\=copy=1,copy=3,get=G1
+</pre>
+If the <b>#subject</b> command is used to set default copy and get lists, these
+can be unset by specifying a negative number for numbered groups and an empty
+name for named groups.
+</P>
+<P>
+The <b>getall</b> modifier tests <b>pcre2_substring_list_get()</b>, which
+extracts all captured substrings.
+</P>
+<P>
+If the subject line is successfully matched, the substrings extracted by the
+convenience functions are output with C, G, or L after the string number
+instead of a colon. This is in addition to the normal full list. The string
+length (that is, the return from the extraction function) is given in
+parentheses after each substring.
+</P>
+<br><b>
+Finding all matches in a string
+</b><br>
+<P>
+Searching for all possible matches within a subject can be requested by the
+<b>global</b> or <b>altglobal</b> modifier. After finding a match, the matching
+function is called again to search the remainder of the subject. The difference
+between <b>global</b> and <b>altglobal</b> is that the former uses the
+<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
+to start searching at a new point within the entire string (which is what Perl
+does), whereas the latter passes over a shortened substring. This makes a
+difference to the matching process if the pattern begins with a lookbehind
+assertion (including \b or \B).
+</P>
+<P>
+If an empty string is matched, the next match is done with the
+PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for
+another, non-empty, match at the same point in the subject. If this match
+fails, the start offset is advanced, and the normal match is retried. This
+imitates the way Perl handles such cases when using the <b>/g</b> modifier or
+the <b>split()</b> function. Normally, the start offset is advanced by one
+character, but if the newline convention recognizes CRLF as a newline, and the
+current character is CR followed by LF, an advance of two is used.
+</P>
+<br><b>
+Setting the JIT stack size
+</b><br>
+<P>
+The <b>jitstack</b> modifier provides a way of setting the maximum stack size
+that is used by the just-in-time optimization code. It is ignored if JIT
+optimization is not being used. Providing a stack that is larger than the
+default 32K is necessary only for very complicated patterns.
+</P>
+<br><b>
+Setting match and recursion limits
+</b><br>
+<P>
+The <b>match_limit</b> and <b>recursion_limit</b> modifiers set the appropriate
+limits in the match context. These values are ignored when the
+<b>find_limits</b> modifier is specified.
+</P>
+<br><b>
+Finding minimum limits
+</b><br>
+<P>
+If the <b>find_limits</b> modifier is present, <b>pcre2test</b> calls
+<b>pcre2_match()</b> several times, setting different values in the match
+context via <b>pcre2_set_match_limit()</b> and <b>pcre2_set_recursion_limit()</b>
+until it finds the minimum values for each parameter that allow
+<b>pcre2_match()</b> to complete without error.
+</P>
+<P>
+The <i>match_limit</i> number is a measure of the amount of backtracking
+that takes place, and learning the minimum value can be instructive. For most
+simple matches, the number is quite small, but for patterns with very large
+numbers of matching possibilities, it can become large very quickly with
+increasing length of subject string. The <i>recursion_limit</i> number is
+a measure of how much stack (or, if PCRE2 is compiled with NO_RECURSE, how much
+heap) memory is needed to complete the match attempt.
+</P>
+<br><b>
+Showing MARK names
+</b><br>
+<P>
+The <b>mark</b> modifier causes the names from backtracking control verbs that
+are returned from calls to <b>pcre2_match()</b> to be displayed. If a mark is
+returned for a match, non-match, or partial match, <b>pcre2test</b> shows it.
+For a match, it is on a line by itself, tagged with "MK:". Otherwise, it
+is added to the non-match message.
+</P>
+<br><b>
+Showing memory usage
+</b><br>
+<P>
+The <b>memory</b> modifier causes <b>pcre2test</b> to log all memory allocation
+and freeing calls that occur during a match operation.
+</P>
+<br><b>
+Setting a starting offset
+</b><br>
+<P>
+The <b>offset</b> modifier sets an offset in the subject string at which
+matching starts. Its value is a number of code units, not characters.
+</P>
+<br><b>
+Setting the size of the output vector
+</b><br>
+<P>
+The <b>ovector</b> modifier applies only to the subject line in which it
+appears, though of course it can also be used to set a default in a
+<b>#subject</b> command. It specifies the number of pairs of offsets that are
+available for storing matching information. The default is 15.
+</P>
+<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
+<P>
+By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
+<b>pcre2_match()</b> to match each subject line. PCRE2 also supports an
+alternative matching function, <b>pcre2_dfa_match()</b>, which operates in a
+different way, and has some restrictions. The differences between the two
+functions are described in the
+<a href="pcre2matching.html"><b>pcre2matching</b></a>
+documentation.
+</P>
+<P>
+If the <b>dfa</b> modifier is set, the alternative matching function is used.
+This function finds all possible matches at a given point in the subject. If,
+however, the <b>dfa_shortest</b> modifier is set, processing stops after the
+first match is found. This is always the shortest possible match.
+</P>
+<br><a name="SEC13" href="#TOC1">DEFAULT OUTPUT FROM pcre2test</a><br>
+<P>
+This section describes the output when the normal matching function,
+<b>pcre2_match()</b>, is being used.
+</P>
+<P>
+When a match succeeds, <b>pcre2test</b> outputs the list of captured substrings,
+starting with number 0 for the string that matched the whole pattern.
+Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or
+"Partial match:" followed by the partially matching substring when the
+return is PCRE2_ERROR_PARTIAL. (Note that this is the
+entire substring that was inspected during the partial match; it may include
+characters before the actual match start if a lookbehind assertion, \K, \b,
+or \B was involved.)
+</P>
+<P>
+For any other return, <b>pcre2test</b> outputs the PCRE2
+negative error number and a short descriptive phrase. If the error is a failed
+UTF string check, the offset of the start of the failing character and the
+reason code are also output. Here is an example of an interactive
+<b>pcre2test</b> run.
+<pre>
+  $ pcre2test
+  PCRE2 version 9.00 2014-05-10
+
+    re&#62; /^abc(\d+)/
+  data&#62; abc123
+   0: abc123
+   1: 123
+  data&#62; xyz
+  No match
+</pre>
+Unset capturing substrings that are not followed by one that is set are not
+returned by <b>pcre2_match()</b>, and are not shown by <b>pcre2test</b>. In the
+following example, there are two capturing substrings, but when the first data
+line is matched, the second, unset substring is not shown. An "internal" unset
+substring is shown as "&#60;unset&#62;", as for the second data line.
+<pre>
+    re&#62; /(a)|(b)/
+  data&#62; a
+   0: a
+   1: a
+  data&#62; b
+   0: b
+   1: &#60;unset&#62;
+   2: b
+</pre>
+If the strings contain any non-printing characters, they are output as \xhh
+escapes if the value is less than 256 and UTF mode is not set. Otherwise they
+are output as \x{hh...} escapes. See below for the definition of non-printing
+characters. If the <b>/aftertext</b> modifier is set, the output for substring
+0 is followed by the rest of the subject string, identified by "0+" like
+this:
+<pre>
+    re&#62; /cat/aftertext
+  data&#62; cataract
+   0: cat
+   0+ aract
+</pre>
+If global matching is requested, the results of successive matching attempts
+are output in sequence, like this:
+<pre>
+    re&#62; /\Bi(\w\w)/g
+  data&#62; Mississippi
+   0: iss
+   1: ss
+   0: iss
+   1: ss
+   0: ipp
+   1: pp
+</pre>
+"No match" is output only if the first match attempt fails. Here is an example
+of a failure message (the offset 4 that is specified by the <b>offset</b>
+modifier is past the end of the subject string):
+<pre>
+    re&#62; /xyz/
+  data&#62; xyz\=offset=4
+  Error -24 (bad offset value)
+</PRE>
+</P>
+<P>
+Note that whereas patterns can be continued over several lines (a plain "&#62;"
+prompt is used for continuations), subject lines may not. However newlines can
+be included in a subject by means of the \n escape (or \r, \r\n, etc.,
+depending on the newline sequence setting).
+</P>
+<br><a name="SEC14" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
+<P>
+When the alternative matching function, <b>pcre2_dfa_match()</b>, is used, the
+output consists of a list of all the matches that start at the first point in
+the subject where there is at least one match. For example:
+<pre>
+    re&#62; /(tang|tangerine|tan)/
+  data&#62; yellow tangerine\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+</pre>
+(Using the normal matching function on this data finds only "tang".) The
+longest matching string is always given first (and numbered zero). After a
+PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the
+partially matching substring. (Note that this is the entire substring that was
+inspected during the partial match; it may include characters before the actual
+match start if a lookbehind assertion, \K, \b, or \B was involved.)
+</P>
+<P>
+If global matching is requested, the search for further matches resumes
+at the end of the longest match. For example:
+<pre>
+    re&#62; /(tang|tangerine|tan)/g
+  data&#62; yellow tangerine and tangy sultana\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+   0: tang
+   1: tan
+   0: tan
+</pre>
+The alternative matching function does not support substring capture, so the
+modifiers that are concerned with captured substrings are not relevant.
+</P>
+<br><a name="SEC15" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
+<P>
+When the alternative matching function has given the PCRE2_ERROR_PARTIAL
+return, indicating that the subject partially matched the pattern, you can
+restart the match with additional subject data by means of the
+<b>dfa_restart</b> modifier. For example:
+<pre>
+    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data&#62; 23ja\=ps,dfa
+  Partial match: 23ja
+  data&#62; n05\=dfa,dfa_restart
+   0: n05
+</pre>
+For further information about partial matching, see the
+<a href="pcre2partial.html"><b>pcre2partial</b></a>
+documentation.
+</P>
+<br><a name="SEC16" href="#TOC1">CALLOUTS</a><br>
+<P>
+If the pattern contains any callout requests, <b>pcre2test</b>'s callout function
+is called during matching. This works with both matching functions. By default,
+the called function displays the callout number, the start and current
+positions in the text at the callout time, and the next pattern item to be
+tested. For example:
+<pre>
+  ---&#62;pqrabcdef
+    0    ^  ^     \d
+</pre>
+This output indicates that callout number 0 occurred for a match attempt
+starting at the fourth character of the subject string, when the pointer was at
+the seventh character, and when the next pattern item was \d. Just
+one circumflex is output if the start and current positions are the same.
+</P>
+<P>
+Callouts numbered 255 are assumed to be automatic callouts, inserted as a
+result of the <b>/auto_callout</b> pattern modifier. In this case, instead of
+showing the callout number, the offset in the pattern, preceded by a plus, is
+output. For example:
+<pre>
+    re&#62; /\d?[A-E]\*/auto_callout
+  data&#62; E*
+  ---&#62;E*
+   +0 ^      \d?
+   +3 ^      [A-E]
+   +8 ^^     \*
+  +10 ^ ^
+   0: E*
+</pre>
+If a pattern contains (*MARK) items, an additional line is output whenever
+a change of latest mark is passed to the callout function. For example:
+<pre>
+    re&#62; /a(*MARK:X)bc/auto_callout
+  data&#62; abc
+  ---&#62;abc
+   +0 ^       a
+   +1 ^^      (*MARK:X)
+  +10 ^^      b
+  Latest Mark: X
+  +11 ^ ^     c
+  +12 ^  ^
+   0: abc
+</pre>
+The mark changes between matching "a" and "b", but stays the same for the rest
+of the match, so nothing more is output. If, as a result of backtracking, the
+mark reverts to being unset, the text "&#60;unset&#62;" is output.
+</P>
+<P>
+The callout function in <b>pcre2test</b> returns zero (carry on matching) by
+default, but you can use a <b>callout_fail</b> modifier in a subject line (as
+described above) to change this and other parameters of the callout.
+</P>
+<P>
+Inserting callouts can be helpful when using <b>pcre2test</b> to check
+complicated regular expressions. For further information about callouts, see
+the
+<a href="pcre2callout.html"><b>pcre2callout</b></a>
+documentation.
+</P>
+<br><a name="SEC17" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
+<P>
+When <b>pcre2test</b> is outputting text in the compiled version of a pattern,
+bytes other than 32-126 are always treated as non-printing characters and are
+therefore shown as hex escapes.
+</P>
+<P>
+When <b>pcre2test</b> is outputting text that is a matched part of a subject
+string, it behaves in the same way, unless a different locale has been set for
+the pattern (using the <b>/locale</b> modifier). In this case, the
+<b>isprint()</b> function is used to distinguish printing and non-printing
+characters.
+</P>
+<br><a name="SEC18" href="#TOC1">SEE ALSO</a><br>
+<P>
+<b>pcre2</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcre2api</b>(3),
+<b>pcre2callout</b>(3),
+<b>pcre2jit</b>(3), <b>pcre2matching</b>(3), <b>pcre2partial</b>(3),
+<b>pcre2pattern</b>(3), <b>pcre2precompile</b>(3).
+</P>
+<br><a name="SEC19" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><a name="SEC20" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 19 August 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html
new file mode 100644
index 0000000..bbefd02
--- /dev/null
+++ b/doc/html/pcre2unicode.html
@@ -0,0 +1,270 @@
+<html>
+<head>
+<title>pcre2unicode specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2unicode man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+UNICODE AND UTF SUPPORT
+</b><br>
+<P>
+When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
+character properties and can process text strings in UTF-8, UTF-16, or UTF-32
+format (depending on the code unit width). By default, PCRE2 assumes that one
+code unit is one character. To process a pattern as a UTF string, where a
+character may require more than one code unit, you must call
+<a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
+with the PCRE2_UTF option flag, or the pattern must start with the sequence
+(*UTF). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF strings instead of
+strings of individual one-code-unit characters.
+</P>
+<P>
+If you build PCRE2 with Unicode support, the library will be bigger, but the
+additional run time overhead is limited to testing the PCRE2_UTF flag
+occasionally, so should not be very much.
+</P>
+<br><b>
+UNICODE PROPERTY SUPPORT
+</b><br>
+<P>
+When PCRE2 is built with Unicode support, the escape sequences \p{..},
+\P{..}, and \X can be used. The Unicode properties that can be tested are
+limited to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
+the derived properties Any and L&amp;. Full lists are given in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+and
+<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
+documentation. Only the short names for properties are supported. For example,
+\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
+Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
+compatibility with Perl 5.6. PCRE2 does not support this.
+</P>
+<br><b>
+WIDE CHARACTERS AND UTF MODES
+</b><br>
+<P>
+Codepoints less than 256 can be specified in patterns by either braced or
+unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
+values have to use braced sequences. Unbraced octal code points up to \777 are
+also recognized; larger ones can be coded using \o{...}.
+</P>
+<P>
+In UTF modes, repeat quantifiers apply to complete UTF characters, not to
+individual code units.
+</P>
+<P>
+In UTF modes, the dot metacharacter matches one UTF character instead of a
+single code unit.
+</P>
+<P>
+The escape sequence \C can be used to match a single code unit, in a UTF mode, 
+but its use can lead to some strange effects because it breaks up multi-unit
+characters (see the description of \C in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation). The use of \C is not supported in the alternative matching
+function <b>pcre2_dfa_match()</b>, nor is it supported in UTF mode by the JIT
+optimization. If JIT optimization is requested for a UTF pattern that contains
+\C, it will not succeed, and so the matching will be carried out by the normal
+interpretive function.
+</P>
+<P>
+The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
+characters of any code value, but, by default, the characters that PCRE2
+recognizes as digits, spaces, or word characters remain the same set as in
+non-UTF mode, all with code points less than 256. This remains true even when
+PCRE2 is built to include Unicode support, because to do otherwise would slow
+down matching in many common cases. Note that this also applies to \b
+and \B, because they are defined in terms of \w and \W. If you want
+to test for a wider sense of, say, "digit", you can use explicit Unicode
+property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option,
+the way that the character escapes work is changed so that Unicode properties
+are used to determine which characters match. There are more details in the
+section on
+<a href="pcre2pattern.html#genericchartypes">generic character types</a>
+in the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation.
+</P>
+<P>
+Similarly, characters that match the POSIX named character classes are all
+low-valued characters, unless the PCRE2_UCP option is set.
+</P>
+<P>
+However, the special horizontal and vertical white space matching escapes (\h,
+\H, \v, and \V) do match all the appropriate Unicode characters, whether or
+not PCRE2_UCP is set.
+</P>
+<P>
+Case-insensitive matching in UTF mode makes use of Unicode properties. A few
+Unicode characters such as Greek sigma have more than two codepoints that are
+case-equivalent, and these are treated as such.
+</P>
+<br><b>
+VALIDITY OF UTF STRINGS
+</b><br>
+<P>
+When the PCRE2_UTF option is set, the strings passed as patterns and subjects
+are (by default) checked for validity on entry to the relevant functions. 
+If an invalid UTF string is passed, an error return is given. 
+</P>
+<P>
+UTF-16 and UTF-32 strings can indicate their endianness by a special code known
+as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
+strings to be in host byte order.
+</P>
+<P>
+The entire string is checked before any other processing takes place. In
+addition to checking the format of the string, there is a check to ensure that
+all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
+The so-called "non-character" code points are not excluded because Unicode
+corrigendum #9 makes it clear that they should not be.
+</P>
+<P>
+Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
+where they are used in pairs to encode code points with values greater than
+0xFFFF. The code points that are encoded by UTF-16 pairs are available
+independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
+surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
+UTF-32.)
+</P>
+<P>
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
+assumes that the pattern or subject it is given (respectively) contains only
+valid UTF code unit sequences.
+</P>
+<P>
+Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
+the pattern; it does not also apply to subject strings. If you want to disable
+the check for a subject string you must pass this option to <b>pcre2_match()</b>
+or <b>pcre2_dfa_match()</b>.
+</P>
+<P>
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely.
+<a name="utf8strings"></a></P>
+<br><b>
+Errors in UTF-8 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-8 strings:
+<pre>
+  PCRE2_ERROR_UTF8_ERR1
+  PCRE2_ERROR_UTF8_ERR2
+  PCRE2_ERROR_UTF8_ERR3
+  PCRE2_ERROR_UTF8_ERR4
+  PCRE2_ERROR_UTF8_ERR5
+</pre>
+The string ends with a truncated UTF-8 character; the code specifies how many
+bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
+no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
+allows for up to 6 bytes, and this is checked first; hence the possibility of
+4 or 5 missing bytes.
+<pre>
+  PCRE2_ERROR_UTF8_ERR6
+  PCRE2_ERROR_UTF8_ERR7
+  PCRE2_ERROR_UTF8_ERR8
+  PCRE2_ERROR_UTF8_ERR9
+  PCRE2_ERROR_UTF8_ERR10
+</pre>
+The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
+character do not have the binary value 0b10 (that is, either the most
+significant bit is 0, or the next bit is 1).
+<pre>
+  PCRE2_ERROR_UTF8_ERR11
+  PCRE2_ERROR_UTF8_ERR12
+</pre>
+A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
+these code points are excluded by RFC 3629.
+<pre>
+  PCRE2_ERROR_UTF8_ERR13
+</pre>
+A 4-byte character has a value greater than 0x10ffff; these code points are
+excluded by RFC 3629.
+<pre>
+  PCRE2_ERROR_UTF8_ERR14
+</pre>
+A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
+code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
+from UTF-8.
+<pre>
+  PCRE2_ERROR_UTF8_ERR15
+  PCRE2_ERROR_UTF8_ERR16
+  PCRE2_ERROR_UTF8_ERR17
+  PCRE2_ERROR_UTF8_ERR18
+  PCRE2_ERROR_UTF8_ERR19
+</pre>
+A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
+value that can be represented by fewer bytes, which is invalid. For example,
+the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
+one byte.
+<pre>
+  PCRE2_ERROR_UTF8_ERR20
+</pre>
+The two most significant bits of the first byte of a character have the binary
+value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
+byte can only validly occur as the second or subsequent byte of a multi-byte
+character.
+<pre>
+  PCRE2_ERROR_UTF8_ERR21
+</pre>
+The first byte of a character has the value 0xfe or 0xff. These values can
+never occur in a valid UTF-8 string.
+<a name="utf16strings"></a></P>
+<br><b>
+Errors in UTF-16 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-16 strings:
+<pre>
+  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+
+<a name="utf32strings"></a></PRE>
+</P>
+<br><b>
+Errors in UTF-32 strings
+</b><br>
+<P>
+The following negative error codes are given for invalid UTF-32 strings:
+<pre>
+  PCRE2_ERROR_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
+  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+
+</PRE>
+</P>
+<br><b>
+AUTHOR
+</b><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><b>
+REVISION
+</b><br>
+<P>
+Last updated: 16 September 2014
+<br>
+Copyright &copy; 1997-2014 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
diff --git a/doc/index.html.src b/doc/index.html.src
new file mode 100644
index 0000000..4e264ec
--- /dev/null
+++ b/doc/index.html.src
@@ -0,0 +1,177 @@
+<html>
+<!-- This is a manually maintained file that is the root of the HTML version of 
+     the PCRE2 documentation. When the HTML documents are built from the man 
+     page versions, the entire doc/html directory is emptied, this file is then 
+     copied into doc/html/index.html, and the remaining files therein are 
+     created by the 132html script.
+-->      
+<head>
+<title>PCRE2 specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
+<p>
+The HTML documentation for PCRE2 consists of a number of pages that are listed
+below in alphabetical order. If you are new to PCRE2, please read the first one
+first.
+</p>
+
+<table>
+<tr><td><a href="pcre2.html">pcre2</a></td>
+    <td>&nbsp;&nbsp;Introductory page</td></tr>
+
+<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
+    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
+
+<tr><td><a href="pcre2api.html">pcre2api</a></td>
+    <td>&nbsp;&nbsp;PCRE2's native API</td></tr>
+
+<tr><td><a href="pcre2build.html">pcre2build</a></td>
+    <td>&nbsp;&nbsp;Building PCRE2</td></tr>
+
+<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
+    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
+
+<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
+    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
+
+<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
+    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
+
+<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
+
+<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
+<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
+    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
+
+<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
+    <td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
+
+<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
+    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
+
+<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
+    <td>&nbsp;&nbsp;Some comments on performance</td></tr>
+
+<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
+    <td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
+
+<tr><td><a href="pcre2precompile.html">pcre2precompile</a></td>
+    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
+
+<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
+    <td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
+
+<tr><td><a href="pcre2stack.html">pcre2stack</a></td>
+    <td>&nbsp;&nbsp;Discussion of PCRE2's stack usage</td></tr>
+
+<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
+    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
+
+<tr><td><a href="pcre2test.html">pcre2test</a></td>
+    <td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
+
+<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
+</table>
+
+<p>
+There are also individual pages that summarize the interface for each function
+in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
+functions.
+</p>
+
+<table>    
+
+<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
+    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
+
+<tr><td><a href="pcre2_compile2.html">pcre2_compile2</a></td>
+    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>
+
+<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
+    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
+
+<tr><td><a href="pcre2_copy_named_substring.html">pcre2_copy_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_copy_substring.html">pcre2_copy_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
+
+<tr><td><a href="pcre2_dfa_exec.html">pcre2_dfa_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_exec.html">pcre2_exec</a></td>
+    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
+    (Perl compatible)</td></tr>
+
+<tr><td><a href="pcre2_free_study.html">pcre2_free_study</a></td>
+    <td>&nbsp;&nbsp;Free study data</td></tr>
+
+<tr><td><a href="pcre2_free_substring.html">pcre2_free_substring</a></td>
+    <td>&nbsp;&nbsp;Free extracted substring</td></tr>
+
+<tr><td><a href="pcre2_free_substring_list.html">pcre2_free_substring_list</a></td>
+    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
+
+<tr><td><a href="pcre2_fullinfo.html">pcre2_fullinfo</a></td>
+    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
+
+<tr><td><a href="pcre2_get_named_substring.html">pcre2_get_named_substring</a></td>
+    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_stringnumber.html">pcre2_get_stringnumber</a></td>
+    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
+
+<tr><td><a href="pcre2_get_stringtable_entries.html">pcre2_get_stringtable_entries</a></td>
+    <td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
+
+<tr><td><a href="pcre2_get_substring.html">pcre2_get_substring</a></td>
+    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
+
+<tr><td><a href="pcre2_get_substring_list.html">pcre2_get_substring_list</a></td>
+    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
+
+<tr><td><a href="pcre2_jit_exec.html">pcre2_jit_exec</a></td>
+    <td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
+    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
+    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
+
+<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
+    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
+    
+<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_refcount.html">pcre2_refcount</a></td>
+    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_study.html">pcre2_study</a></td>
+    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
+
+<tr><td><a href="pcre2_utf16_to_host_byte_order.html">pcre2_utf16_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_utf32_to_host_byte_order.html">pcre2_utf32_to_host_byte_order</a></td>
+    <td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
+
+<tr><td><a href="pcre2_version.html">pcre2_version</a></td>
+    <td>&nbsp;&nbsp;Return PCRE2 version and release date</td></tr>
+</table>
+
+</body></html>
+
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
new file mode 100644
index 0000000..52b7406
--- /dev/null
+++ b/doc/pcre2.txt
@@ -0,0 +1,2903 @@
+-----------------------------------------------------------------------------
+This file contains a concatenation of the PCRE2 man pages, converted to plain
+text format for ease of searching with a text editor, or for use on systems
+that do not have a man page processor. The small individual files that give
+synopses of each function in the library have not been included. Neither has
+the pcre2demo program. There are separate text files for the pcre2grep and
+pcre2test commands.
+-----------------------------------------------------------------------------
+
+
+PCRE2API(3)                Library Functions Manual                PCRE2API(3)
+
+
+
+NAME
+       PCRE2 - Perl-compatible regular expressions (revised API)
+
+       #include <pcre2.h>
+
+       PCRE2  is  a  new API for PCRE. This document contains a description of
+       all its functions. See the pcre2 document for an overview  of  all  the
+       PCRE2 documentation.
+
+
+PCRE2 NATIVE API BASIC FUNCTIONS
+
+       pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
+         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
+         pcre2_compile_context *ccontext);
+
+       void pcre2_code_free(pcre2_code *code);
+
+       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
+         pcre2_general_context *gcontext);
+
+       pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *code,
+         pcre2_general_context *gcontext);
+
+       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext);
+
+       int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext,
+         int *workspace, PCRE2_SIZE wscount);
+
+       void pcre2_match_data_free(pcre2_match_data *match_data);
+
+
+PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS
+
+       PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *match_data);
+
+       PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data);
+
+       uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data);
+
+       PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
+
+       PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *match_data);
+
+       PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
+
+
+PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS
+
+       pcre2_general_context *pcre2_general_context_create(
+         void *(*private_malloc)(PCRE2_SIZE, void *),
+         void (*private_free)(void *, void *), void *memory_data);
+
+       pcre2_general_context *pcre2_general_context_copy(
+         pcre2_general_context *gcontext);
+
+       void pcre2_general_context_free(pcre2_general_context *gcontext);
+
+
+PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
+
+       pcre2_compile_context *pcre2_compile_context_create(
+         pcre2_general_context *gcontext);
+
+       pcre2_compile_context *pcre2_compile_context_copy(
+         pcre2_compile_context *ccontext);
+
+       void pcre2_compile_context_free(pcre2_compile_context *ccontext);
+
+       int pcre2_set_bsr_compile(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       int pcre2_set_character_tables(pcre2_compile_context *ccontext,
+         const unsigned char *tables);
+
+       int pcre2_set_newline_compile(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
+         int (*guard_function)(uint32_t));
+
+
+PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
+
+       pcre2_match_context *pcre2_match_context_create(
+         pcre2_general_context *gcontext);
+
+       pcre2_match_context *pcre2_match_context_copy(
+         pcre2_match_context *mcontext);
+
+       void pcre2_match_context_free(pcre2_match_context *mcontext);
+
+       int pcre2_set_bsr_match(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       int pcre2_set_callout(pcre2_match_context *mcontext,
+         int (*callout_function)(pcre2_callout_block *),
+         void *callout_data);
+
+       int pcre2_set_match_limit(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       int pcre2_set_newline_match(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       int pcre2_set_recursion_limit(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       int pcre2_set_recursion_memory_management(
+         pcre2_match_context *mcontext,
+         void *(*private_malloc)(PCRE2_SIZE, void *),
+         void (*private_free)(void *, void *), void *memory_data);
+
+
+PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS
+
+       int pcre2_substring_copy_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen);
+
+       int pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_UCHAR *buffer,
+         PCRE2_SIZE *bufflen);
+
+       void pcre2_substring_free(PCRE2_UCHAR *buffer);
+
+       int pcre2_substring_get_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen);
+
+       int pcre2_substring_get_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_UCHAR **bufferptr,
+         PCRE2_SIZE *bufflen);
+
+       int pcre2_substring_length_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_SIZE *length);
+
+       int pcre2_substring_length_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_SIZE *length);
+
+       int pcre2_substring_nametable_scan(const pcre2_code *code,
+         PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
+
+       int pcre2_substring_number_from_name(const pcre2_code *code,
+         PCRE2_SPTR name);
+
+       void pcre2_substring_list_free(PCRE2_SPTR *list);
+
+       int pcre2_substring_list_get(pcre2_match_data *match_data,
+         PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
+
+
+PCRE2 NATIVE API JIT FUNCTIONS
+
+       int pcre2_jit_compile(pcre2_code *code, uint32_t options);
+
+       int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack);
+
+       void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
+
+       pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
+         PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
+
+       void pcre2_jit_stack_assign(const pcre2_code *code,
+         pcre2_jit_callback callback_function, void *callback_data);
+
+       void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
+
+
+PCRE2 NATIVE API AUXILIARY FUNCTIONS
+
+       int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer,
+         PCRE2_SIZE bufflen);
+
+       const unsigned char *pcre2_maketables(pcre2_general_context *gcontext);
+
+       int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
+
+       int pcre2_config(uint32_t what, void *where, PCRE2_SIZE length);
+
+
+PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
+
+       There  are  three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit
+       code units, respectively. However,  there  is  just  one  header  file,
+       pcre2.h.   This  contains the function prototypes and other definitions
+       for all three libraries. One, two, or all three can be installed simul-
+       taneously.  On  Unix-like  systems the libraries are called libpcre2-8,
+       libpcre2-16, and libpcre2-32, and they can also co-exist with the orig-
+       inal PCRE libraries.
+
+       Character  strings are passed to and from a PCRE2 library as a sequence
+       of unsigned integers in code units  of  the  appropriate  width.  Every
+       PCRE2  function  comes  in three different forms, one for each library,
+       for example:
+
+         pcre2_compile_8()
+         pcre2_compile_16()
+         pcre2_compile_32()
+
+       There are also three different sets of data types:
+
+         PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32
+         PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32
+
+       The UCHAR types define unsigned code units of the  appropriate  widths.
+       For  example,  PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR
+       types are constant pointers to the equivalent  UCHAR  types,  that  is,
+       they are pointers to vectors of unsigned code units.
+
+       Many  applications use only one code unit width. For their convenience,
+       macros are defined whose names are the generic forms such as pcre2_com-
+       pile()  and  PCRE2_SPTR.  These  macros  use  the  value  of  the macro
+       PCRE2_CODE_UNIT_WIDTH to generate the appropriate width-specific  func-
+       tion and macro names.  PCRE2_CODE_UNIT_WIDTH is not defined by default.
+       An application must define it to be  8,  16,  or  32  before  including
+       pcre2.h in order to make use of the generic names.
+
+       Applications  that use more than one code unit width can be linked with
+       more than one PCRE2 library, but must define  PCRE2_CODE_UNIT_WIDTH  to
+       be  0  before  including pcre2.h, and then use the real function names.
+       Any code that is to be included in an environment where  the  value  of
+       PCRE2_CODE_UNIT_WIDTH  is  unknown  should  also  use the real function
+       names. (Unfortunately, it is not possible in C code to save and restore
+       the value of a macro.)
+
+       If  PCRE2_CODE_UNIT_WIDTH  is  not  defined before including pcre2.h, a
+       compiler error occurs.
+
+       When using multiple libraries in an application,  you  must  take  care
+       when  processing  any  particular  pattern to use only functions from a
+       single library.  For example, if you want to run a match using  a  pat-
+       tern  that  was  compiled  with pcre2_compile_16(), you must do so with
+       pcre2_match_16(), not pcre2_match_8().
+
+       In the function summaries above, and in the rest of this  document  and
+       other  PCRE2  documents,  functions  and data types are described using
+       their generic names, without the 8, 16, or 32 suffix.
+
+
+PCRE2 API OVERVIEW
+
+       PCRE2 has its own native API, which  is  described  in  this  document.
+       There are also some wrapper functions for the 8-bit library that corre-
+       spond to the POSIX regular expression API, but they do not give  access
+       to all the functionality. They are described in the pcre2posix documen-
+       tation. Both these APIs define a set of C function calls.
+
+       The native API C data types, function prototypes,  option  values,  and
+       error codes are defined in the header file pcre2.h, which contains def-
+       initions of PCRE2_MAJOR and PCRE2_MINOR, the major  and  minor  release
+       numbers  for the library. Applications can use these to include support
+       for different releases of PCRE2.
+
+       In a Windows environment, if you want to statically link an application
+       program  against  a non-dll PCRE2 library, you must define PCRE2_STATIC
+       before including pcre2.h.
+
+       The functions pcre2_compile() and pcre2_match()  are used for compiling
+       and  matching regular expressions in a Perl-compatible manner. A sample
+       program that demonstrates the simplest way of using them is provided in
+       the file called pcre2demo.c in the PCRE2 source distribution. A listing
+       of this program is  given  in  the  pcre2demo  documentation,  and  the
+       pcre2sample documentation describes how to compile and run it.
+
+       Just-in-time  compiler support is an optional feature of PCRE2 that can
+       be built in appropriate hardware environments. It greatly speeds up the
+       matching  performance of many patterns. Programs can request that it be
+       used if available, by calling pcre2_jit_compile() after a  pattern  has
+       been successfully compiled by pcre2_compile(). This does nothing if JIT
+       support is not available.
+
+       More complicated programs might need to  make  use  of  the  specialist
+       functions    pcre2_jit_stack_alloc(),    pcre2_jit_stack_free(),    and
+       pcre2_jit_stack_assign() in order to  control  the  JIT  code's  memory
+       usage.
+
+       JIT matching is automatically used by pcre2_match() if it is available.
+       There is also a direct interface for JIT matching, which gives improved
+       performance.  The  JIT-specific functions are discussed in the pcre2jit
+       documentation.
+
+       A second matching function, pcre2_dfa_match(), which is not Perl-compat-
+       ible,  is also provided. This uses a different algorithm for the match-
+       ing. The alternative algorithm finds all possible matches (at  a  given
+       point  in  the  subject), and scans the subject just once (unless there
+       are lookbehind assertions). However, this  algorithm  does  not  return
+       captured  substrings.  A description of the two matching algorithms and
+       their advantages and disadvantages is given in the pcre2matching  docu-
+       mentation. There is no JIT support for pcre2_dfa_match().
+
+       In  addition  to  the  main compiling and matching functions, there are
+       convenience functions for extracting captured substrings from a subject
+       string that is matched by pcre2_match(). They are:
+
+         pcre2_substring_copy_byname()
+         pcre2_substring_copy_bynumber()
+         pcre2_substring_get_byname()
+         pcre2_substring_get_bynumber()
+         pcre2_substring_list_get()
+         pcre2_substring_length_byname()
+         pcre2_substring_length_bynumber()
+         pcre2_substring_nametable_scan()
+         pcre2_substring_number_from_name()
+
+       pcre2_substring_free()  and  pcre2_substring_list_free()  are also pro-
+       vided, to free the memory used for extracted strings.
+
+       There are functions for finding out information about a  compiled  pat-
+       tern  (pcre2_pattern_info())  and  about  the  configuration with which
+       PCRE2 was built (pcre2_config()).
+
+
+NEWLINES
+
+       PCRE2 supports five different conventions for indicating line breaks in
+       strings:  a  single  CR (carriage return) character, a single LF (line-
+       feed) character, the two-character sequence CRLF, any of the three pre-
+       ceding,  or any Unicode newline sequence. The Unicode newline sequences
+       are the three just mentioned, plus the single characters  VT  (vertical
+       tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line
+       separator, U+2028), and PS (paragraph separator, U+2029).
+
+       Each of the first three conventions is used by at least  one  operating
+       system as its standard newline sequence. When PCRE2 is built, a default
+       can be specified.  The default default is LF, which is the  Unix  stan-
+       dard.  When  PCRE2 is run, the default can be overridden, either when a
+       pattern is compiled, or when it is matched.
+
+       The newline convention can be changed when calling pcre2_compile(),  or
+       it can be specified by special text at the start of the pattern itself;
+       this overrides any  other  settings.  See  the  pcre2pattern  page  for
+       details of the special character sequences.
+
+       In  the  PCRE2  documentation  the  word "newline" is used to mean "the
+       character or pair of characters that indicate a line break". The choice
+       of  newline convention affects the handling of the dot, circumflex, and
+       dollar metacharacters, the handling of #-comments in /x mode, and, when
+       CRLF  is a recognized line ending sequence, the match position advance-
+       ment for a non-anchored pattern. There is more detail about this in the
+       section on pcre2_match() options below.
+
+       The  choice of newline convention does not affect the interpretation of
+       the \n or \r escape sequences, nor does  it  affect  what  \R  matches,
+       which has its own separate control.
+
+
+MULTITHREADING
+
+       In  a multithreaded application it is important to keep thread-specific
+       data separate from data that can be shared between threads.  The  PCRE2
+       library  code  itself  is  thread-safe: it contains no static or global
+       variables. The API is designed to be  fairly  simple  for  non-threaded
+       applications  while at the same time ensuring that multithreaded appli-
+       cations can use it.
+
+       There are several different blocks of data that are used to pass infor-
+       mation between the application and the PCRE2 libraries.
+
+       (1) A pointer to the compiled form of a pattern is returned to the user
+       when pcre2_compile() is successful. The data in the compiled pattern is
+       fixed,  and  does not change when the pattern is matched. Therefore, it
+       is thread-safe, that is, the same compiled pattern can be used by  more
+       than one thread simultaneously. An application can compile all its pat-
+       terns at the start, before forking off multiple threads that use  them.
+       However,  if  the  just-in-time  optimization feature is being used, it
+       needs separate memory stack areas for each  thread.  See  the  pcre2jit
+       documentation for more details.
+
+       (2)  The  next section below introduces the idea of "contexts" in which
+       PCRE2 functions are called. A context is nothing more than a collection
+       of parameters that control the way PCRE2 operates. Grouping a number of
+       parameters together in a context is a convenient way of passing them to
+       a  PCRE2  function without using lots of arguments. The parameters that
+       are stored in contexts are in some sense  "advanced  features"  of  the
+       API. Many straightforward applications will not need to use contexts.
+
+       In a multithreaded application, if the parameters in a context are val-
+       ues that are never changed, the same context can be  used  by  all  the
+       threads. However, if any thread needs to change any value in a context,
+       it must make its own thread-specific copy.
+
+       (3) The matching functions need a block of memory for working space and
+       for  storing  the results of a match. This includes details of what was
+       matched, as well as additional  information  such  as  the  name  of  a
+       (*MARK)  setting. Each thread must provide its own version of this mem-
+       ory.
+
+
+PCRE2 CONTEXTS
+
+       Some PCRE2 functions have a lot of parameters, many of which  are  used
+       only  by  specialist  applications,  for example, those that use custom
+       memory management or non-standard character tables.  To  keep  function
+       argument  lists  at a reasonable size, and at the same time to keep the
+       API extensible, "uncommon" parameters are passed to  certain  functions
+       in  a  context instead of directly. A context is just a block of memory
+       that holds the parameter values.  Applications  that  do  not  need  to
+       adjust  any  of  the  context  parameters  can pass NULL when a context
+       pointer is required.
+
+       There are three different types of context: a general context  that  is
+       relevant  for  several  PCRE2 operations, a compile-time context, and a
+       match-time context.
+
+   The general context
+
+       At present, this context just  contains  pointers  to  (and  data  for)
+       external  memory  management  functions  that  are  called from several
+       places in the PCRE2 library. The context is named `general' rather than
+       specifically  `memory'  because in future other fields may be added. If
+       you do not want to supply your own custom memory management  functions,
+       you  do not need to bother with a general context. A general context is
+       created by:
+
+       pcre2_general_context *pcre2_general_context_create(
+         void *(*private_malloc)(PCRE2_SIZE, void *),
+         void (*private_free)(void *, void *), void *memory_data);
+
+       The two function pointers specify custom memory  management  functions,
+       whose prototypes are:
+
+         void *private_malloc(PCRE2_SIZE, void *);
+         void  private_free(void *, void *);
+
+       Whenever code in PCRE2 calls these functions, the final argument is the
+       value of memory_data. Either of the first two arguments of the creation
+       function  may be NULL, in which case the system memory management func-
+       tions malloc() and free() are used. (This is not currently  useful,  as
+       there  are  no  other  fields in a general context, but in future there
+       might be.)  The private_malloc() function  is  used  (if  supplied)  to
+       obtain  memory  for storing the context, and all three values are saved
+       as part of the context.
+
+       Whenever PCRE2 creates a data block of any kind, the block  contains  a
+       pointer  to the free() function that matches the malloc() function that
+       was used. When the time comes to  free  the  block,  this  function  is
+       called.
+
+       A general context can be copied by calling:
+
+       pcre2_general_context *pcre2_general_context_copy(
+         pcre2_general_context *gcontext);
+
+       The memory used for a general context should be freed by calling:
+
+       void pcre2_general_context_free(pcre2_general_context *gcontext);
+
+
+   The compile context
+
+       A  compile context is required if you want to change the default values
+       of any of the following compile-time parameters:
+
+         What \R matches (Unicode newlines or CR, LF, CRLF only);
+         PCRE2's character tables;
+         The newline character sequence;
+         The compile time nested parentheses limit;
+         An external function for stack checking.
+
+       A compile context is also required if you are using custom memory  man-
+       agement.   If  none of these apply, just pass NULL as the context argu-
+       ment of pcre2_compile().
+
+       A compile context is created, copied, and freed by the following  func-
+       tions:
+
+       pcre2_compile_context *pcre2_compile_context_create(
+         pcre2_general_context *gcontext);
+
+       pcre2_compile_context *pcre2_compile_context_copy(
+         pcre2_compile_context *ccontext);
+
+       void pcre2_compile_context_free(pcre2_compile_context *ccontext);
+
+       A  compile  context  is created with default values for its parameters.
+       These can be changed by calling the following functions, which return 0
+       on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
+
+       int pcre2_set_bsr_compile(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       The  value  must  be PCRE2_BSR_ANYCRLF, to specify that \R matches only
+       CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R  matches  any
+       Unicode  line  ending  sequence.  The  value of this parameter does not
+       affect what is compiled; it is just saved with  the  compiled  pattern.
+       The value is used by the JIT compiler and by the two interpreted match-
+       ing functions, pcre2_match() and pcre2_dfa_match(). You can change  the
+       value  when  calling  these functions, but doing so disables the use of
+       JIT.
+
+       int pcre2_set_character_tables(pcre2_compile_context *ccontext,
+         const unsigned char *tables);
+
+       The value must be the result of a  call  to  pcre2_maketables(),  whose
+       only argument is a general context. This function builds a set of char-
+       acter tables in the current locale.
+
+       int pcre2_set_newline_compile(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       This specifies which characters or character sequences are to be recog-
+       nized  as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage
+       return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the
+       two-character  sequence  CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any
+       of the above), or PCRE2_NEWLINE_ANY (any Unicode newline sequence).
+
+       When a pattern is compiled with the PCRE2_EXTENDED option, the value of
+       this  parameter  affects  the recognition of white space and the end of
+       internal comments starting with #. The value is saved with the compiled
+       pattern  for  subsequent  use by the JIT compiler and by the two inter-
+       preted matching functions, pcre2_match() and pcre2_dfa_match(). You can
+       change  the  value  when calling these functions, but doing so disables
+       the use of JIT.
+
+       int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
+         uint32_t value);
+
+       This parameter adjusts the limit, set when PCRE2 is built (default 250),
+       on  the  depth  of  parenthesis  nesting in a pattern. This limit stops
+       rogue patterns using up too much system stack when being compiled.
+
+       int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
+         int (*guard_function)(uint32_t));
+
+       There is at least one application that runs PCRE2 in threads with  very
+       limited  system  stack,  where running out of stack is to be avoided at
+       all costs. The parenthesis limit above cannot take account of how  much
+       stack  is  actually  available.  For  a finer control, you can supply a
+       function that is called whenever pcre2_compile() starts  to  compile  a
+       parenthesized part of a pattern. The argument to the function gives the
+       current depth of nesting. The function should return  zero  if  all  is
+       well, or non-zero to force an error.
+
+   The match context
+
+       A match context is required if you want to change the default values of
+       any of the following match-time parameters:
+
+         What \R matches (Unicode newlines or CR, LF, CRLF only);
+         A callout function;
+         The limit for calling match();
+         The limit for calling match() recursively;
+         The newline character sequence.
+
+       A match context is also required if you are using custom memory manage-
+       ment.   If  none of these apply, just pass NULL as the context argument
+       of pcre2_match(), pcre2_dfa_match(),  or  pcre2_jit_match().   Changing
+       the  newline value or what \R matches at match time disables the use of
+       JIT via pcre2_match().
+
+       A match context is created, copied, and freed by  the  following  func-
+       tions:
+
+       pcre2_match_context *pcre2_match_context_create(
+         pcre2_general_context *gcontext);
+
+       pcre2_match_context *pcre2_match_context_copy(
+         pcre2_match_context *mcontext);
+
+       void pcre2_match_context_free(pcre2_match_context *mcontext);
+
+       A  match  context  is  created  with default values for its parameters.
+       These can be changed by calling the following functions, which return 0
+       on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
+
+       int pcre2_set_bsr_match(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       The  value  must  be PCRE2_BSR_ANYCRLF, to specify that \R matches only
+       CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R  matches  any
+       Unicode  line ending sequence. If you want to make use of JIT matching,
+       you should not use this function, but instead set the value in  a  com-
+       pile context.
+
+       int pcre2_set_callout(pcre2_match_context *mcontext,
+         int (*callout_function)(pcre2_callout_block *),
+         void *callout_data);
+
+       This  sets  up a "callout" function, which PCRE2 will call at specified
+       points during a matching operation. Details are given in the pcre2call-
+       out documentation.
+
+       int pcre2_set_match_limit(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       The  match_limit  parameter  provides  a means of preventing PCRE2 from
+       using up too many resources when processing patterns that are not going
+       to  match, but which have a very large number of possibilities in their
+       search trees. The classic example is a pattern that uses nested  unlim-
+       ited repeats.
+
+       Internally,  pcre2_match()  uses  a  function  called match(), which it
+       calls repeatedly (sometimes recursively). The limit set by  match_limit
+       is  imposed  on  the  number  of times this function is called during a
+       match, which has the effect of limiting the amount of backtracking that
+       can  take place. For patterns that are not anchored, the count restarts
+       from zero for each position in the subject string. This  limit  is  not
+       relevant to pcre2_dfa_match(), which ignores it.
+
+       When pcre2_match() is called with a pattern that was successfully  pro-
+       cessed  by  pcre2_jit_compile(), the way that the matching is executed is
+       entirely  different. However, there is still the possibility of runaway
+       matching that goes on for a very long  time,  and  so  the  match_limit
+       value  is  also used in this case (but in a different way) to limit how
+       long the matching can continue.
+
+       The default value for the limit can be set when  PCRE2  is  built;  the
+       default  default  is 10 million, which handles all but the most extreme
+       cases.   If   the   limit   is    exceeded,    pcre2_match()    returns
+       PCRE2_ERROR_MATCHLIMIT.  A  value  for the match limit may also be sup-
+       plied by an item at the start of a pattern of the form
+
+         (*LIMIT_MATCH=ddd)
+
+       where ddd is a decimal number.  However,  such  a  setting  is  ignored
+       unless  ddd  is  less than the limit set by the caller of pcre2_match()
+       or, if no such limit is set, less than the default.
+
+       int pcre2_set_recursion_limit(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       The recursion_limit parameter is similar to match_limit, but instead of
+       limiting  the  total  number of times that match() is called, it limits
+       the depth of recursion. The recursion depth is a  smaller  number  than
+       the  total number of calls, because not all calls to match() are recur-
+       sive.  This limit is of use only if it is set smaller than match_limit.
+
+       Limiting the recursion depth limits the amount of system stack that can
+       be  used,  or,  when  PCRE2 has been compiled to use memory on the heap
+       instead of the stack, the amount of heap memory that can be used.  This
+       limit  is not relevant, and is ignored, when matching is done using JIT
+       compiled code or by the pcre2_dfa_match() function.
+
+       The default value for recursion_limit can be set when PCRE2  is  built;
+       the  default  default is the same value as the default for match_limit.
+       If the limit is exceeded, pcre2_match() returns  PCRE2_ERROR_RECURSION-
+       LIMIT.  A value for the recursion limit may also be supplied by an item
+       at the start of a pattern of the form
+
+         (*LIMIT_RECURSION=ddd)
+
+       where ddd is a decimal number.  However,  such  a  setting  is  ignored
+       unless  ddd  is  less than the limit set by the caller of pcre2_match()
+       or, if no such limit is set, less than the default.
+
+       int pcre2_set_newline_match(pcre2_match_context *mcontext,
+         uint32_t value);
+
+       This specifies which characters or character sequences are to be recog-
+       nized  as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage
+       return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the
+       two-character  sequence  CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any
+       of the above), or PCRE2_NEWLINE_ANY (any Unicode newline sequence).  If
+       you want to make use of JIT matching, you should not use this function,
+       but instead set the value in a compile context.
+
+       int pcre2_set_recursion_memory_management(
+         pcre2_match_context *mcontext,
+         void *(*private_malloc)(PCRE2_SIZE, void *),
+         void (*private_free)(void *, void *), void *memory_data);
+
+       This function sets up two additional custom memory management functions
+       for  use  by  pcre2_match()  when PCRE2 is compiled to use the heap for
+       remembering backtracking data, instead of recursive function calls that
+       use  the  system stack. There is a discussion about PCRE2's stack usage
+       in the pcre2stack documentation. See the pcre2build  documentation  for
+       details  of  how to build PCRE2. Using the heap for recursion is a non-
+       standard way of building PCRE2, for use in environments that have  lim-
+       ited   stacks.  Because  of  the  greater  use  of  memory  management,
+       pcre2_match() runs more slowly. Functions that  are  different  to  the
+       general  custom  memory  functions are provided so that special-purpose
+       external code can be used for this case, because the memory blocks  are
+       all the same size. The blocks are retained by pcre2_match() until it is
+       about to exit so that they can be  re-used  when  possible  during  the
+       match. In the absence of these functions, the normal custom memory man-
+       agement functions are used, if supplied,  otherwise  the  system  func-
+       tions.
+
+
+CHECKING BUILD-TIME OPTIONS
+
+       int pcre2_config(uint32_t what, void *where, PCRE2_SIZE length);
+
+       The  function  pcre2_config()  makes  it possible for a PCRE2 client to
+       discover which optional features have  been  compiled  into  the  PCRE2
+       library.  The  pcre2build  documentation  has  more details about these
+       optional features.
+
+       The first argument for pcre2_config() specifies  which  information  is
+       required.  The  second  argument  is a pointer to memory into which the
+       information is placed, with the final argument  giving  the  length  of
+       this  memory  in  bytes.  For calls that return numerical values, where
+       should point to appropriately aligned memory, with  length  set  to  at
+       least the "sizeof" the data type.
+
+       The returned value from pcre2_config() is zero on success, or the nega-
+       tive error code PCRE2_ERROR_BADOPTION if the value in the  first  argu-
+       ment is not recognized. The following information is available:
+
+         PCRE2_CONFIG_BSR
+
+       The output is an integer whose value indicates what character sequences
+       the \R escape sequence matches by default. A value of 0 means  that  \R
+       matches  any  Unicode  line ending sequence; a value of 1 means that \R
+       matches only CR, LF, or CRLF. The default can be overridden when a pat-
+       tern is compiled or matched.
+
+         PCRE2_CONFIG_JIT
+
+       The output is an integer that is set to one if support for just-in-time
+       compiling is available; otherwise it is set to zero.
+
+         PCRE2_CONFIG_JITTARGET
+
+       FIXME: this needs sorting out once JIT is implemented.  If JIT  support
+       is  available,  the  string  contains  the name of the architecture for
+       which the JIT compiler is configured, for example  "x86  32bit  (little
+       endian + unaligned)". If JIT support is not available, FIXME.
+
+         PCRE2_CONFIG_LINKSIZE
+
+       The  output  is  an  integer that contains the number of bytes used for
+       internal linkage in compiled regular expressions. When PCRE2 is config-
+       ured,  the  value  can  be set to 2, 3, or 4, with the default being 2.
+       This is the value that is returned by pcre2_config(). However, when the
+       16-bit  library  is compiled, a value of 3 is rounded up to 4, and when
+       the 32-bit library is compiled, internal linkages always use  4  bytes,
+       so the configured value is not relevant.
+
+       The default value of 2 for the 8-bit and 16-bit libraries is sufficient
+       for all but the most massive patterns, since it allows the size of  the
+       compiled pattern to be up to 64K code units. Larger values allow larger
+       regular expressions to be compiled by those two libraries, but  at  the
+       expense of slower matching.
+
+         PCRE2_CONFIG_MATCHLIMIT
+
+       The output is an unsigned long integer that gives the default limit for
+       the number of internal matching function calls in a pcre2_match()  exe-
+       cution.  Further details are given with pcre2_match() below.
+
+         PCRE2_CONFIG_NEWLINE
+
+       The  output  is  an integer whose value specifies the default character
+       sequence that is recognized as meaning "newline". The values are:
+
+         1  Carriage return (CR)
+         2  Linefeed (LF)
+         3  Carriage return, linefeed (CRLF)
+         4  Any Unicode line ending
+         5  Any of CR, LF, or CRLF
+
+       The default should normally correspond to  the  standard  sequence  for
+       your operating system.
+
+         PCRE2_CONFIG_PARENSLIMIT
+
+       The  output is an unsigned long integer that gives the maximum depth of
+       nesting of parentheses (of any  kind)  in  a  pattern.  This  limit  is
+       imposed  to  cap the amount of system stack used when a pattern is com-
+       piled. It is specified when PCRE2 is built; the default  is  250.  This
+       limit  does not take into account the stack that may already be used by
+       the calling application.  For  finer  control  over  compilation  stack
+       usage, see pcre2_set_compile_recursion_guard().
+
+         PCRE2_CONFIG_RECURSIONLIMIT
+
+       The output is an unsigned long integer that gives the default limit for
+       the depth of recursion when calling the internal matching function in a
+       pcre2_match()  execution.  Further details are given with pcre2_match()
+       below.
+
+         PCRE2_CONFIG_STACKRECURSE
+
+       The output is an integer that is set to one if internal recursion  when
+       running  pcre2_match()  is implemented by recursive function calls that
+       use the system stack to remember their state. This  is  the  usual  way
+       that PCRE2 is compiled. The output is zero if PCRE2 was compiled to use
+       blocks of data on the heap instead of recursive function calls.
+
+         PCRE2_CONFIG_UNICODE_VERSION
+
+       The where argument should point to a buffer that is at  least  24  code
+       units long. If PCRE2 has been compiled without Unicode support, this is
+       filled with the text "Unicode not supported".  Otherwise,  the  Unicode
+       version  string (for example, "7.0.0") is returned. The string is zero-
+       terminated.
+
+         PCRE2_CONFIG_UNICODE
+
+       The output is an integer that is set  to  one  if  Unicode  support  is
+       available;  otherwise  it  is  set to zero. Unicode support implies UTF
+       support.
+
+         PCRE2_CONFIG_VERSION
+
+       The where argument should point to a buffer that is at  least  12  code
+       units  long.  It  is  filled with the PCRE2 version string, zero-termi-
+       nated.
+
+
+COMPILING A PATTERN
+
+       pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
+         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
+         pcre2_compile_context *ccontext);
+
+       void pcre2_code_free(pcre2_code *code);
+
+       This function compiles a pattern, defined by a pointer to a  string  of
+       code units and a length, into an internal form. If the pattern is zero-
+       terminated, the length should be  specified  as  PCRE2_ZERO_TERMINATED.
+       The  function  returns a pointer to a block of memory that contains the
+       compiled pattern and related data. The caller must free the  memory  by
+       calling pcre2_code_free() when it is no longer needed.
+
+       If  the  compile  context  argument  ccontext  is  NULL,  the memory is
+       obtained by calling malloc(). Otherwise, it is obtained from  the  same
+       memory function that was used for the compile context.
+
+       The options argument contains various bit settings that affect the com-
+       pilation. It should be zero if no options are required.  The  available
+       options  are  described  below. Some of them (in particular, those that
+       are compatible with Perl, but some others as well) can also be set  and
+       unset  from  within  the  pattern  (see the detailed description in the
+       pcre2pattern documentation).
+
+       For those options that can be different in different parts of the  pat-
+       tern,  the contents of the options argument specifies their settings at
+       the start of compilation. The PCRE2_ANCHORED,  PCRE2_NO_UTF_CHECK,  and
+       PCRE2_NO_START_OPTIMIZE  options  can be set at the time of matching as
+       well as at compile time.
+
+       Other, less frequently required compile-time parameters  (for  example,
+       the newline setting) can be provided in a compile context (as described
+       above).
+
+       If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme-
+       diately.  Otherwise, if compilation of a pattern fails, pcre2_compile()
+       returns NULL, having set these variables to an error code and an offset
+       (number   of   code   units)  within  the  pattern,  respectively.  The
+       pcre2_get_error_message() function provides a textual message for  each
+       error code. Compilation errors are positive numbers, but UTF formatting
+       errors are negative numbers. For an invalid UTF-8 or UTF-16 string, the
+       offset is that of the first code unit of the failing character.
+
+       Some  errors are not detected until the whole pattern has been scanned;
+       in these cases, the offset passed back is the length  of  the  pattern.
+       Note  that  the  offset is in code units, not characters, even in a UTF
+       mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char-
+       acter.
+
+       This  code  fragment shows a typical straightforward call to pcre2_com-
+       pile():
+
+         pcre2_code *re;
+         PCRE2_SIZE erroffset;
+         int errorcode;
+         re = pcre2_compile(
+           "^A.*Z",                /* the pattern */
+           PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */
+           0,                      /* default options */
+           &errorcode,             /* for error code */
+           &erroffset,             /* for error offset */
+           NULL);                  /* no compile context */
+
+       The following names for option bits are defined in the  pcre2.h  header
+       file:
+
+         PCRE2_ANCHORED
+
+       If this bit is set, the pattern is forced to be "anchored", that is, it
+       is constrained to match only at the first matching point in the  string
+       that  is being searched (the "subject string"). This effect can also be
+       achieved by appropriate constructs in the pattern itself, which is  the
+       only way to do it in Perl.
+
+         PCRE2_ALLOW_EMPTY_CLASS
+
+       By  default, for compatibility with Perl, a closing square bracket that
+       immediately follows an opening one is treated as a data  character  for
+       the  class.  When  PCRE2_ALLOW_EMPTY_CLASS  is  set,  it terminates the
+       class, which therefore contains no characters and so can never match.
+
+         PCRE2_ALT_BSUX
+
+       This option requests alternative handling of  three  escape  sequences,
+       which  makes  PCRE2's  behaviour more like ECMAscript (aka JavaScript).
+       When it is set:
+
+       (1) \U matches an upper case "U" character; by default \U causes a com-
+       pile time error (Perl uses \U to upper case subsequent characters).
+
+       (2) \u matches a lower case "u" character unless it is followed by four
+       hexadecimal digits, in which case the hexadecimal  number  defines  the
+       code  point  to match. By default, \u causes a compile time error (Perl
+       uses it to upper case the following character).
+
+       (3) \x matches a lower case "x" character unless it is followed by  two
+       hexadecimal  digits,  in  which case the hexadecimal number defines the
+       code point to match. By default, as in Perl, a  hexadecimal  number  is
+       always expected after \x, but it may have zero, one, or two digits (so,
+       for example, \xz matches a binary zero character followed by z).
+
+         PCRE2_AUTO_CALLOUT
+
+       If this bit  is  set,  pcre2_compile()  automatically  inserts  callout
+       items, all with number 255, before each pattern item. For discussion of
+       the callout facility, see the pcre2callout documentation.
+
+         PCRE2_CASELESS
+
+       If this bit is set, letters in the pattern match both upper  and  lower
+       case  letters in the subject. It is equivalent to Perl's /i option, and
+       it can be changed within a pattern by a (?i) option setting.
+
+         PCRE2_DOLLAR_ENDONLY
+
+       If this bit is set, a dollar metacharacter in the pattern matches  only
+       at  the  end  of the subject string. Without this option, a dollar also
+       matches immediately before a newline at the end of the string (but  not
+       before  any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored
+       if PCRE2_MULTILINE is set. There is no equivalent  to  this  option  in
+       Perl, and no way to set it within a pattern.
+
+         PCRE2_DOTALL
+
+       If  this  bit  is  set,  a dot metacharacter in the pattern matches any
+       character, including one that indicates a  newline.  However,  it  only
+       ever matches one character, even if newlines are coded as CRLF. Without
+       this option, a dot does not match when the current position in the sub-
+       ject  is  at  a newline. This option is equivalent to Perl's /s option,
+       and it can be changed within a pattern by a (?s) option setting. A neg-
+       ative class such as [^a] always matches newline characters, independent
+       of the setting of this option.
+
+         PCRE2_DUPNAMES
+
+       If this bit is set, names used to identify capturing  subpatterns  need
+       not be unique. This can be helpful for certain types of pattern when it
+       is known that only one instance of the named  subpattern  can  ever  be
+       matched.  There  are  more details of named subpatterns below; see also
+       the pcre2pattern documentation.
+
+         PCRE2_EXTENDED
+
+       If this bit is set, most white space  characters  in  the  pattern  are
+       totally  ignored  except when escaped or inside a character class. How-
+       ever, white space is not allowed within  sequences  such  as  (?>  that
+       introduce various parenthesized subpatterns, nor within numerical quan-
+       tifiers such as {1,3}.  Ignorable white space is permitted  between  an
+       item  and a following quantifier and between a quantifier and a follow-
+       ing + that indicates possessiveness.
+
+       PCRE2_EXTENDED also causes characters between an unescaped # outside  a
+       character  class  and the next newline, inclusive, to be ignored, which
+       makes it possible to include comments inside complicated patterns. Note
+       that  the  end of this type of comment is a literal newline sequence in
+       the pattern; escape sequences that happen to represent a newline do not
+       count.  PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be
+       changed within a pattern by a (?x) option setting.
+
+       Which characters are interpreted as newlines can be specified by a set-
+       ting  in  the compile context that is passed to pcre2_compile() or by a
+       special sequence at the start of the pattern, as described in the  sec-
+       tion  entitled "Newline conventions" in the pcre2pattern documentation.
+       A default is defined when PCRE2 is built.
+
+         PCRE2_FIRSTLINE
+
+       If this option is set, an  unanchored  pattern  is  required  to  match
+       before  or  at  the  first  newline  in  the subject string, though the
+       matched text may continue over the newline.
+
+         PCRE2_MATCH_UNSET_BACKREF
+
+       If this option is set, a back reference to an  unset  subpattern  group
+       matches  an  empty  string (by default this causes the current matching
+       alternative to fail).  A pattern such as  (\1)(a)  succeeds  when  this
+       option  is set (assuming it can find an "a" in the subject), whereas it
+       fails by default, for Perl compatibility.  Setting  this  option  makes
+       PCRE2 behave more like ECMAscript (aka JavaScript).
+
+         PCRE2_MULTILINE
+
+       By  default,  for  the purposes of matching "start of line" and "end of
+       line", PCRE2 treats the subject string as consisting of a  single  line
+       of  characters,  even  if  it actually contains newlines. The "start of
+       line" metacharacter (^) matches only at the start of  the  string,  and
+       the  "end  of  line"  metacharacter  ($) matches only at the end of the
+       string,  or  before  a  terminating  newline  (except  when  PCRE2_DOL-
+       LAR_ENDONLY  is  set).  Note, however, that unless PCRE2_DOTALL is set,
+       the "any character" metacharacter (.) does not match at a newline. This
+       behaviour (for ^, $, and dot) is the same as Perl.
+
+       When  PCRE2_MULTILINE  is  set,  the  "start of line" and "end of line"
+       constructs match immediately following or immediately  before  internal
+       newlines  in  the  subject string, respectively, as well as at the very
+       start and end. This is equivalent to Perl's /m option, and  it  can  be
+       changed within a pattern by a (?m) option setting. If there are no new-
+       lines in a subject string, or no occurrences of ^ or $  in  a  pattern,
+       setting PCRE2_MULTILINE has no effect.
+
+         PCRE2_NEVER_UCP
+
+       This  option  locks  out the use of Unicode properties for handling \B,
+       \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as
+       described  for  the  PCRE2_UCP option below. In particular, it prevents
+       the creator of the pattern from enabling this facility by starting  the
+       pattern  with  (*UCP).  This may be useful in applications that process
+       patterns from external sources. The  option  combination  PCRE2_UCP and
+       PCRE2_NEVER_UCP causes an error.
+
+         PCRE2_NEVER_UTF
+
+       This  option  locks out interpretation of the pattern as UTF-8, UTF-16,
+       or UTF-32, depending on which library is in use. In particular, it pre-
+       vents  the  creator of the pattern from switching to UTF interpretation
+       by starting the pattern with (*UTF). This may be useful in applications
+       that  process  patterns  from  external  sources.  The  combination  of
+       PCRE2_UTF and PCRE2_NEVER_UTF causes an error.
+
+         PCRE2_NO_AUTO_CAPTURE
+
+       If this option is set, it disables the use of numbered capturing paren-
+       theses  in the pattern. Any opening parenthesis that is not followed by
+       ? behaves as if it were followed by ?: but named parentheses can  still
+       be  used  for  capturing  (and  they acquire numbers in the usual way).
+       There is no equivalent of this option in Perl.
+
+         PCRE2_NO_AUTO_POSSESS
+
+       If this option is set, it disables "auto-possessification", which is an
+       optimization  that,  for example, turns a+b into a++b in order to avoid
+       backtracks into a+ that can never be successful. However,  if  callouts
+       are  in  use,  auto-possessification means that some callouts are never
+       taken. You can set this option if you want the matching functions to do
+       a  full  unoptimized  search and run all the callouts, but it is mainly
+       provided for testing purposes.
+
+         PCRE2_NO_START_OPTIMIZE
+
+       This is an option that acts at matching time; that is, it is really  an
+       option for  pcre2_match() or pcre2_dfa_match(). If it is set at compile
+       time, it is remembered with the compiled pattern and assumed at  match-
+       ing  time.  This is necessary if you want to use JIT execution, because
+       the JIT compiler needs to know whether or not this option is  set.  For
+       details,  see  the discussion of PCRE2_NO_START_OPTIMIZE in the section
+       on pcre2_match() options below.
+
+         PCRE2_NO_UTF_CHECK
+
+       When PCRE2_UTF is set, the validity of the pattern as a UTF  string  is
+       automatically  checked.  There  are  discussions  about the validity of
+       UTF-8 strings, UTF-16 strings, and UTF-32 strings in  the  pcre2unicode
+       document.  If an invalid UTF sequence is found, pcre2_compile() returns
+       a negative error code.
+
+       If you know that your pattern is valid, and you want to skip this check
+       for  performance  reasons,  you  can set the PCRE2_NO_UTF_CHECK option.
+       When it is set, the effect of passing an invalid UTF string as  a  pat-
+       tern  is  undefined.  It  may cause your program to crash or loop. Note
+       that  this  option  can   also   be   passed   to   pcre2_match()   and
+       pcre2_dfa_match(), to suppress validity checking of the subject string.
+
+         PCRE2_UCP
+
+       This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
+       \w, and some of the POSIX character classes.  By  default,  only  ASCII
+       characters  are recognized, but if PCRE2_UCP is set, Unicode properties
+       are used instead to classify characters. More details are given in  the
+       section on generic character types in the pcre2pattern page. If you set
+       PCRE2_UCP, matching one of the items it affects takes much longer.  The
+       option is available only if PCRE2 has been compiled with UTF support.
+
+         PCRE2_UNGREEDY
+
+       This  option  inverts  the "greediness" of the quantifiers so that they
+       are not greedy by default, but become greedy if followed by "?". It  is
+       not  compatible  with Perl. It can also be set by a (?U) option setting
+       within the pattern.
+
+         PCRE2_UTF
+
+       This option causes PCRE2 to regard both the  pattern  and  the  subject
+       strings  that  are  subsequently processed as strings of UTF characters
+       instead of single-code-unit strings. However, it is available only when
+       PCRE2  is  built to include UTF support. If not, the use of this option
+       provokes an error. Details of how this option changes the behaviour  of
+       PCRE2 are given in the pcre2unicode page.
+
+
+COMPILATION ERROR CODES
+
+       There  are over 80 positive error codes that pcre2_compile() may return
+       if it finds an error in the pattern. There are also some negative error
+       codes  that  are  used  for  invalid UTF strings. These are the same as
+       given by pcre2_match() and pcre2_dfa_match(), and are described in  the
+       pcre2unicode page. The pcre2_get_error_message() function can be called
+       to obtain a textual error message from any error code.
+
+
+JUST-IN-TIME (JIT) COMPILATION
+
+       int pcre2_jit_compile(pcre2_code *code, uint32_t options);
+
+       int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack);
+
+       void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
+
+       pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
+         PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
+
+       void pcre2_jit_stack_assign(const pcre2_code *code,
+         pcre2_jit_callback callback_function, void *callback_data);
+
+       void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
+
+       These functions provide support for  JIT  compilation,  which,  if  the
+       just-in-time  compiler  is available, further processes a compiled pat-
+       tern into machine code that executes much faster than the pcre2_match()
+       interpretive  matching function. Full details are given in the pcre2jit
+       documentation.
+
+       JIT compilation is a heavyweight optimization. It can  take  some  time
+       for  patterns  to  be analyzed, and for one-off matches and simple pat-
+       terns the benefit of faster execution might be offset by a much  slower
+       compilation  time.   Most, but not all patterns can be optimized by the
+       JIT compiler.
+
+
+LOCALE SUPPORT
+
+       PCRE2 handles caseless matching, and determines whether characters  are
+       letters,  digits, or whatever, by reference to a set of tables, indexed
+       by character code point. When running  in  UTF-8  mode,  or  using  the
+       16-bit  or  32-bit libraries, this applies only to characters with code
+       points less than 256. By default, higher-valued code points never match
+       escapes  such as \w or \d. However, if PCRE2 is built with UTF support,
+       all characters can be tested with \p and  \P,  or,  alternatively,  the
+       PCRE2_UCP  option can be set when a pattern is compiled; this causes \w
+       and friends to use Unicode property support  instead  of  the  built-in
+       tables.
+
+       The  use  of  locales  with Unicode is discouraged. If you are handling
+       characters with code points greater than 128,  you  should  either  use
+       Unicode support, or use locales, but not try to mix the two.
+
+       PCRE2  contains  an  internal  set of character tables that are used by
+       default.  These are sufficient for  many  applications.  Normally,  the
+       internal tables recognize only ASCII characters. However, when PCRE2 is
+       built, it is possible to cause the internal tables to be rebuilt in the
+       default "C" locale of the local system, which may cause them to be dif-
+       ferent.
+
+       The internal tables can be overridden by tables supplied by the  appli-
+       cation  that  calls  PCRE2.  These may be created in a different locale
+       from the default.  As more and more applications change to  using  Uni-
+       code, the need for this locale support is expected to die away.
+
+       External  tables  are built by calling the pcre2_maketables() function,
+       in the relevant locale. The result can be passed to pcre2_compile()  as
+       often   as  necessary,  by  creating  a  compile  context  and  calling
+       pcre2_set_character_tables() to set the  tables  pointer  therein.  For
+       example,  to  build  and use tables that are appropriate for the French
+       locale (where accented characters with  values  greater  than  128  are
+       treated as letters), the following code could be used:
+
+         setlocale(LC_CTYPE, "fr_FR");
+         tables = pcre2_maketables(NULL);
+         ccontext = pcre2_compile_context_create(NULL);
+         pcre2_set_character_tables(ccontext, tables);
+         re = pcre2_compile(..., ccontext);
+
+       The  locale  name "fr_FR" is used on Linux and other Unix-like systems;
+       if you are using Windows, the name for the French locale  is  "french".
+       It  is the caller's responsibility to ensure that the memory containing
+       the tables remains available for as long as it is needed.
+
+       The pointer that is passed (via the compile context) to pcre2_compile()
+       is  saved  with  the  compiled pattern, and the same tables are used by
+       pcre2_match() and pcre2_dfa_match(). Thus, for any single pattern, com-
+       pilation,  and  matching  all  happen in the same locale, but different
+       patterns can be processed in different locales.
+
+
+INFORMATION ABOUT A COMPILED PATTERN
+
+       int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
+         void *where);
+
+       The pcre2_pattern_info() function returns information about a  compiled
+       pattern.  The  first argument is a pointer to the compiled pattern. The
+       second argument specifies which piece of information is  required,  and
+       the  third argument is a pointer to a variable to receive the data. The
+       yield of the function is zero for success, or one of the following neg-
+       ative numbers:
+
+         PCRE2_ERROR_NULL           the argument code was NULL
+                                    the argument where was NULL
+         PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+         PCRE2_ERROR_BADOPTION      the value of what was invalid
+         PCRE2_ERROR_UNSET          the requested field is not set
+
+       The  "magic  number" is placed at the start of each compiled pattern as
+       a  simple check against passing an arbitrary memory pointer.  Here is a
+       typical  call of pcre2_pattern_info(), to obtain the length of the com-
+       piled pattern:
+
+         int rc;
+         size_t length;
+         rc = pcre2_pattern_info(
+           re,               /* result of pcre2_compile() */
+           PCRE2_INFO_SIZE,  /* what is required */
+           &length);         /* where to put the data */
+
+       The possible values for the second argument are defined in pcre2.h, and
+       are as follows:
+
+         PCRE2_INFO_ALLOPTIONS
+         PCRE2_INFO_ARGOPTIONS
+
+       Return a copy of the pattern's options. The third argument should point
+       to a  uint32_t  variable.  PCRE2_INFO_ARGOPTIONS  returns  exactly  the
+       options  that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP-
+       TIONS returns the compile options as modified by any  top-level  option
+       settings  at  the start of the pattern itself. In other words, they are
+       the options that will be in force when matching starts. For example, if
+       the  pattern  /(?im)abc(?-i)d/  is  compiled  with  the  PCRE2_EXTENDED
+       option,   the   result   is   PCRE2_CASELESS,   PCRE2_MULTILINE,    and
+       PCRE2_EXTENDED.
+
+       A  pattern  is  automatically anchored by PCRE2 if all of its top-level
+       alternatives begin with one of the following:
+
+         ^     unless PCRE2_MULTILINE is set
+         \A    always
+         \G    always
+         .*    if PCRE2_DOTALL is set and there are no back
+                 references to the subpattern in which .* appears
+
+       For such patterns,  the  PCRE2_ANCHORED  bit  is  set  in  the  options
+       returned for PCRE2_INFO_ALLOPTIONS.
+
+         PCRE2_INFO_BACKREFMAX
+
+       Return  the  number  of  the highest back reference in the pattern. The
+       third argument should point to a  uint32_t variable. Zero  is  returned
+       if there are no back references.
+
+         PCRE2_INFO_BSR
+
+       The output is a uint32_t whose value indicates what character sequences
+       the \R escape sequence matches by default. A value of 0 means  that  \R
+       matches  any  Unicode  line ending sequence; a value of 1 means that \R
+       matches only CR, LF, or CRLF. The default can be overridden when a pat-
+       tern is matched.
+
+         PCRE2_INFO_CAPTURECOUNT
+
+       Return  the  number  of capturing subpatterns in the pattern. The third
+       argument should point to a uint32_t variable.
+
+         PCRE2_INFO_FIRSTCODETYPE
+
+       Return information about the first code unit of any matched string, for
+       a  non-anchored pattern. The third argument should point to a  uint32_t
+       variable.
+
+       If there is a fixed first value, for example, the  letter  "c"  from  a
+       pattern  such  as  (cat|cow|coyote),  1  is returned, and the character
+       value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there  is  no
+       fixed first value, and if either
+
+       (a) the pattern was compiled with the PCRE2_MULTILINE option, and every
+       branch starts with "^", or
+
+       (b) every branch of the pattern starts with ".*"  and  PCRE2_DOTALL  is
+       not set (if it were set, the pattern would be anchored),
+
+       2 is returned, indicating that the pattern matches only at the start of
+       a subject string or after any newline within the string. Otherwise 0 is
+       returned. For anchored patterns, 0 is returned.
+
+         PCRE2_INFO_FIRSTCODEUNIT
+
+       Return  the  value  of the first code unit of any matched string in the
+       situation where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
+       The  third  argument should point to a  uint32_t variable. In the 8-bit
+       library, the value is always less than 256. In the 16-bit  library  the
+       value  can  be  up  to 0xffff. In the 32-bit library in UTF-32 mode the
+       value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32
+       mode.
+
+         PCRE2_INFO_FIRSTBITMAP
+
+       In  the absence of a single first code unit for a non-anchored pattern,
+       pcre2_compile() may construct a 256-bit table that defines a fixed  set
+       of  values for the first code unit in any match. For example, a pattern
+       that starts with [abc] results in a table with  three  bits  set.  When
+       code  unit  values greater than 255 are supported, the flag bit for 255
+       means "any code unit of value 255 or above". If such a table  was  con-
+       structed,  a pointer to it is returned. Otherwise NULL is returned. The
+       third argument should point to a const uint8_t * variable.
+
+         PCRE2_INFO_HASCRORLF
+
+       Return 1 if the pattern contains any explicit  matches  for  CR  or  LF
+       characters, otherwise 0. The third argument should point to a  uint32_t
+       variable. An explicit match is either a literal CR or LF character,  or
+       \r or \n.
+
+         PCRE2_INFO_JCHANGED
+
+       Return  1  if  the (?J) or (?-J) option setting is used in the pattern,
+       otherwise 0. The third argument should point to  a  uint32_t  variable.
+       (?J)  and  (?-J) set and unset the local PCRE2_DUPNAMES option, respec-
+       tively.
+
+         PCRE2_INFO_JITSIZE
+
+       If the compiled pattern was successfully  processed  by  pcre2_jit_com-
+       pile(),  return  the  size  of  the JIT compiled code, otherwise return
+       zero. The third argument should point to a size_t variable.
+
+         PCRE2_INFO_LASTCODETYPE
+
+       Returns 1 if there is a rightmost literal code unit that must exist  in
+       any  matched string, other than at its start. The third argument should
+       point to a  uint32_t  variable.  If  there  is  no  such  value,  0  is
+       returned.  When  1  is  returned,  the  code  unit  value itself can be
+       retrieved using PCRE2_INFO_LASTCODEUNIT.
+
+       For anchored patterns, a last literal value is recorded only if it fol-
+       lows  something  of  variable  length.  For  example,  for  the pattern
+       /^a\d+z\d+/  the  returned  value  is  1  (with   "z"   returned   from
+       PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0.
+
+         PCRE2_INFO_LASTCODEUNIT
+
+       Return  the value of the rightmost literal code unit that must exist in
+       any matched string, other than at its start, if such a value  has  been
+       recorded.  The  third argument should point to a  uint32_t variable. If
+       there is no such value, 0 is returned.
+
+         PCRE2_INFO_MATCHEMPTY
+
+       Return 1 if the pattern can match an empty  string,  otherwise  0.  The
+       third argument should point to a uint32_t variable.
+
+         PCRE2_INFO_MATCHLIMIT
+
+       If  the  pattern  set  a  match  limit by including an item of the form
+       (*LIMIT_MATCH=nnnn) at the start, the  value  is  returned.  The  third
+       argument  should  point to an unsigned 32-bit integer. If no such value
+       has been set,  the  call  to  pcre2_pattern_info()  returns  the  error
+       PCRE2_ERROR_UNSET.
+
+         PCRE2_INFO_MAXLOOKBEHIND
+
+       Return the number of characters (not code units) in the longest lookbe-
+       hind assertion in the pattern. The third argument should  point  to  an
+       unsigned  32-bit  integer. This information is useful when doing multi-
+       segment matching using the partial matching facilities. Note  that  the
+       simple assertions \b and \B require a one-character lookbehind. \A also
+       registers a one-character  lookbehind,  though  it  does  not  actually
+       inspect  the  previous  character.  This is to ensure that at least one
+       character from the old segment is retained when a new segment  is  pro-
+       cessed. Otherwise, if there are no lookbehinds in the pattern, \A might
+       match incorrectly at the start of a new segment.
+
+         PCRE2_INFO_MINLENGTH
+
+       If a minimum length for matching  subject  strings  was  computed,  its
+       value  is  returned.  Otherwise the returned value is 0. The value is a
+       number of characters, which in UTF mode may be different from the  num-
+       ber  of  code  units.   The  third argument should point to a  uint32_t
+       variable. The value is a lower bound to  the  length  of  any  matching
+       string.  There  may  not be any strings of that length that do actually
+       match, but every string that does match is at least that long.
+
+         PCRE2_INFO_NAMECOUNT
+         PCRE2_INFO_NAMEENTRYSIZE
+         PCRE2_INFO_NAMETABLE
+
+       PCRE2 supports the use of named as well as numbered capturing parenthe-
+       ses.  The names are just an additional way of identifying the parenthe-
+       ses, which still acquire numbers. Several convenience functions such as
+       pcre2_substring_get_byname()  are provided for extracting captured sub-
+       strings by name. It is also possible to extract the data  directly,  by
+       first  converting  the  name to a number in order to access the correct
+       pointers in the output vector (described with pcre2_match() below).  To
+       do  the  conversion,  you  need to use the name-to-number map, which is
+       described by these three values.
+
+       The map consists of a number of  fixed-size  entries.  PCRE2_INFO_NAME-
+       COUNT  gives  the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives
+       the size of each entry; both of these  return  a  uint32_t  value.  The
+       entry   size   depends   on   the   length   of   the   longest   name.
+       PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table.
+       This  is  a  PCRE2_SPTR  pointer to a block of code units. In the 8-bit
+       library, the first two bytes of each entry are the number of  the  cap-
+       turing parenthesis, most significant byte first. In the 16-bit library,
+       the pointer points to 16-bit data units, the first  of  which  contains
+       the  parenthesis  number.  In the 32-bit library, the pointer points to
+       32-bit data units, the first of which contains the parenthesis  number.
+       The rest of the entry is the corresponding name, zero terminated.
+
+       The  names are in alphabetical order. If (?| is used to create multiple
+       groups with the same number, as described in the section  on  duplicate
+       subpattern  numbers  in  the pcre2pattern page, the groups may be given
+       the same name, but there is only one  entry  in  the  table.  Different
+       names for groups of the same number are not permitted.
+
+       Duplicate  names  for subpatterns with different numbers are permitted,
+       but only if PCRE2_DUPNAMES is set. They appear  in  the  table  in  the
+       order  in  which  they were found in the pattern. In the absence of (?|
+       this is the order of increasing number; when (?| is used  this  is  not
+       necessarily the case because later subpatterns may have lower numbers.
+
+       As  a  simple  example of the name/number table, consider the following
+       pattern after compilation by the 8-bit library  (assume  PCRE2_EXTENDED
+       is set, so white space - including newlines - is ignored):
+
+         (?<date> (?<year>(\d\d)?\d\d) -
+         (?<month>\d\d) - (?<day>\d\d) )
+
+       There  are  four  named subpatterns, so the table has four entries, and
+       each entry in the table is eight bytes long. The table is  as  follows,
+       with non-printing bytes shown in hexadecimal, and undefined bytes shown
+       as ??:
+
+         00 01 d  a  t  e  00 ??
+         00 05 d  a  y  00 ?? ??
+         00 04 m  o  n  t  h  00
+         00 02 y  e  a  r  00 ??
+
+       When writing code to extract data  from  named  subpatterns  using  the
+       name-to-number  map,  remember that the length of the entries is likely
+       to be different for each compiled pattern.
+
+         PCRE2_INFO_NEWLINE
+
+       The output is a uint32_t whose value specifies  the  default  character
+       sequence  that  will be recognized as meaning "newline" while matching.
+       The values are:
+
+         1  Carriage return (CR)
+         2  Linefeed (LF)
+         3  Carriage return, linefeed (CRLF)
+         4  Any Unicode line ending
+         5  Any of CR, LF, or CRLF
+
+       The default can be overridden when a pattern is matched.
+
+         PCRE2_INFO_RECURSIONLIMIT
+
+       If the pattern set a recursion limit by including an item of  the  form
+       (*LIMIT_RECURSION=nnnn)  at the start, the value is returned. The third
+       argument should point to an unsigned 32-bit integer. If no  such  value
+       has  been  set,  the  call  to  pcre2_pattern_info()  returns the error
+       PCRE2_ERROR_UNSET.
+
+         PCRE2_INFO_SIZE
+
+       Return the size of  the  compiled  pattern  in  bytes  (for  all  three
+       libraries).  The third argument should point to a size_t variable. This
+       value does not include the size of the  pcre2_code  structure  that  is
+       returned by pcre2_compile(). The value that is used when pcre2_compile()
+       is getting memory in which to place the  compiled  data  is  the  value
+       returned by this option plus the size of the pcre2_code structure. Pro-
+       cessing a pattern with the  JIT  compiler  does  not  alter  the  value
+       returned by this option.
+
+
+THE MATCH DATA BLOCK
+
+       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
+         pcre2_general_context *gcontext);
+
+       pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *code,
+         pcre2_general_context *gcontext);
+
+       void pcre2_match_data_free(pcre2_match_data *match_data);
+
+       Information  about  successful  and unsuccessful matches is placed in a
+       match data block, which is an opaque  structure  that  is  accessed  by
+       function  calls.  In particular, the match data block contains a vector
+       of offsets into the subject string that define the matched part of  the
+       subject and any substrings that were captured. This is known as the
+       ovector.
+
+       Before calling pcre2_match() or pcre2_dfa_match()  you  must  create  a
+       match  data  block  by calling one of the creation functions above. For
+       pcre2_match_data_create(), the first argument is the number of pairs of
+       offsets in the ovector. One pair of offsets is required to identify the
+       string that matched the whole pattern, with another pair for each  cap-
+       tured  substring.  For  example,  a  value of 4 creates enough space to
+       record the matched portion of the  subject  plus  three  captured  sub-
+       strings.
+
+       For  pcre2_match_data_create_from_pattern(),  the  first  argument is a
+       pointer to a compiled pattern. In this case the ovector is  created  to
+       be  exactly  the  right size to hold all the substrings a pattern might
+       capture.
+
+       The second argument of both these functions is a pointer to  a  general
+       context,  which  can specify custom memory management for obtaining the
+       memory for the match data block. If you are  not  using  custom  memory
+       management, pass NULL.
+
+       A  match  data block can be used many times, with the same or different
+       compiled patterns. When it is no longer needed, it should be  freed  by
+       calling  pcre2_match_data_free().  How  to  extract  information from a
+       match data block after a match operation is described in  the  sections
+       on matched strings and other match data below.
+
+
+MATCHING A PATTERN: THE TRADITIONAL FUNCTION
+
+       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext);
+
+       The  function pcre2_match() is called to match a subject string against
+       a compiled pattern, which is passed in the code argument. You can  call
+       pcre2_match() with the same code argument as many times as you like, in
+       order to find multiple matches in the subject string or to  match  dif-
+       ferent subject strings with the same pattern.
+
+       This  function  is  the  main  matching facility of the library, and it
+       operates in a Perl-like manner. For specialist use  there  is  also  an
+       alternative  matching function, which is described below in the section
+       about the pcre2_dfa_match() function.
+
+       Here is an example of a simple call to pcre2_match():
+
+         pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+         int rc = pcre2_match(
+           re,             /* result of pcre2_compile() */
+           "some string",  /* the subject string */
+           11,             /* the length of the subject string */
+           0,              /* start at offset 0 in the subject */
+           0,              /* default options */
+           md,             /* the match data block */
+           NULL);          /* a match context; NULL means use defaults */
+
+       If the subject string is zero-terminated, the length can  be  given  as
+       PCRE2_ZERO_TERMINATED. A match context must be provided if certain less
+       common matching parameters are to be changed. For details, see the sec-
+       tion on the match context above.
+
+   The string to be matched by pcre2_match()
+
+       The  subject string is passed to pcre2_match() as a pointer in subject,
+       a length in length, and a starting offset in  startoffset.  The  length
+       and  offset  are  in  code units, not characters.  That is, they are in
+       bytes for the 8-bit library, 16-bit code units for the 16-bit  library,
+       and  32-bit  code units for the 32-bit library, whether or not UTF pro-
+       cessing is enabled.
+
+       If startoffset is greater than the length of the subject, pcre2_match()
+       returns  PCRE2_ERROR_BADOFFSET.  When  the starting offset is zero, the
+       search for a match starts at the beginning of the subject, and this  is
+       by far the most common case. In UTF-8 or UTF-16 mode, the starting off-
+       set must point to the start of a character, or to the end of  the  sub-
+       ject  (in  UTF-32 mode, one code unit equals one character, so all off-
+       sets are valid). Like the  pattern  string,  the  subject  may  contain
+       binary zeroes.
+
+       A  non-zero  starting offset is useful when searching for another match
+       in the same subject by calling pcre2_match()  again  after  a  previous
+       success.   Setting  startoffset  differs  from passing over a shortened
+       string and setting PCRE2_NOTBOL in the case of a  pattern  that  begins
+       with any kind of lookbehind. For example, consider the pattern
+
+         \Biss\B
+
+       which  finds  occurrences  of "iss" in the middle of words. (\B matches
+       only if the current position in the subject is not  a  word  boundary.)
+       When applied to the string "Mississippi" the first call to pcre2_match()
+       finds the first occurrence. If pcre2_match() is called again with  just
+       the  remainder  of  the  subject, namely  "issippi", it does not match,
+       because \B is always false at the start of the subject, which is deemed
+       to  be  a word boundary. However, if pcre2_match() is passed the entire
+       string again, but with startoffset set to 4, it finds the second occur-
+       rence  of "iss" because it is able to look behind the starting point to
+       discover that it is preceded by a letter.
+
+       Finding all the matches in a subject is tricky  when  the  pattern  can
+       match an empty string. It is possible to emulate Perl's /g behaviour by
+       first  trying  the  match  again  at  the   same   offset,   with   the
+       PCRE2_NOTEMPTY_ATSTART  and  PCRE2_ANCHORED  options,  and then if that
+       fails, advancing the starting  offset  and  trying  an  ordinary  match
+       again.  There  is  some  code  that  demonstrates how to do this in the
+       pcre2demo sample program. In the most general case, you have  to  check
+       to  see  if the newline convention recognizes CRLF as a newline, and if
+       so, and the current character is CR followed by LF, advance the  start-
+       ing offset by two characters instead of one.
+
+       If  a  non-zero starting offset is passed when the pattern is anchored,
+       one attempt to match at the given offset is made. This can only succeed
+       if  the  pattern  does  not require the match to be at the start of the
+       subject.
+
+   Option bits for pcre2_match()
+
+       The unused bits of the options argument for pcre2_match() must be zero.
+       The  only  bits  that  may  be  set  are  PCRE2_ANCHORED, PCRE2_NOTBOL,
+       PCRE2_NOTEOL,          PCRE2_NOTEMPTY,          PCRE2_NOTEMPTY_ATSTART,
+       PCRE2_NO_START_OPTIMIZE,  PCRE2_NO_UTF_CHECK,  PCRE2_PARTIAL_HARD,  and
+       PCRE2_PARTIAL_SOFT. Their action is described below.
+
+       If the pattern was successfully processed  by  the  just-in-time  (JIT)
+       compiler,  the  only  supported options for matching using the JIT code
+       are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+       PCRE2_NO_UTF_CHECK,  PCRE2_PARTIAL_HARD,  and PCRE2_PARTIAL_SOFT. If an
+       unsupported option is used, JIT matching is  disabled  and  the  normal
+       interpretive code in pcre2_match() is run.
+
+         PCRE2_ANCHORED
+
+       The PCRE2_ANCHORED option limits pcre2_match() to matching at the first
+       matching position. If a pattern was compiled  with  PCRE2_ANCHORED,  or
+       turned  out to be anchored by virtue of its contents, it cannot be made
+       unanchored at matching time. Note that setting the option at match time
+       disables JIT matching.
+
+         PCRE2_NOTBOL
+
+       This option specifies that the first character of the subject string is
+       not the beginning of a line, so the circumflex metacharacter should not
+       match before it. Setting this without PCRE2_MULTILINE (at compile time)
+       causes circumflex never to match. This option affects only  the  behav-
+       iour of the circumflex metacharacter. It does not affect \A.
+
+         PCRE2_NOTEOL
+
+       This option specifies that the end of the subject string is not the end
+       of a line, so the dollar metacharacter should not match it nor  (except
+       in  multiline mode) a newline immediately before it. Setting this with-
+       out PCRE2_MULTILINE (at compile time) causes  dollar  never  to  match.
+       This  option affects only the behaviour of the dollar metacharacter. It
+       does not affect \Z or \z.
+
+         PCRE2_NOTEMPTY
+
+       An empty string is not considered to be a valid match if this option is
+       set.  If  there are alternatives in the pattern, they are tried. If all
+       the alternatives match the empty string, the entire  match  fails.  For
+       example, if the pattern
+
+         a?b?
+
+       is  applied  to  a  string not beginning with "a" or "b", it matches an
+       empty string at the start of the subject. With PCRE2_NOTEMPTY set, this
+       match  is  not  valid,  so  PCRE2  searches further into the string for
+       occurrences of "a" or "b".
+
+         PCRE2_NOTEMPTY_ATSTART
+
+       This is like PCRE2_NOTEMPTY, except that an empty string match that  is
+       not  at  the  start  of  the  subject  is  permitted. If the pattern is
+       anchored, such a match can occur only if the pattern contains \K.
+
+         PCRE2_NO_START_OPTIMIZE
+
+       There are a number of optimizations  that  pcre2_match()  uses  at  the
+       start  of a match, in order to speed up the process. For example, if it
+       is known that an unanchored match must start with a specific character,
+       it searches the subject for that character, and fails immediately if it
+       cannot find it, without actually running the  main  matching  function.
+       This means that a special item such as (*COMMIT) at the start of a pat-
+       tern is not considered until after a suitable starting  point  for  the
+       match  has been found. Also, when callouts or (*MARK) items are in use,
+       these "start-up" optimizations can cause them to be skipped if the pat-
+       tern is never actually used. The start-up optimizations are in effect a
+       pre-scan of the subject that takes place before the pattern is run.
+
+       The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
+       possibly  causing  performance  to  suffer,  but ensuring that in cases
+       where the result is "no match", the callouts do occur, and  that  items
+       such as (*COMMIT) and (*MARK) are considered at every possible starting
+       position in the subject string. If PCRE2_NO_START_OPTIMIZE  is  set  at
+       compile  time,  it  cannot  be  unset  at  matching  time.  The  use of
+       PCRE2_NO_START_OPTIMIZE at  matching  time  (that  is,  passing  it  to
+       pcre2_match())  disables  JIT execution; in this situation, matching is
+       always done interpretively.
+
+       Setting PCRE2_NO_START_OPTIMIZE can change the outcome  of  a  matching
+       operation.  Consider the pattern
+
+         (*COMMIT)ABC
+
+       When  this  is compiled, PCRE2 records the fact that a match must start
+       with the character "A". Suppose the subject  string  is  "DEFABC".  The
+       start-up  optimization  scans along the subject, finds "A" and runs the
+       first match attempt from there. The (*COMMIT) item means that the  pat-
+       tern  must  match the current starting position, which in this case, it
+       does. However, if the same match is  run  with  PCRE2_NO_START_OPTIMIZE
+       set,  the  initial  scan  along the subject string does not happen. The
+       first match attempt is run starting  from  "D"  and  when  this  fails,
+       (*COMMIT)  prevents  any  further  matches  being tried, so the overall
+       result is "no match". There are also other start-up optimizations.  For
+       example, a minimum length for the subject may be recorded. Consider the
+       pattern
+
+         (*MARK:A)(X|Y)
+
+       The minimum length for a match is one  character.  If  the  subject  is
+       "ABC", there will be attempts to match "ABC", "BC", and "C". An attempt
+       to match an empty string at the end of the subject does not take place,
+       because  PCRE2  knows  that  the  subject  is now too short, and so the
+       (*MARK) is never encountered. In this case, the optimization  does  not
+       affect the overall match result, which is still "no match", but it does
+       affect the auxiliary information that is returned.
+
+         PCRE2_NO_UTF_CHECK
+
+       When PCRE2_UTF is set at compile time, the validity of the subject as a
+       UTF  string  is  checked  by default when pcre2_match() is subsequently
+       called.  The entire string is checked before any other processing takes
+       place,  and a negative error code is returned if the check fails. There
+       are several UTF error codes for each code unit width, corresponding  to
+       different  problems with the code unit sequence. The value of startoff-
+       set is also checked, to ensure that it points to the start of a charac-
+       ter  or  to  the  end  of  the subject. There are discussions about the
+       validity of UTF-8 strings, UTF-16 strings, and UTF-32  strings  in  the
+       pcre2unicode page.
+
+       If  you  know  that  your  subject is valid, and you want to skip these
+       checks for performance reasons,  you  can  set  the  PCRE2_NO_UTF_CHECK
+       option  when  calling  pcre2_match(). You might want to do this for the
+       second and subsequent calls to pcre2_match() if you are making repeated
+       calls to find all the matches in a single subject string.
+
+       NOTE:  When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
+       string as a subject, or an invalid value of startoffset, is  undefined.
+       Your program may crash or loop indefinitely.
+
+         PCRE2_PARTIAL_HARD
+         PCRE2_PARTIAL_SOFT
+
+       These  options  turn  on  the partial matching feature. A partial match
+       occurs if the end of the subject string is  reached  successfully,  but
+       there  are not enough subject characters to complete the match. If this
+       happens when PCRE2_PARTIAL_SOFT (but not  PCRE2_PARTIAL_HARD)  is  set,
+       matching  continues  by  testing any remaining alternatives. Only if no
+       complete match can be found is PCRE2_ERROR_PARTIAL returned instead  of
+       PCRE2_ERROR_NOMATCH.  In  other words, PCRE2_PARTIAL_SOFT says that the
+       caller is prepared to handle a partial match, but only if  no  complete
+       match can be found.
+
+       If  PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this
+       case, if a partial match is found,  pcre2_match()  immediately  returns
+       PCRE2_ERROR_PARTIAL,  without  considering  any  other alternatives. In
+       other words, when PCRE2_PARTIAL_HARD is set, a partial match is consid-
+       ered to be more important than an alternative complete match.
+
+       There is a more detailed discussion of partial and multi-segment match-
+       ing, with examples, in the pcre2partial documentation.
+
+
+NEWLINE HANDLING WHEN MATCHING
+
+       When PCRE2 is built, a default newline convention is set; this is  usu-
+       ally  the standard convention for the operating system. The default can
+       be overridden in either a compile context or a match context.  However,
+       changing  the  newline  convention at match time disables JIT matching.
+       During matching, the newline choice affects the behaviour of  the  dot,
+       circumflex,  and  dollar  metacharacters. It may also alter the way the
+       match position is advanced after a match failure for an unanchored pat-
+       tern.
+
+       When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is
+       set, and a match attempt for an unanchored pattern fails when the  cur-
+       rent  position  is  at  a  CRLF  sequence,  and the pattern contains no
+       explicit matches for  CR  or  LF  characters,  the  match  position  is
+       advanced by two characters instead of one, in other words, to after the
+       CRLF.
+
+       The above rule is a compromise that makes the most common cases work as
+       expected.  For  example,  if  the  pattern is .+A (and the PCRE2_DOTALL
+       option is not set), it does not match the string "\r\nA" because, after
+       failing  at the start, it skips both the CR and the LF before retrying.
+       However, the pattern [\r\n]A does match that string,  because  it  con-
+       tains an explicit CR or LF reference, and so advances only by one char-
+       acter after the first failure.
+
+       An explicit match for CR or LF is either a literal appearance of one of
+       those  characters  in  the  pattern,  or  one  of  the  \r or \n escape
+       sequences. Implicit matches such as [^X] do  not  count,  nor  does  \s
+       (which includes CR and LF in the characters that it matches).
+
+       Notwithstanding  the above, anomalous effects may still occur when CRLF
+       is a valid newline sequence and explicit \r or \n escapes appear in the
+       pattern.
+
+
+HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
+
+       uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data);
+
+       PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
+
+       In  general, a pattern matches a certain portion of the subject, and in
+       addition, further substrings from the subject  may  be  picked  out  by
+       parenthesized  parts  of  the  pattern.  Following the usage in Jeffrey
+       Friedl's book, this is called "capturing"  in  what  follows,  and  the
+       phrase  "capturing subpattern" is used for a fragment of a pattern that
+       picks out a substring.  PCRE2 supports several other kinds of parenthe-
+       sized  subpattern  that  do  not  cause  substrings to be captured. The
+       pcre2_pattern_info() function can be used to find out how many  captur-
+       ing subpatterns there are in a compiled pattern.
+
+       The  overall matched string and any captured substrings are returned to
+       the caller via a vector of PCRE2_SIZE values, called the ovector.  This
+       is contained within the match data block.  You can obtain direct access
+       to the ovector  by  calling  pcre2_get_ovector_pointer()  to  find  its
+       address,  and  pcre2_get_ovector_count() to find the number of pairs of
+       values it contains. Alternatively, you can use the auxiliary  functions
+       for accessing captured substrings by number or by name (see below).
+
+       Within the ovector, the first in each pair of values is set to the off-
+       set of the first code unit of a substring, and the second is set to the
+       offset  of the first code unit after the end of a substring. These val-
+       ues are always code unit offsets, not character offsets. That is,  they
+       are  byte  offsets  in  the 8-bit library, 16-bit offsets in the 16-bit
+       library, and 32-bit offsets in the 32-bit library.
+
+       The first pair of offsets (that is, ovector[0] and ovector[1])  identi-
+       fies  the  portion of the subject string that was matched by the entire
+       pattern. The next pair is used for the first capturing subpattern,  and
+       so  on.  The value returned by pcre2_match() is one more than the high-
+       est numbered pair that has been set. For  example,  if  two  substrings
+       have  been captured, the returned value is 3. If there are no capturing
+       subpatterns, the return value from a successful match is 1,  indicating
+       that just the first pair of offsets has been set.
+
+       If  a  capturing subpattern is matched repeatedly within a single match
+       operation, it is the last portion of the string that it matched that is
+       returned.
+
+       If the ovector is too small to hold all the captured substring offsets,
+       as much as possible is filled in, and the function returns a  value  of
+       zero.  If neither the actual string matched nor any captured substrings
+       are of interest, pcre2_match() may be called with a  match  data  block
+       whose  ovector is of zero length. However, if the pattern contains back
+       references and the ovector is not big enough to  remember  the  related
+       substrings, PCRE2 has to get additional memory for use during matching.
+       Thus it is usually advisable to set up a match data block containing an
+       ovector of reasonable size.
+
+       It  is  possible for capturing subpattern number n+1 to match some part
+       of the subject when subpattern n has not been used at all. For example,
+       if  the  string  "abc"  is  matched against the pattern (a|(z))(bc) the
+       return from the function is 4, and subpatterns 1 and 3 are matched, but
+       2  is  not.  When  this happens, both values in the offset pairs corre-
+       sponding to unused subpatterns are set to PCRE2_UNSET.
+
+       Offset values that correspond to unused subpatterns at the end  of  the
+       expression  are  also  set  to  PCRE2_UNSET. For example, if the string
+       "abc" is matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3
+       are  not matched.  The return from the function is 2, because the high-
+       est  used  capturing  subpattern number is 1. The offsets for the  sec-
+       ond  and  third  capturing  subpatterns  (assuming  the vector is large
+       enough, of course) are set to PCRE2_UNSET.
+
+       Elements in the ovector that do not correspond to capturing parentheses
+       in the pattern are never changed. That is, if a pattern contains n cap-
+       turing parentheses, no more than ovector[0] to ovector[2n+1] are set by
+       pcre2_match().  The  other  elements retain whatever values they previ-
+       ously had.
+
+   Other information about the match
+
+       PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data);
+
+       PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *match_data);
+
+       PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *match_data);
+
+       PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
+
+       In addition to the offsets in the ovector, other  information  about  a
+       match  is  retained in the match data block and can be retrieved by the
+       above functions.
+
+       When a (*MARK) name is to be passed back,  pcre2_get_mark()  returns  a
+       pointer  to the zero-terminated name, which is within the compiled pat-
+       tern.  Otherwise NULL is returned. A  (*MARK)  name  may  be  available
+       after  a failed match or a partial match, as well as after a successful
+       one.
+
+       The other three functions yield values that give information about  the
+       part of the subject string that was inspected during a successful match
+       or a partial match. Their results are undefined after a  failed  match.
+       They return the following values, respectively:
+
+       (1)  The offset of the leftmost character that was inspected during the
+       match.  This can be earlier than the point at which the  match  started
+       if the pattern contains lookbehind assertions or \b or \B at the start.
+
+       (2)  The  offset  of the character that follows the rightmost character
+       that was inspected during the match. This can be after the end  of  the
+       match if the pattern contains lookahead assertions.
+
+       (3)  The  offset  of  the  character at which the successful or partial
+       match started. This can be different to the value of ovector[0] if  the
+       pattern contains the \K escape sequence.
+
+       For  example,  if  the pattern (?<=abc)xx\Kyy(?=def) is matched against
+       the string "123abcxxyydef123", the resulting offsets are:
+
+         ovector[0]   8
+         ovector[1]  10
+         leftchar     3
+         rightchar   13
+         startchar    6
+
+       The allusedtext modifier in pcre2test can be used to display  a  longer
+       string  that  shows  the  leftmost  and rightmost characters in a match
+       instead of just the matched string.
+
+   Error return values from pcre2_match()
+
+       If pcre2_match() fails, it returns a negative number. This can be  con-
+       verted  to a text string by calling pcre2_get_error_message(). Negative
+       error codes are also returned by other functions,  and  are  documented
+       with them.  The codes are given names in the header file. If UTF check-
+       ing is in force and an invalid UTF subject string is detected, one of a
+       number  of  UTF-specific  negative error codes is returned. Details are
+       given in the pcre2unicode page. The following are the other errors that
+       may be returned by pcre2_match():
+
+         PCRE2_ERROR_NOMATCH
+
+       The subject string did not match the pattern.
+
+         PCRE2_ERROR_PARTIAL
+
+       The  subject  string did not match, but it did match partially. See the
+       pcre2partial documentation for details of partial matching.
+
+         PCRE2_ERROR_BADMAGIC
+
+       PCRE2 stores a 4-byte "magic number" at the start of the compiled code,
+       to  catch  the case when it is passed a junk pointer. This is the error
+       that is returned when the magic number is not present.
+
+         PCRE2_ERROR_BADMODE
+
+       This error is given when a pattern  that  was  compiled  by  the  8-bit
+       library  is  passed  to  a  16-bit  or 32-bit library function, or vice
+       versa.
+
+         PCRE2_ERROR_BADOFFSET
+
+       The value of startoffset was greater than the length of the subject.
+
+         PCRE2_ERROR_BADOPTION
+
+       An unrecognized bit was set in the options argument.
+
+         PCRE2_ERROR_BADUTFOFFSET
+
+       The UTF code unit sequence that was passed as a subject was checked and
+       found  to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the
+       value of startoffset did not point to the beginning of a UTF  character
+       or the end of the subject.
+
+         PCRE2_ERROR_CALLOUT
+
+       This  error  is never generated by pcre2_match() itself. It is provided
+       for use by callout functions that want to cause pcre2_match() to return
+       a  distinctive  error  code.  See  the  pcre2callout  documentation for
+       details.
+
+         PCRE2_ERROR_INTERNAL
+
+       An unexpected internal error has occurred. This error could  be  caused
+       by a bug in PCRE2 or by overwriting of the compiled pattern.
+
+         PCRE2_ERROR_JIT_BADOPTION
+
+       This  error  is  returned  when a pattern that was successfully studied
+       using JIT is being matched, but the matching mode (partial or  complete
+       match)  does  not  correspond to any JIT compilation mode. When the JIT
+       fast path function is used, this error may be also  given  for  invalid
+       options. See the pcre2jit documentation for more details.
+
+         PCRE2_ERROR_JIT_STACKLIMIT
+
+       This  error  is  returned  when a pattern that was successfully studied
+       using JIT is being matched, but the memory available for  the  just-in-
+       time  processing stack is not large enough. See the pcre2jit documenta-
+       tion for more details.
+
+         PCRE2_ERROR_MATCHLIMIT
+
+       The backtracking limit was reached.
+
+         PCRE2_ERROR_NOMEMORY
+
+       If a pattern contains back references,  but  the  ovector  is  not  big
+       enough  to  remember  the  referenced substrings, PCRE2 gets a block of
+       memory at the start of matching to use for this purpose. There are some
+       other  special cases where extra memory is needed during matching. This
+       error is given when memory cannot be obtained.
+
+         PCRE2_ERROR_NULL
+
+       Either the code, subject, or match_data argument was passed as NULL.
+
+         PCRE2_ERROR_RECURSELOOP
+
+       This error is returned when  pcre2_match()  detects  a  recursion  loop
+       within  the  pattern. Specifically, it means that either the whole pat-
+       tern or a subpattern has been called recursively for the second time at
+       the  same  position  in  the  subject string. Some simple patterns that
+       might do this are detected and faulted at compile time, but  more  com-
+       plicated  cases,  in particular mutual recursions between two different
+       subpatterns, cannot be detected until run time.
+
+         PCRE2_ERROR_RECURSIONLIMIT
+
+       The internal recursion limit was reached.
+
+
+EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
+
+       int pcre2_substring_length_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_SIZE *length);
+
+       int pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_UCHAR *buffer,
+         PCRE2_SIZE *bufflen);
+
+       int pcre2_substring_get_bynumber(pcre2_match_data *match_data,
+         unsigned int number, PCRE2_UCHAR **bufferptr,
+         PCRE2_SIZE *bufflen);
+
+       void pcre2_substring_free(PCRE2_UCHAR *buffer);
+
+       Captured substrings can be accessed directly by using  the  ovector  as
+       described above.  For convenience, auxiliary functions are provided for
+       extracting  captured  substrings  as  new,  separate,   zero-terminated
+       strings.  The  functions in this section identify substrings by number.
+       The next section describes similar functions for extracting  substrings
+       by name. A substring that contains a binary zero is correctly extracted
+       and has a further zero added on the end, but  the  result  is  not,  of
+       course, a C string.
+
+       You  can  find the length in code units of a captured substring without
+       extracting it by calling pcre2_substring_length_bynumber().  The  first
+       argument  is a pointer to the match data block, the second is the group
+       number, and the third is a pointer to a variable into which the  length
+       is placed.
+
+       The  pcre2_substring_copy_bynumber()  function copies one string into a
+       supplied buffer, whereas pcre2_substring_get_bynumber() copies it  into
+       new memory, obtained using the same memory allocation function that was
+       used for the match data block. The first two arguments of  these  func-
+       tions  are a pointer to the match data block and a capturing group num-
+       ber. A group number of zero extracts the  substring  that  matched  the
+       entire pattern, and higher values extract the captured substrings.
+
+       The final arguments of pcre2_substring_copy_bynumber() are a pointer to
+       the buffer and a pointer to a variable that contains its length in code
+       units.   This  is  updated  to  contain the actual number of code units
+       used, excluding the terminating zero.
+
+       For pcre2_substring_get_bynumber() the third and fourth arguments point
+       to  variables that are updated with a pointer to the new memory and the
+       number of code units that comprise the substring, again  excluding  the
+       terminating  zero.  When  the substring is no longer needed, the memory
+       should be freed by calling pcre2_substring_free().
+
+       The return value from these functions is zero for success,  or  one  of
+       these error codes:
+
+         PCRE2_ERROR_NOMEMORY
+
+       The  buffer  was  too small for pcre2_substring_copy_bynumber(), or the
+       attempt to get memory failed for pcre2_substring_get_bynumber().
+
+         PCRE2_ERROR_NOSUBSTRING
+
+       No substring with the given number was captured. This could be  because
+       there  is  no capturing group of that number in the pattern, or because
+       the group with that number did not participate in the match, or because
+       the ovector was too small to capture that group.
+
+
+EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
+
+       int pcre2_substring_list_get(pcre2_match_data *match_data,
+         PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
+
+       void pcre2_substring_list_free(PCRE2_SPTR *list);
+
+       The  pcre2_substring_list_get()  function  extracts  all available sub-
+       strings and builds a list of pointers to them, and a second  list  that
+       contains  their  lengths  (in code units), excluding a terminating zero
+       that is added to each of them. All this is done in a  single  block  of
+       memory  that is obtained using the same memory allocation function that
+       was used to get the match data block.
+
+       The address of the memory block is returned via listptr, which is  also
+       the start of the list of string pointers. The end of the list is marked
+       by a NULL pointer. The address of the list of lengths is  returned  via
+       lengthsptr.  If your strings do not contain binary zeros and you do not
+       therefore need the lengths, you may supply NULL as the lengthsptr argu-
+       ment  to  disable  the  creation of a list of lengths. The yield of the
+       function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the  mem-
+       ory  block could not be obtained. When the list is no longer needed, it
+       should be freed by calling pcre2_substring_list_free().
+
+       If this function encounters a substring that is unset, which can happen
+       when  capturing subpattern number n+1 matches some part of the subject,
+       but subpattern n has not been used at all, it returns an empty  string.
+       This  can  be  distinguished  from  a  genuine zero-length substring by
+       inspecting the  appropriate  offset  in  the  ovector,  which  contains
+       PCRE2_UNSET for unset substrings.
+
+
+EXTRACTING CAPTURED SUBSTRINGS BY NAME
+
+       int pcre2_substring_number_from_name(const pcre2_code *code,
+         PCRE2_SPTR name);
+
+       int pcre2_substring_length_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_SIZE *length);
+
+       int pcre2_substring_copy_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen);
+
+       int pcre2_substring_get_byname(pcre2_match_data *match_data,
+         PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen);
+
+       void pcre2_substring_free(PCRE2_UCHAR *buffer);
+
+       To extract a substring by name, you first have to find  the  associated
+       number.  For example, for this pattern:
+
+         (a+)b(?<xxx>\d+)...
+
+       the number of the subpattern called "xxx" is 2. If the name is known to
+       be  unique  (PCRE2_DUPNAMES  was not set), you can find the number from
+       the name by calling pcre2_substring_number_from_name(). The first argu-
+       ment  is the compiled pattern, and the second is the name. The yield of
+       the function is the subpattern number,  or  PCRE2_ERROR_NOSUBSTRING  if
+       there is no subpattern of that name.
+
+       Given the number, you can extract the substring directly, or use one of
+       the functions described in the previous section. For convenience, there
+       are  also  "byname"  functions  that correspond to the "bynumber" func-
+       tions, the only difference being that the second  argument  is  a  name
+       instead  of  a number.  However, if PCRE2_DUPNAMES is set and there are
+       duplicate names, the behaviour may not be what you want (see  the  next
+       section).
+
+       Warning: If the pattern uses the (?| feature to set up multiple subpat-
+       terns with the same number, as described in the  section  on  duplicate
+       subpattern  numbers  in  the pcre2pattern page, you cannot use names to
+       distinguish the different subpatterns, because names are  not  included
+       in  the compiled code. The matching process uses only numbers. For this
+       reason, the use of different names for subpatterns of the  same  number
+       causes an error at compile time.
+
+
+DUPLICATE SUBPATTERN NAMES
+
+       int pcre2_substring_nametable_scan(const pcre2_code *code,
+         PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
+
+       When  a  pattern  is compiled with the PCRE2_DUPNAMES option, names for
+       subpatterns are not required to be unique. Duplicate names  are  always
+       allowed  for subpatterns with the same number, created by using the (?|
+       feature. Indeed, if such subpatterns are named, they  are  required  to
+       use the same names.
+
+       Normally, patterns with duplicate names are such that in any one match,
+       only one of the named subpatterns participates. An example is shown  in
+       the pcre2pattern documentation.
+
+       When   duplicates   are   present,   pcre2_substring_copy_byname()  and
+       pcre2_substring_get_byname() return the first  substring  corresponding
+       to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
+       is returned. The  pcre2_substring_number_from_name()  function  returns
+       one  of  the  numbers  that are associated with the name, but it is not
+       defined which it is.
+
+       If you want to get full details of all captured substrings for a  given
+       name,  you  must use the pcre2_substring_nametable_scan() function. The
+       first argument is the compiled pattern, and the second is the name.  If
+       the  third  and fourth arguments are NULL, the function returns a group
+       number (it is not defined which). Otherwise, the third and fourth argu-
+       ments  must  be pointers to variables that are updated by the function.
+       After it has run, they point to the first and last entries in the name-
+       to-number table for the given name, and the function returns the length
+       of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING  is  returned  if
+       there are no entries for the given name.
+
+       The format of the name table is described above in the section entitled
+       Information  about  a  pattern.  Given  all  the relevant entries  for
+       the name, you can extract each of their numbers, and hence the captured
+       data.
+
+
+FINDING ALL POSSIBLE MATCHES
+
+       The traditional matching function uses a  similar  algorithm  to  Perl,
+       which stops when it finds the first match, starting at a given point in
+       the subject. If you want to find all possible matches, or  the  longest
+       possible  match  at  a  given  position, consider using the alternative
+       matching function (see below) instead.  If you cannot use the  alterna-
+       tive function, you can kludge it up by making use of the callout facil-
+       ity, which is described in the pcre2callout documentation.
+
+       What you have to do is to insert a callout right at the end of the pat-
+       tern.   When your callout function is called, extract and save the cur-
+       rent matched substring. Then return 1, which  forces  pcre2_match()  to
+       backtrack  and  try other alternatives. Ultimately, when it runs out of
+       matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
+
+
+MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
+
+       int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject,
+         PCRE2_SIZE length, PCRE2_SIZE startoffset,
+         uint32_t options, pcre2_match_data *match_data,
+         pcre2_match_context *mcontext,
+         int *workspace, PCRE2_SIZE wscount);
+
+       The function pcre2_dfa_match() is called  to  match  a  subject  string
+       against  a  compiled pattern, using a matching algorithm that scans the
+       subject string just once, and does not backtrack.  This  has  different
+       characteristics  to  the  normal  algorithm, and is not compatible with
+       Perl. Some of the features of PCRE2 patterns are not supported.  Never-
+       theless,  there are times when this kind of matching can be useful. For
+       a discussion of the two matching algorithms, and  a  list  of  features
+       that pcre2_dfa_match() does not support, see the pcre2matching documen-
+       tation.
+
+       The arguments for the pcre2_dfa_match() function are the  same  as  for
+       pcre2_match(), plus two extras. The ovector within the match data block
+       is used in a different way, and this is described below. The other com-
+       mon  arguments  are used in the same way as for pcre2_match(), so their
+       description is not repeated here.
+
+       The two additional arguments provide workspace for  the  function.  The
+       workspace  vector  should  contain at least 20 elements. It is used for
+       keeping  track  of  multiple  paths  through  the  pattern  tree.  More
+       workspace  is needed for patterns and subjects where there are a lot of
+       potential matches.
+
+       Here is an example of a simple call to pcre2_dfa_match():
+
+         int wspace[20];
+         pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+         int rc = pcre2_dfa_match(
+           re,             /* result of pcre2_compile() */
+           "some string",  /* the subject string */
+           11,             /* the length of the subject string */
+           0,              /* start at offset 0 in the subject */
+           0,              /* default options */
+           md,             /* the match data block */
+           NULL,           /* a match context; NULL means use defaults */
+           wspace,         /* working space vector */
+           20);            /* number of elements (NOT size in bytes) */
+
+   Option bits for pcre2_dfa_match()
+
+       The unused bits of the options argument for pcre2_dfa_match()  must  be
+       zero.  The  only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
+       PCRE2_NOTEOL,          PCRE2_NOTEMPTY,          PCRE2_NOTEMPTY_ATSTART,
+       PCRE2_NO_UTF_CHECK,     PCRE2_NO_START_OPTIMIZE,    PCRE2_PARTIAL_HARD,
+       PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All  but
+       the  last  four  of these are exactly the same as for pcre2_match(), so
+       their description is not repeated here.
+
+         PCRE2_PARTIAL_HARD
+         PCRE2_PARTIAL_SOFT
+
+       These have the same general effect as they do  for  pcre2_match(),  but
+       the  details are slightly different. When PCRE2_PARTIAL_HARD is set for
+       pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if  the  end  of  the
+       subject is reached and there is still at least one matching possibility
+       that requires additional characters. This happens even if some complete
+       matches  have  already  been found. When PCRE2_PARTIAL_SOFT is set, the
+       return code PCRE2_ERROR_NOMATCH is converted  into  PCRE2_ERROR_PARTIAL
+       if  the  end  of  the  subject  is reached, there have been no complete
+       matches, but there is still at least one matching possibility. The por-
+       tion  of  the  string that was inspected when the longest partial match
+       was found is set as the first matching string in both cases. There is a
+       more  detailed  discussion  of partial and multi-segment matching, with
+       examples, in the pcre2partial documentation.
+
+         PCRE2_DFA_SHORTEST
+
+       Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm  to
+       stop as soon as it has found one match. Because of the way the alterna-
+       tive algorithm works, this is necessarily the shortest  possible  match
+       at the first possible matching point in the subject string.
+
+         PCRE2_DFA_RESTART
+
+       When  pcre2_dfa_match() returns a partial match, it is possible to call
+       it again, with additional subject characters, and have it continue with
+       the same match. The PCRE2_DFA_RESTART option requests this action; when
+       it is set, the workspace and wscount arguments must reference the  same
+       vector  as  before  because data about the match so far is left in them
+       after a partial match. There is more discussion of this facility in the
+       pcre2partial documentation.
+
+   Successful returns from pcre2_dfa_match()
+
+       When pcre2_dfa_match() succeeds, it may have matched more than one sub-
+       string in the subject. Note, however, that all the matches from one run
+       of  the  function  start  at the same point in the subject. The shorter
+       matches are all initial substrings of the longer matches. For  example,
+       if the pattern
+
+         <.*>
+
+       is matched against the string
+
+         This is <something> <something else> <something further> no more
+
+       the three matched strings are
+
+         <something>
+         <something> <something else>
+         <something> <something else> <something further>
+
+       On  success,  the  yield of the function is a number greater than zero,
+       which is the number of matched substrings.  The  offsets  of  the  sub-
+       strings  are  returned in the ovector, and can be extracted in the same
+       way as for pcre2_match().   They  are  returned  in  reverse  order  of
+       length;  that  is, the longest matching string is given first. If there
+       were too many matches to fit into the ovector, the yield of  the  func-
+       tion is zero, and the vector is filled with the longest matches.
+
+       NOTE:  PCRE2's  "auto-possessification" optimization usually applies to
+       character repeats at the end of a pattern (as well as internally).  For
+       example,  the  pattern "a\d+" is compiled as if it were "a\d++" because
+       there is no point in backtracking into the  repeated  digits.  For  DFA
+       matching,  this  means  that  only  one possible match is found. If you
+       really do want multiple matches in such cases, either use  an  ungreedy
+       repeat  ("a\d+?")  or set the PCRE2_NO_AUTO_POSSESS option when compil-
+       ing.
+
+   Error returns from pcre2_dfa_match()
+
+       The pcre2_dfa_match() function returns a negative number when it fails.
+       Many  of  the  errors  are  the same as for pcre2_match(), as described
+       above.  There are in addition the following errors that are specific to
+       pcre2_dfa_match():
+
+         PCRE2_ERROR_DFA_UITEM
+
+       This  return  is  given  if pcre2_dfa_match() encounters an item in the
+       pattern that it does not support, for instance, the use of \C or a back
+       reference.
+
+         PCRE2_ERROR_DFA_UCOND
+
+       This  return  is given if pcre2_dfa_match() encounters a condition item
+       that uses a back reference for the condition, or a test  for  recursion
+       in a specific group. These are not supported.
+
+         PCRE2_ERROR_DFA_WSSIZE
+
+       This  return  is  given  if  pcre2_dfa_match() runs out of space in the
+       workspace vector.
+
+         PCRE2_ERROR_DFA_RECURSE
+
+       When a recursive subpattern is processed, the matching  function  calls
+       itself recursively, using private memory for the ovector and workspace.
+       This error is given if the internal ovector is not large  enough.  This
+       should be extremely rare, as a vector of size 1000 is used.
+
+         PCRE2_ERROR_DFA_BADRESTART
+
+       When  pcre2_dfa_match()  is  called  with the PCRE2_DFA_RESTART option,
+       some plausibility checks are made on the  contents  of  the  workspace,
+       which  should  contain data about the previous partial match. If any of
+       these checks fail, this error is given.
+
+
+SEE ALSO
+
+       pcre2build(3),   pcre2libs(3),    pcre2callout(3),    pcre2matching(3),
+       pcre2partial(3),     pcre2posix(3),    pcre2demo(3),    pcre2sample(3),
+       pcre2stack(3).
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 16 September 2014
+       Copyright (c) 1997-2014 University of Cambridge.
+------------------------------------------------------------------------------
+ 
+ 
+PCRE2CALLOUT(3)            Library Functions Manual            PCRE2CALLOUT(3)
+
+
+
+NAME
+       PCRE2 - Perl-compatible regular expressions (revised API)
+
+SYNOPSIS
+
+       #include <pcre2.h>
+
+       int (*pcre2_callout)(pcre2_callout_block *);
+
+
+DESCRIPTION
+
+       PCRE2  provides  a feature called "callout", which is a means of tempo-
+       rarily passing control to the caller of PCRE2 in the middle of  pattern
+       matching.  The caller of PCRE2 provides an external function by putting
+       its entry point in a match context  (see  pcre2_set_callout()  in   the
+       pcre2api documentation).
+
+       Within  a  regular  expression,  (?C) indicates the points at which the
+       external function is to be called.  Different  callout  points  can  be
+       identified  by  putting  a number less than 256 after the letter C. The
+       default value is zero.  For  example,  this  pattern  has  two  callout
+       points:
+
+         (?C1)abc(?C2)def
+
+       If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled,
+       PCRE2 automatically inserts callouts, all with number 255, before  each
+       item  in  the  pattern. For example, if PCRE2_AUTO_CALLOUT is used with
+       the pattern
+
+         A(\d{2}|--)
+
+       it is processed as if it were
+
+       (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+
+       Notice that there is a callout before and after  each  parenthesis  and
+       alternation bar. If the pattern contains a conditional group whose con-
+       dition is an assertion, an automatic callout  is  inserted  immediately
+       before  the  condition. Such a callout may also be inserted explicitly,
+       for example:
+
+         (?(?C9)(?=a)ab|de)
+
+       This applies only to assertion conditions (because they are  themselves
+       independent groups).
+
+       Automatic  callouts  can  be  used for tracking the progress of pattern
+       matching.  The pcre2test program has a pattern  qualifier  (/auto_call-
+       out)  that  sets  automatic callouts; when it is used, the output indi-
+       cates how the pattern is being matched. This is useful information when
+       you are trying to optimize the performance of a particular pattern.
+
+
+MISSING CALLOUTS
+
+       You  should  be  aware  that, because of optimizations in the way PCRE2
+       compiles and matches patterns, callouts sometimes do not happen exactly
+       as you might expect.
+
+       At compile time, PCRE2 "auto-possessifies" repeated items when it knows
+       that what follows cannot be part of the repeat. For example, a+[bc]  is
+       compiled  as if it were a++[bc]. The pcre2test output when this pattern
+       is anchored and then applied with  automatic  callouts  to  the  string
+       "aaaa" is:
+
+         --->aaaa
+          +0 ^        ^
+          +1 ^        a+
+          +3 ^   ^    [bc]
+         No match
+
+       This  indicates that when matching [bc] fails, there is no backtracking
+       into a+ and therefore the callouts that would be taken  for  the  back-
+       tracks  do  not  occur.  You can disable the auto-possessify feature by
+       passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting the  pat-
+       tern  with  (*NO_AUTO_POSSESS). If this is done in pcre2test (using the
+       /no_auto_possess qualifier), the output changes to this:
+
+         --->aaaa
+          +0 ^        ^
+          +1 ^        a+
+          +3 ^   ^    [bc]
+          +3 ^  ^     [bc]
+          +3 ^ ^      [bc]
+          +3 ^^       [bc]
+         No match
+
+       This time, when matching [bc] fails, the matcher backtracks into a+ and
+       tries again, repeatedly, until a+ itself fails.
+
+       Other  optimizations  that  provide fast "no match" results also affect
+       callouts.  For example, if the pattern is
+
+         ab(?C4)cd
+
+       PCRE2 knows that any matching string must contain the  letter  "d".  If
+       the  subject  string  is  "abyz",  the  lack of "d" means that matching
+       doesn't ever start, and the callout is  never  reached.  However,  with
+       "abyd", though the result is still no match, the callout is obeyed.
+
+       PCRE2  also  knows  the  minimum  length of a matching string, and will
+       immediately give a "no match" return without actually running  a  match
+       if  the  subject is not long enough, or, for unanchored patterns, if it
+       has been scanned far enough.
+
+       You can disable these optimizations by passing the PCRE2_NO_START_OPTI-
+       MIZE  option  to the matching function, or by starting the pattern with
+       (*NO_START_OPT). This slows down the matching process, but does  ensure
+       that callouts such as the example above are obeyed.
+
+
+THE CALLOUT INTERFACE
+
+       During matching, when PCRE2 reaches a callout point, the external func-
+       tion that is set in the match context is called (if it  is  set).  This
+       applies to both normal and DFA matching. The only argument to the call-
+       out function is a pointer to a pcre2_callout_block. This structure con-
+       tains the following fields:
+
+         uint32_t      version;
+         uint32_t      callout_number;
+         uint32_t      capture_top;
+         uint32_t      capture_last;
+         void         *callout_data;
+         PCRE2_SIZE   *offset_vector;
+         PCRE2_SPTR    mark;
+         PCRE2_SPTR    subject;
+         PCRE2_SIZE    subject_length;
+         PCRE2_SIZE    start_match;
+         PCRE2_SIZE    current_position;
+         PCRE2_SIZE    pattern_position;
+         PCRE2_SIZE    next_item_length;
+
+       The  version field contains the version number of the block format. The
+       current version is 0. The version number will change in future if addi-
+       tional  fields  are  added, but the intention is never to remove any of
+       the existing fields.
+
+       The callout_number field contains the number of the  callout,  as  com-
+       piled  into  the pattern (that is, the number after ?C for manual call-
+       outs, and 255 for automatically generated callouts).
+
+       The offset_vector field is a pointer to the vector of capturing offsets
+       (the  "ovector")  that was passed to the matching function in the match
+       data block. When pcre2_match() is used, the contents can be  inspected,
+       in  order  to  extract substrings that have been matched so far, in the
+       same way as for extracting substrings after a match has completed.  For
+       the DFA matching function, this field is not useful.
+
+       The subject and subject_length fields contain copies of the values that
+       were passed to the matching function.
+
+       The start_match field normally contains the offset within  the  subject
+       at  which  the  current  match  attempt started. However, if the escape
+       sequence \K has been encountered, this value is changed to reflect  the
+       modified  starting  point.  If the pattern is not anchored, the callout
+       function may be called several times from the same point in the pattern
+       for different starting points in the subject.
+
+       The  current_position  field  contains the offset within the subject of
+       the current match pointer.
+
+       When  pcre2_match()  is  used,  the capture_top field contains one more
+       than  the  number of the highest numbered captured substring so far. If
+       no substrings have been captured, the value of capture_top is one. This
+       is always the case when the DFA functions are used, because they do not
+       support captured substrings.
+
+       The capture_last field contains the number of the  most  recently  cap-
+       tured  substring. However, when a recursion exits, the value reverts to
+       what it was outside the recursion, as do the  values  of  all  captured
+       substrings.  If  no  substrings  have  been captured, the value of cap-
+       ture_last is 0. This is always the case for the DFA matching functions.
+
+       The callout_data field contains a value that is passed  to  a  matching
+       function  specifically so that it can be passed back in callouts. It is
+       set in the match  context  when  the  callout  is  set  up  by  calling
+       pcre2_set_callout() (see the pcre2api documentation).
+
+       The  pattern_position  field contains the offset to the next item to be
+       matched in the pattern string.
+
+       The next_item_length field contains the length of the next item  to  be
+       matched in the pattern string. When the callout immediately precedes an
+       alternation bar, a closing parenthesis, or the end of the pattern,  the
+       length  is  zero. When the callout precedes an opening parenthesis, the
+       length is that of the entire subpattern.
+
+       The pattern_position and next_item_length fields are intended  to  help
+       in  distinguishing between different automatic callouts, which all have
+       the same callout number. However, they are set for all callouts.
+
+       In callouts from pcre2_match() the mark field contains a pointer to the
+       zero-terminated  name of the most recently passed (*MARK), (*PRUNE), or
+       (*THEN) item in the match, or NULL if no such items have  been  passed.
+       Instances  of  (*PRUNE)  or  (*THEN) without a name do not obliterate a
+       previous (*MARK). In callouts from the DFA matching function this field
+       always contains NULL.
+
+
+RETURN VALUES
+
+       The external callout function returns an integer to PCRE2. If the value
+       is zero, matching proceeds as normal. If  the  value  is  greater  than
+       zero,  matching  fails  at  the current point, but the testing of other
+       matching possibilities goes ahead, just as if a lookahead assertion had
+       failed. If the value is less than zero, the match is abandoned, and the
+       matching function returns the negative value.
+
+       Negative  values  should  normally  be   chosen   from   the   set   of
+       PCRE2_ERROR_xxx  values.  In  particular,  PCRE2_ERROR_NOMATCH forces a
+       standard "no match" failure. The error  number  PCRE2_ERROR_CALLOUT  is
+       reserved  for  use by callout functions; it will never be used by PCRE2
+       itself.
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 19 October 2014
+       Copyright (c) 1997-2014 University of Cambridge.
+------------------------------------------------------------------------------
+ 
+ 
+PCRE2UNICODE(3)            Library Functions Manual            PCRE2UNICODE(3)
+
+
+
+NAME
+       PCRE2 - Perl-compatible regular expressions (revised API)
+
+UNICODE AND UTF SUPPORT
+
+       When PCRE2 is built with Unicode support, it acquires knowledge of Uni-
+       code character properties  and  can  process  text  strings  in  UTF-8,
+       UTF-16,  or  UTF-32  format  (depending  on  the  code  unit width). By
+       default, PCRE2 assumes that one code unit is one character. To  process
+       a  pattern as a UTF string, where a character may require more than one
+       code unit, you must call  pcre2_compile()  with  the  PCRE2_UTF  option
+       flag,  or  the pattern must start with the sequence (*UTF). When either
+       of these is the case, both the pattern and any subject strings that are
+       matched  against  it  are  treated as UTF strings instead of strings of
+       individual one-code-unit characters.
+
+       If you build PCRE2 with Unicode support, the library  will  be  bigger,
+       but  the  additional  run  time  overhead  is  limited  to  testing the
+       PCRE2_UTF flag occasionally, so should not be very much.
+
+
+UNICODE PROPERTY SUPPORT
+
+       When PCRE2 is built with Unicode support, the escape sequences  \p{..},
+       \P{..},  and  \X can be used. The Unicode properties that can be tested
+       are limited to the general category properties such as Lu for an  upper
+       case  letter  or Nd for a decimal number, the Unicode script names such
+       as Arabic or Han, and the derived properties Any and L&. Full lists are
+       given in the pcre2pattern and pcre2syntax documentation. Only the short
+       names for properties are supported. For example, \p{L} matches  a  let-
+       ter.  Its  Perl synonym, \p{Letter}, is not supported.  Furthermore, in
+       Perl, many properties may optionally be prefixed by "Is", for  compati-
+       bility with Perl 5.6. PCRE2 does not support this.
+
+
+WIDE CHARACTERS AND UTF MODES
+
+       Codepoints  less than 256 can be specified in patterns by either braced
+       or unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3).
+       Larger  values have to use braced sequences. Unbraced octal code points
+       up to \777 are also recognized; larger ones can be coded using \o{...}.
+
+       In UTF modes, repeat quantifiers apply to complete UTF characters,  not
+       to individual code units.
+
+       In  UTF  modes, the dot metacharacter matches one UTF character instead
+       of a single code unit.
+
+       The escape sequence \C can be used to match a single code  unit,  in  a
+       UTF  mode,  but  its  use  can  lead to some strange effects because it
+       breaks up multi-unit characters (see  the  description  of  \C  in  the
+       pcre2pattern  documentation).  The  use  of  \C is not supported in the
+       alternative matching function pcre2_dfa_match(), nor is it supported in
+       UTF  mode by the JIT optimization. If JIT optimization is requested for
+       a UTF pattern that contains \C, it will not succeed, and so the  match-
+       ing will be carried out by the normal interpretive function.
+
+       The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
+       characters of any code value, but,  by  default,  the  characters  that
+       PCRE2  recognizes as digits, spaces, or word characters remain the same
+       set as in non-UTF mode, all  with  code  points  less  than  256.  This
+       remains  true  even  when  PCRE2  is  built to include Unicode support,
+       because to do otherwise would slow down matching in many common  cases.
+       Note  that  this also applies to \b and \B, because they are defined in
+       terms of \w and \W. If you want to test for  a  wider  sense  of,  say,
+       "digit",  you  can  use explicit Unicode property tests such as \p{Nd}.
+       Alternatively, if you set the PCRE2_UCP option, the way that the  char-
+       acter  escapes  work  is changed so that Unicode properties are used to
+       determine which characters match. There are more details in the section
+       on generic character types in the pcre2pattern documentation.
+
+       Similarly,  characters that match the POSIX named character classes are
+       all low-valued characters, unless the PCRE2_UCP option is set.
+
+       However, the special  horizontal  and  vertical  white  space  matching
+       escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
+       acters, whether or not PCRE2_UCP is set.
+
+       Case-insensitive matching in UTF mode makes use of Unicode  properties.
+       A  few  Unicode characters such as Greek sigma have more than two code-
+       points that are case-equivalent, and these are treated as such.
+
+
+VALIDITY OF UTF STRINGS
+
+       When the PCRE2_UTF option is set, the strings passed  as  patterns  and
+       subjects are (by default) checked for validity on entry to the relevant
+       functions.  If an invalid UTF string is  passed,  an  error  return  is
+       given.
+
+       UTF-16  and  UTF-32  strings can indicate their endianness by a special
+       code known as a byte-order mark  (BOM).  The  PCRE2  functions  do  not
+       handle this, expecting strings to be in host byte order.
+
+       The  entire  string is checked before any other processing takes place.
+       In addition to checking the format of the string, there is a  check  to
+       ensure that all code points lie in the range U+0 to U+10FFFF, excluding
+       the surrogate area.  The so-called "non-character" code points are  not
+       excluded because Unicode corrigendum #9 makes it clear that they should
+       not be.
+
+       Characters in the "Surrogate Area" of Unicode are reserved for  use  by
+       UTF-16,  where they are used in pairs to encode code points with values
+       greater than 0xFFFF. The code points that are encoded by  UTF-16  pairs
+       are  available  independently  in  the  UTF-8 and UTF-32 encodings. (In
+       other words, the whole surrogate thing is  a  fudge  for  UTF-16  which
+       unfortunately messes up UTF-8 and UTF-32.)
+
+       In  some  situations, you may already know that your strings are valid,
+       and therefore want to skip these checks in  order  to  improve  perfor-
+       mance,  for  example in the case of a long subject string that is being
+       scanned repeatedly.  If you set the PCRE2_NO_UTF_CHECK flag at  compile
+       time  or  at  run time, PCRE2 assumes that the pattern or subject it is
+       given (respectively) contains only valid UTF code unit sequences.
+
+       Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables  the  check
+       for the pattern; it does not also apply to subject strings. If you want
+       to disable the check for a subject string you must pass this option  to
+       pcre2_match() or pcre2_dfa_match().
+
+       If  you  pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the
+       result is undefined and your program may crash or loop indefinitely.
+
+   Errors in UTF-8 strings
+
+       The following negative error codes are given for invalid UTF-8 strings:
+
+         PCRE2_ERROR_UTF8_ERR1
+         PCRE2_ERROR_UTF8_ERR2
+         PCRE2_ERROR_UTF8_ERR3
+         PCRE2_ERROR_UTF8_ERR4
+         PCRE2_ERROR_UTF8_ERR5
+
+       The string ends with a truncated UTF-8 character;  the  code  specifies
+       how  many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8
+       characters to be no longer than 4 bytes, the  encoding  scheme  (origi-
+       nally  defined  by  RFC  2279)  allows  for  up to 6 bytes, and this is
+       checked first; hence the possibility of 4 or 5 missing bytes.
+
+         PCRE2_ERROR_UTF8_ERR6
+         PCRE2_ERROR_UTF8_ERR7
+         PCRE2_ERROR_UTF8_ERR8
+         PCRE2_ERROR_UTF8_ERR9
+         PCRE2_ERROR_UTF8_ERR10
+
+       The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of
+       the  character  do  not have the binary value 0b10 (that is, either the
+       most significant bit is 0, or the next bit is 1).
+
+         PCRE2_ERROR_UTF8_ERR11
+         PCRE2_ERROR_UTF8_ERR12
+
+       A character that is valid by the RFC 2279 rules is either 5 or 6  bytes
+       long; these code points are excluded by RFC 3629.
+
+         PCRE2_ERROR_UTF8_ERR13
+
+       A 4-byte character has a value greater than 0x10ffff; these code points
+       are excluded by RFC 3629.
+
+         PCRE2_ERROR_UTF8_ERR14
+
+       A 3-byte character has a value in the  range  0xd800  to  0xdfff;  this
+       range  of  code points is reserved by RFC 3629 for use with UTF-16, and
+       so is excluded from UTF-8.
+
+         PCRE2_ERROR_UTF8_ERR15
+         PCRE2_ERROR_UTF8_ERR16
+         PCRE2_ERROR_UTF8_ERR17
+         PCRE2_ERROR_UTF8_ERR18
+         PCRE2_ERROR_UTF8_ERR19
+
+       A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it  codes
+       for  a  value that can be represented by fewer bytes, which is invalid.
+       For example, the two bytes 0xc0, 0xae give the value 0x2e,  whose  cor-
+       rect coding uses just one byte.
+
+         PCRE2_ERROR_UTF8_ERR20
+
+       The two most significant bits of the first byte of a character have the
+       binary value 0b10 (that is, the most significant bit is 1 and the  sec-
+       ond  is  0). Such a byte can only validly occur as the second or subse-
+       quent byte of a multi-byte character.
+
+         PCRE2_ERROR_UTF8_ERR21
+
+       The first byte of a character has the value 0xfe or 0xff. These  values
+       can never occur in a valid UTF-8 string.
+
+   Errors in UTF-16 strings
+
+       The  following  negative  error  codes  are  given  for  invalid UTF-16
+       strings:
+
+         PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+         PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+         PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+
+
+   Errors in UTF-32 strings
+
+       The following  negative  error  codes  are  given  for  invalid  UTF-32
+       strings:
+
+         PCRE2_ERROR_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
+         PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 16 September 2014
+       Copyright (c) 1997-2014 University of Cambridge.
+------------------------------------------------------------------------------
+ 
+ 
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index b7b350e..f5528eb 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -214,7 +214,7 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_pattern_info(const pcre2 *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
 .sp
 .B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP, PCRE2_SIZE \fIlength\fP);
-.sp
+.fi
 .
 .
 .SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3
new file mode 100644
index 0000000..13535b2
--- /dev/null
+++ b/doc/pcre2demo.3
@@ -0,0 +1,441 @@
+.\" Start example.
+.de EX
+.  nr mE \\n(.f
+.  nf
+.  nh
+.  ft CW
+..
+.
+.
+.\" End example.
+.de EE
+.  ft \\n(mE
+.  fi
+.  hy \\n(HY
+..
+.
+.EX
+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+calling the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit 
+width. This demonstration program uses the 8-bit library.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* This macro must be defined before including pcre2.h. For a program that uses 
+only one code unit width, it makes it possible to use generic function names 
+such as pcre2_compile(). */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <stdio.h>
+#include <string.h>
+#include <pcre2.h>
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+int main(int argc, char **argv)
+{
+pcre2_code *re;
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR name_table;
+
+int crlf_is_newline;
+int errornumber;
+int find_all;
+int i;
+int namecount;
+int name_entry_size;
+int rc;
+int utf8;
+
+uint32_t option_bits;
+uint32_t newline;
+
+PCRE2_SIZE erroroffset;
+PCRE2_SIZE *ovector;
+
+size_t subject_length;
+pcre2_match_data *match_data;
+
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present. Apart from that, there must be exactly two *
+* arguments.                                                              *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i < argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+    else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Two arguments required: a regex and a subject string\en");
+  return 1;
+  }
+
+/* As pattern and subject are char arguments, they can be straightforwardly
+cast to PCRE2_SPTR as we are working in 8-bit code units. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,              /* the pattern */
+  -1,                   /* indicates pattern is zero-terminated */ 
+  0,                    /* default options */
+  &errornumber,         /* for error number */
+  &erroroffset,         /* for error offset */
+  NULL);                /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256]; 
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, 
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result.        *
+*************************************************************************/
+
+/* Using this function ensures that the block is exactly the right size for
+the number of capturing parentheses in the pattern. */
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc < 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\en"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\en", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /* data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets are
+stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used 
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\en");
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i < rc; i++)
+  {
+  PCRE2_SPTR substring_start = subject + ovector[2*i];
+  size_t substring_length = ovector[2*i+1] - ovector[2*i];
+  printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &namecount);          /* where to put the answer */
+
+if (namecount <= 0) printf("No named substrings\en"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\en");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two 
+  bytes, most significant first. */
+
+  tabptr = name_table;
+  for (i = 0; i < namecount; i++)
+    {
+    int n = (tabptr[0] << 8) | tabptr[1];
+    printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
+      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+    tabptr += name_entry_size;
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */ 
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
+utf8 = (option_bits & PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF; 
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                    /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];  /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &&                      /* If CRLF is newline & */
+        start_offset < subject_length - 1 &&    /* we are at CRLF, */
+        subject[start_offset] == '\er' &&
+        subject[start_offset + 1] == '\en')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] < subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc < 0)
+    {
+    printf("Matching error %d\en", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\en");
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. */
+
+  for (i = 0; i < rc; i++)
+    {
+    PCRE2_SPTR substring_start = subject + ovector[2*i];
+    size_t substring_length = ovector[2*i+1] - ovector[2*i];
+    printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
+    }
+
+  if (namecount <= 0) printf("No named substrings\en"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\en");
+    for (i = 0; i < namecount; i++)
+      {
+      int n = (tabptr[0] << 8) | tabptr[1];
+      printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
+        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+      tabptr += name_entry_size;
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\en");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+.EE
diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index 1da6dfa..71be47d 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -154,7 +154,7 @@ Do not output the version number of \fBpcre2test\fP at the start of execution.
 \fB-S\fP \fIsize\fP
 On Unix-like systems, set the size of the run-time stack to \fIsize\fP
 megabytes.
-.TP10
+.TP 10
 \fB-subject\fP \fImodifier-list\fP
 Behave as if each subject line contains the given modifiers.
 .TP 10
@@ -366,7 +366,7 @@ include a closing square bracket in the characters, code it as \ex5D.
 A backslash followed by an equals sign marke the end of the subject string and
 the start of a modifier list. For example:
 .sp
-  abc\=notbol,notempty
+  abc\e=notbol,notempty
 .sp
 A backslash followed by any other non-alphanumeric character just escapes that
 character. A backslash followed by anything else causes an error. However, if
@@ -746,7 +746,7 @@ the actual match are indicated in the output by '<' or '>' characters
 underneath them. Here is an example:
 .sp
   /(?<=pqr)abc(?=xyz)/
-      123pqrabcxyz456\=allusedtext
+      123pqrabcxyz456\e=allusedtext
    0: pqrabcxyz
       <<<   >>>
 .sp
@@ -789,7 +789,7 @@ The \fBcopy\fP and \fBget\fP modifiers can be used to test the
 They can be given more than once, and each can specify a group name or number,
 for example:
 .sp
-   abcd\=copy=1,copy=3,get=G1
+   abcd\e=copy=1,copy=3,get=G1
 .sp
 If the \fB#subject\fP command is used to set default copy and get lists, these
 can be unset by specifying a negative number for numbered groups and an empty
diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
new file mode 100644
index 0000000..34c1a14
--- /dev/null
+++ b/doc/pcre2test.txt
@@ -0,0 +1,1073 @@
+PCRE2TEST(1)                General Commands Manual               PCRE2TEST(1)
+
+
+
+NAME
+       pcre2test - a program for testing Perl-compatible regular expressions.
+
+SYNOPSIS
+
+       pcre2test [options] [input file [output file]]
+
+       pcre2test is a test program for the PCRE2 regular expression libraries,
+       but it can also be used for  experimenting  with  regular  expressions.
+       This  document  describes the features of the test program; for details
+       of the regular expressions themselves, see the pcre2pattern  documenta-
+       tion.  For  details  of  the  PCRE2  library  function  calls and their
+       options, see the pcre2api documentation.
+
+       The input for pcre2test is a sequence of  regular  expression  patterns
+       and  subject strings to be matched. The output shows the result of each
+       match attempt. Modifiers on the command line,  the  patterns,  and  the
+       subject  lines  specify PCRE2 function options, control how the subject
+       is processed, and what output is produced.
+
+       As the original fairly simple PCRE library evolved,  it  acquired  many
+       different  features,  and  as  a  result, the original pcretest program
+       ended up with a lot of options in a messy, arcane syntax,  for  testing
+       all the features. The move to the new PCRE2 API provided an opportunity
+       to re-implement the test program as pcre2test, with a cleaner  modifier
+       syntax.  Nevertheless,  there are still many obscure modifiers, some of
+       which are specifically designed for use in conjunction  with  the  test
+       script  and  data  files that are distributed as part of PCRE2. All the
+       modifiers are documented here, some  without  much  justification,  but
+       many  of  them  are  unlikely  to  be  of  use  except when testing the
+       libraries.
+
+
+PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
+
+       Different versions of the PCRE2 library can be built to support charac-
+       ter  strings  that  are encoded in 8-bit, 16-bit, or 32-bit code units.
+       One, two, or  all  three  of  these  libraries  may  be  simultaneously
+       installed. The pcre2test program can be used to test all the libraries.
+       However, its own input and output are  always  in  8-bit  format.  When
+       testing  the  16-bit  or 32-bit libraries, patterns and subject strings
+       are converted to 16- or  32-bit  format  before  being  passed  to  the
+       library  functions.  Results are converted back to 8-bit code units for
+       output.
+
+       In the rest of this document, the names of library functions and struc-
+       tures  are  given  in  generic  form, for example, pcre2_compile(). The
+       actual names used in the libraries have a suffix _8, _16,  or  _32,  as
+       appropriate.
+
+
+INPUT ENCODING
+
+       Input  to  pcre2test is processed line by line, either by calling the C
+       library's fgets() function, or via the libreadline library (see below).
+       In  Unix-like environments, fgets() treats any bytes other than newline
+       as data characters. However, in some Windows environments character  26
+       (hex  1A) causes an immediate end of file, and no further data is read.
+       For maximum portability, therefore, it is safest to avoid  non-printing
+       characters in pcre2test input files.
+
+
+COMMAND LINE OPTIONS
+
+       -8        If the 8-bit library has been built, this option causes it to
+                 be used (this is the default). If the 8-bit library  has  not
+                 been built, this option causes an error.
+
+       -16       If  the  16-bit library has been built, this option causes it
+                 to be used. If only the 16-bit library has been  built,  this
+                 is  the  default.  If  the 16-bit library has not been built,
+                 this option causes an error.
+
+       -32       If the 32-bit library has been built, this option  causes  it
+                 to  be  used. If only the 32-bit library has been built, this
+                 is the default. If the 32-bit library  has  not  been  built,
+                 this option causes an error.
+
+       -b        Behave  as if each pattern has the /fullbincode modifier; the
+                 full internal binary form of the pattern is output after com-
+                 pilation.
+
+       -C        Output  the  version  number  of  the  PCRE2 library, and all
+                 available information about the optional  features  that  are
+                 included,  and  then  exit  with  zero  exit  code. All other
+                 options are ignored.
+
+       -C option Output information about a specific build-time  option,  then
+                 exit.  This functionality is intended for use in scripts such
+                 as RunTest. The following options output the  value  and  set
+                 the exit code as indicated:
+
+                   ebcdic-nl  the code for LF (= NL) in an EBCDIC environment:
+                                0x15 or 0x25
+                                0 if used in an ASCII environment
+                                exit code is always 0
+                   linksize   the configured internal link size (2, 3, or 4)
+                                exit code is set to the link size
+                   newline    the default newline setting:
+                                CR, LF, CRLF, ANYCRLF, or ANY
+                                exit code is always 0
+                   bsr        the default setting for what \R matches:
+                                ANYCRLF or ANY
+                                exit code is always 0
+
+                 The  following  options output 1 for true or 0 for false, and
+                 set the exit code to the same value:
+
+                   ebcdic     compiled for an EBCDIC environment
+                   jit        just-in-time support is available
+                   pcre16     the 16-bit library was built
+                   pcre32     the 32-bit library was built
+                   pcre8      the 8-bit library was built
+                   unicode    Unicode support is available
+
+                 If an unknown option is given, an error  message  is  output;
+                 the exit code is 0.
+
+       -d        Behave  as if each pattern has the debug modifier; the inter-
+                 nal form and information about the compiled pattern is output
+                 after compilation; -d is equivalent to -b -i.
+
+       -dfa      Behave as if each subject line has the dfa modifier; matching
+                 is done using the pcre2_dfa_match() function instead  of  the
+                 default pcre2_match().
+
+       -help     Output a brief summary of these options and then exit.
+
+       -i        Behave as if each pattern has the /info modifier; information
+                 about the compiled pattern is given after compilation.
+
+       -jit      Behave as if each pattern line has the  jit  modifier;  after
+                 successful  compilation,  each pattern is passed to the just-
+                 in-time compiler, if available.
+
+       -pattern modifier-list
+                 Behave as if each pattern line contains the given modifiers.
+
+       -q        Do not output the version number of pcre2test at the start of
+                 execution.
+
+       -S size   On  Unix-like  systems, set the size of the run-time stack to
+                 size megabytes.
+
+       -subject modifier-list
+                 Behave as if each subject line contains the given modifiers.
+
+       -t        Run each compile and match many times with a timer, and  out-
+                 put the resulting times per compile or match. You can control
+                 the number of iterations that are used for timing by  follow-
+                 ing  -t  with  a  number  (as  a separate item on the command
+                 line). For  example,  "-t  1000"  iterates  1000  times.  The
+                 default is to iterate 500,000 times.
+
+       -tm       This is like -t except that it times only the matching phase,
+                 not the compile phase.
+
+       -T -TM    These behave like -t and -tm, but in addition, at the end  of
+                 a  run, the total times for all compiles and matches are out-
+                 put.
+
+       -version  Output the PCRE2 version number and then exit.
+
+
+DESCRIPTION
+
+       If pcre2test is given two filename arguments, it reads from  the  first
+       and writes to the second. If it is given only one filename argument, it
+       reads from that file and writes to stdout.  Otherwise,  it  reads  from
+       stdin  and  writes to stdout, and prompts for each line of input, using
+       "re>" to prompt for regular expression patterns, and "data>" to  prompt
+       for subject lines.
+
+       When  pcre2test  is  built,  a configuration option can specify that it
+       should be linked with the libreadline or libedit library. When this  is
+       done,  if the input is from a terminal, it is read using the readline()
+       function. This provides line-editing and history facilities. The output
+       from the -help option states whether or not readline() will be used.
+
+       The  program  handles  any number of tests, each of which consists of a
+       set of input lines. Each set starts with a regular expression  pattern,
+       followed by any number of subject lines to be matched against that pat-
+       tern. In between sets of test data, command lines  that  begin  with  a
+       hash  (#)  character  may  appear. This file format, with some restric-
+       tions, can also be processed by the perltest.pl script that is distrib-
+       uted  with PCRE2 as a means of checking that the behaviour of PCRE2 and
+       Perl is the same.
+
+       Each subject line is matched separately and independently. If you  want
+       to do multi-line matches, you have to use the \n escape sequence (or \r
+       or \r\n, etc., depending on the newline setting) in a  single  line  of
+       input  to encode the newline sequences. There is no limit on the length
+       of subject lines; the input buffer is automatically extended if  it  is
+       too  small.  There  is  a replication feature that makes it possible to
+       generate long subject lines without having to supply them explicitly.
+
+       An empty line or the end of the file signals the  end  of  the  subject
+       lines  for  a  test,  at  which  point a new pattern or command line is
+       expected if there is still input to be read.
+
+
+COMMAND LINES
+
+       In between sets of test data, a line that begins with a hash (#)  char-
+       acter  is interpreted as a command line. If the first character is fol-
+       lowed by white space or an exclamation mark, the line is treated  as  a
+       comment,  and  ignored.   Otherwise,  the following commands are recog-
+       nized:
+
+         #forbid_utf
+
+       Subsequent  patterns  automatically  have   the   PCRE2_NEVER_UTF   and
+       PCRE2_NEVER_UCP options set, which locks out the use of UTF and Unicode
+       property features. This is a trigger guard that is used in  test  files
+       to  ensure  that  UTF/Unicode tests are not accidentally added to files
+       that are used when UTF support is not included  in  the  library.  This
+       effect  can  also be obtained by the use of #pattern; the difference is
+       that #forbid_utf cannot be unset, and the  automatic  options  are  not
+       displayed in pattern information, to avoid cluttering up test output.
+
+         #pattern <modifier-list>
+
+       This  command  sets  a default modifier list that applies to all subse-
+       quent patterns. Modifiers on a pattern can change these settings.
+
+         #perltest
+
+       The appearance of this line causes all subsequent modifier settings  to
+       be checked for compatibility with the perltest.pl script, which is used
+       to confirm that Perl gives the same results as PCRE2. Also, apart  from
+       comment  lines,  none of the other command lines are permitted, because
+       they and many of the modifiers are specific to  pcre2test,  and  should
+       not  be  used in test files that are also processed by perltest.pl. The
+       #perltest command helps detect tests that are accidentally put  in  the
+       wrong file.
+
+         #subject <modifier-list>
+
+       This  command  sets  a default modifier list that applies to all subse-
+       quent subject lines. Modifiers on a subject line can change these  set-
+       tings.
+
+
+MODIFIER SYNTAX
+
+       Modifier lists are used with both pattern and subject lines. Items in a
+       list are separated by commas and optional white space.  Some  modifiers
+       may  be  given  for both patterns and subject lines, whereas others are
+       valid for one or the other only. Each modifier has  a  long  name,  for
+       example "anchored", and some of them must be followed by an equals sign
+       and a value, for example, "offset=12".  Modifiers that do not take val-
+       ues may be preceded by a minus sign to turn off a previous default set-
+       ting.
+
+       A few of the more common modifiers can also be specified as single let-
+       ters,  for  example "i" for "caseless". In documentation, following the
+       Perl convention, these are written with a slash ("the /i modifier") for
+       clarity.  Abbreviated  modifiers  must all be concatenated in the first
+       item of a modifier list. If the first item is not recognized as a  long
+       modifier  name, it is interpreted as a sequence of these abbreviations.
+       For example:
+
+         /abc/ig,newline=cr,jit=3
+
+       This is a pattern line whose modifier list starts with  two  one-letter
+       modifiers  (/i  and  /g).  The lower-case abbreviated modifiers are the
+       same as used in Perl.
+
+
+PATTERN SYNTAX
+
+       A pattern line must start with one of the following characters  (common
+       symbols, excluding pattern meta-characters):
+
+         / ! " ' ` - = _ : ; , % & @ ~
+
+       This  is  interpreted  as the pattern's delimiter. A regular expression
+       may be continued over several input lines, in which  case  the  newline
+       characters are included within it. It is possible to include the delim-
+       iter within the pattern by escaping it with a backslash, for example
+
+         /abc\/def/
+
+       If you do this, the escape and the delimiter form part of the  pattern,
+       but since the delimiters are all non-alphanumeric, this does not affect
+       its interpretation. If the terminating delimiter  is  immediately  fol-
+       lowed by a backslash, for example,
+
+         /abc/\
+
+       then  a  backslash  is added to the end of the pattern. This is done to
+       provide a way of testing the error condition that arises if  a  pattern
+       finishes with a backslash, because
+
+         /abc\/
+
+       is  interpreted as the first line of a pattern that starts with "abc/",
+       causing pcre2test to read the next line as a continuation of the  regu-
+       lar expression.
+
+       A pattern can be followed by a modifier list (details below).
+
+
+SUBJECT LINE SYNTAX
+
+       Before    each   subject   line   is   passed   to   pcre2_match()   or
+       pcre2_dfa_match(), leading and trailing white space is removed, and the
+       line is scanned for backslash escapes. The following provide a means of
+       encoding non-printing characters in a visible way:
+
+         \a         alarm (BEL, \x07)
+         \b         backspace (\x08)
+         \e         escape (\x1b)
+         \f         form feed (\x0c)
+         \n         newline (\x0a)
+         \r         carriage return (\x0d)
+         \t         tab (\x09)
+         \v         vertical tab (\x0b)
+         \nnn       octal character (up to 3 octal digits); always
+                      a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
+         \o{dd...}  octal character (any number of octal digits)
+         \xhh       hexadecimal byte (up to 2 hex digits)
+         \x{hh...}  hexadecimal character (any number of hex digits)
+
+       The use of \x{hh...} is not dependent on the use of the utf modifier on
+       the  pattern. It is recognized always. There may be any number of hexa-
+       decimal digits inside the braces; invalid  values  provoke  error  mes-
+       sages.
+
+       Note  that  \xhh  specifies one byte rather than one character in UTF-8
+       mode; this makes it possible to construct invalid UTF-8  sequences  for
+       testing  purposes.  On the other hand, \x{hh} is interpreted as a UTF-8
+       character in UTF-8 mode, generating more than one byte if the value  is
+       greater  than  127.   When testing the 8-bit library not in UTF-8 mode,
+       \x{hh} generates one byte for values less than 256, and causes an error
+       for greater values.
+
+       In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
+       possible to construct invalid UTF-16 sequences for testing purposes.
+
+       In UTF-32 mode, all 4- to 8-digit \x{...}  values  are  accepted.  This
+       makes  it  possible  to  construct invalid UTF-32 sequences for testing
+       purposes.
+
+       There is a special backslash sequence that specifies replication of one
+       or more characters:
+
+         \[<characters>]{<count>}
+
+       This  makes  it possible to test long strings without having to provide
+       them as part of the file. For example:
+
+         \[abc]{4}
+
+       is converted to "abcabcabcabc". This feature does not support  nesting.
+       To include a closing square bracket in the characters, code it as \x5D.
+
+       A  backslash  followed  by  an equals sign marks the end of the subject
+       string and the start of a modifier list. For example:
+
+         abc\=notbol,notempty
+
+       A backslash followed  by  any  other  non-alphanumeric  character  just
+       escapes that character. A backslash followed by anything else causes an
+       error. However, if the very last character in the line is  a  backslash
+       (and  there  is  no  modifier list), it is ignored. This gives a way of
+       passing an empty line as data, since a real empty line  terminates  the
+       data input.
+
+
+PATTERN MODIFIERS
+
+       There are three types of modifier that can appear in pattern lines, two
+       of which may also be used in a #pattern command. A  pattern's  modifier
+       list can add to or override default modifiers that were set by a previ-
+       ous #pattern command.
+
+   Setting compilation options
+
+       The following modifiers set options for pcre2_compile(). The most  com-
+       mon  ones have single-letter abbreviations. See pcre2api for a descrip-
+       tion of their effects.
+
+             allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+             alt_bsux                  set PCRE2_ALT_BSUX
+             anchored                  set PCRE2_ANCHORED
+             auto_callout              set PCRE2_AUTO_CALLOUT
+         /i  caseless                  set PCRE2_CASELESS
+             dollar_endonly            set PCRE2_DOLLAR_ENDONLY
+         /s  dotall                    set PCRE2_DOTALL
+             dupnames                  set PCRE2_DUPNAMES
+         /x  extended                  set PCRE2_EXTENDED
+             firstline                 set PCRE2_FIRSTLINE
+             match_unset_backref       set PCRE2_MATCH_UNSET_BACKREF
+         /m  multiline                 set PCRE2_MULTILINE
+             never_ucp                 set PCRE2_NEVER_UCP
+             never_utf                 set PCRE2_NEVER_UTF
+             no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+             no_auto_possess           set PCRE2_NO_AUTO_POSSESS
+             no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+             no_utf_check              set PCRE2_NO_UTF_CHECK
+             ucp                       set PCRE2_UCP
+             ungreedy                  set PCRE2_UNGREEDY
+             utf                       set PCRE2_UTF
+
+       As well as turning on the PCRE2_UTF option, the utf modifier causes all
+       non-printing  characters  in  output  strings  to  be printed using the
+       \x{hh...} notation. Otherwise, those less than 0x100 are output in  hex
+       without the curly brackets.
+
+   Setting compilation controls
+
+       The  following  modifiers  affect  the  compilation  process or request
+       information about the pattern:
+
+             bsr=[anycrlf|unicode]     specify \R handling
+         /B  bincode                   show binary code without lengths
+             debug                     same as info,fullbincode
+             fullbincode               show binary code with lengths
+         /I  info                      show info about compiled pattern
+             hex                       pattern is coded in hexadecimal
+             jit[=<number>]            use JIT
+             locale=<name>             use this locale
+             memory                    show memory used
+             newline=<type>            set newline type
+             parens_nest_limit=<n>     set maximum parentheses depth
+             perlcompat                lock out non-Perl modifiers
+             posix                     use the POSIX API
+             stackguard=<number>       test the stackguard feature
+             tables=[0|1|2]            select internal tables
+             use_length                use the pattern's length
+
+       The effects of these modifiers are described in the following sections.
+       FIXME: Give more examples.
+
+   Newline and \R handling
+
+       The  bsr modifier specifies what \R in a pattern should match. If it is
+       set to "anycrlf", \R matches CR, LF, or CRLF only.  If  it  is  set  to
+       "unicode",  \R  matches  any  Unicode  newline sequence. The default is
+       specified when PCRE2 is built, with the default default being Unicode.
+
+       The newline modifier specifies which characters are to  be  interpreted
+       as newlines, both in the pattern and (by default) in subject lines. The
+       type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
+
+       Both the \R and newline settings can be changed at match time,  but  if
+       this is done, JIT matching is disabled.
+
+   Information about a pattern
+
+       The  debug modifier is a shorthand for info,fullbincode, requesting all
+       available information.
+
+       The bincode modifier causes a representation of the compiled code to be
+       output  after compilation. This information does not contain length and
+       offset values, which ensures that the same output is generated for dif-
+       ferent  internal  link  sizes  and different code unit widths. By using
+       bincode, the same regression tests can be used  in  different  environ-
+       ments.
+
+       The  fullbincode  modifier, by contrast, does include length and offset
+       values. This is used in a few special tests and is also useful for one-
+       off tests.
+
+       The  info  modifier  requests  information  about  the compiled pattern
+       (whether it is anchored, has a fixed first character, and so  on).  The
+       information is obtained from the pcre2_pattern_info() function.
+
+   Specifying a pattern in hex
+
+       The hex modifier specifies that the characters of the pattern are to be
+       interpreted as pairs of hexadecimal digits. White  space  is  permitted
+       between pairs. For example:
+
+         /ab 32 59/hex
+
+       This  feature  is  provided  as a way of creating patterns that contain
+       binary zero characters. When hex is set, it implies use_length.
+
+   Using the pattern's length
+
+       By default, pcre2test passes patterns  as  zero-terminated  strings  to
+       pcre2_compile(),  giving  the  length  as -1. If use_length is set, the
+       length of the pattern is passed. This is implied if hex is set.
+
+   JIT compilation
+
+       The /jit modifier may optionally be followed by a number in the range 0
+       to 7:
+
+         0  disable JIT
+         1  normal match only
+         2  soft partial match only
+         3  normal match and soft partial match
+         4  hard partial match only
+         6  soft and hard partial match
+         7  all three modes
+
+       If  no number is given, 7 is assumed. If JIT compilation is successful,
+       the compiled JIT code will automatically be used when pcre2_match()  is
+       run,  except when incompatible run-time options are specified. For more
+       details, see the pcre2jit documentation. See also the jitstack modifier
+       below for a way of setting the size of the JIT stack.
+
+       If  the  jitverify  modifier is specified, the text "(JIT)" is added to
+       the first output line after a match or non match when JIT-compiled code
+       was actually used. This modifier can also be set on a subject line.
+
+   Setting a locale
+
+       The /locale modifier must specify the name of a locale, for example:
+
+         /pattern/locale=fr_FR
+
+       The given locale is set, pcre2_maketables() is called to build a set of
+       character tables for the locale, and this is then passed to  pcre2_com-
+       pile()  when compiling the regular expression. The same tables are used
+       when matching the following subject lines. The /locale modifier applies
+       only to the pattern on which it appears, but can be given in a #pattern
+       command if a default is needed. Setting a locale and alternate  charac-
+       ter tables are mutually exclusive.
+
+   Showing pattern memory
+
+       The  /memory modifier causes the size in bytes of the memory block used
+       to hold the compiled pattern to be output. This does  not  include  the
+       size  of  the pcre2_code block; it is just the actual compiled data. If
+       the pattern is subsequently passed to the JIT compiler, the size of the
+       JIT compiled code is also output.
+
+   Limiting nested parentheses
+
+       The  parens_nest_limit  modifier  sets  a  limit on the depth of nested
+       parentheses in a pattern. Breaching  the  limit  causes  a  compilation
+       error.
+
+   Using the POSIX wrapper API
+
+       The  /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
+       per API rather than its  native  API.  This  supports  only  the  8-bit
+       library.  When the POSIX API is being used, the following pattern modi-
+       fiers set options for the regcomp() function:
+
+         caseless           REG_ICASE
+         multiline          REG_NEWLINE
+         no_auto_capture    REG_NOSUB
+         dotall             REG_DOTALL     )
+         ungreedy           REG_UNGREEDY   ) These options are not part of
+         ucp                REG_UCP        )   the POSIX standard
+         utf                REG_UTF8       )
+
+       The aftertext and allaftertext  subject  modifiers  work  as  described
+       below. All other modifiers cause an error.
+
+   Testing the stack guard feature
+
+       The  /stackguard  modifier  is  used  to test the use of pcre2_set_com-
+       pile_recursion_guard(), a function that is  provided  to  enable  stack
+       availability  to  be checked during compilation (see the pcre2api docu-
+       mentation for details). If the number  specified  by  the  modifier  is
+       greater than zero, pcre2_set_compile_recursion_guard() is called to set
+       up a callback from pcre2_compile() to a local function. The argument it
+       is  passed is the current nesting parenthesis depth; if this is greater
+       than the value given by the modifier, non-zero is returned, causing the
+       compilation to be aborted.
+
+   Using alternative character tables
+
+       The  /tables  modifier  must be followed by a single digit. It causes a
+       specific set of built-in character tables to be  passed  to  pcre2_com-
+       pile(). This is used in the PCRE2 tests to check behaviour with differ-
+       ent character tables. The digit specifies the tables as follows:
+
+         0   do not pass any special character tables
+         1   the default ASCII tables, as distributed in
+               pcre2_chartables.c.dist
+         2   a set of tables defining ISO 8859 characters
+
+       In table 2, some characters whose codes are greater than 128 are  iden-
+       tified  as  letters,  digits,  spaces, etc. Setting alternate character
+       tables and a locale are mutually exclusive.
+
+   Setting certain match controls
+
+       The following modifiers are really subject modifiers, and are described
+       below.   However, they may be included in a pattern's modifier list, in
+       which case they are applied to every subject  line  that  is  processed
+       with that pattern. They do not affect the compilation process.
+
+             aftertext                 show text after match
+             allaftertext              show text after captures
+             allcaptures               show all captures
+             allusedtext               show all consulted text
+         /g  global                    global matching
+             jitverify                 verify JIT usage
+             mark                      show mark values
+
+       These  modifiers may not appear in a #pattern command. If you want them
+       as defaults, set them in a #subject command.
+
+
+SUBJECT MODIFIERS
+
+       The modifiers that can appear in subject lines and the #subject command
+       are of two types.
+
+   Setting match options
+
+       The    following   modifiers   set   options   for   pcre2_match()   or
+       pcre2_dfa_match(). See pcre2api for a description of their effects.
+
+             anchored                  set PCRE2_ANCHORED
+             dfa_restart               set PCRE2_DFA_RESTART
+             dfa_shortest              set PCRE2_DFA_SHORTEST
+             no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+             no_utf_check              set PCRE2_NO_UTF_CHECK
+             notbol                    set PCRE2_NOTBOL
+             notempty                  set PCRE2_NOTEMPTY
+             notempty_atstart          set PCRE2_NOTEMPTY_ATSTART
+             noteol                    set PCRE2_NOTEOL
+             partial_hard (or ph)      set PCRE2_PARTIAL_HARD
+             partial_soft (or ps)      set PCRE2_PARTIAL_SOFT
+
+       The partial matching modifiers are provided with abbreviations  because
+       they appear frequently in tests.
+
+       If  the  /posix  modifier was present on the pattern, causing the POSIX
+       wrapper API to be used, the only option-setting modifiers that have any
+       effect   are   notbol,   notempty,   and  noteol,  causing  REG_NOTBOL,
+       REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to  regexec().
+       Any other modifiers cause an error.
+
+   Setting match controls
+
+       The  following  modifiers  affect the matching process or request addi-
+       tional information. Some of them may also be  specified  on  a  pattern
+       line  (see  above), in which case they apply to every subject line that
+       is matched against that pattern.
+
+             aftertext                 show text after match
+             allaftertext              show text after captures
+             allcaptures               show all captures
+             allusedtext               show all consulted text
+             altglobal                 alternative global matching
+             bsr=[anycrlf|unicode]     specify \R handling
+             callout_capture           show captures at callout time
+             callout_data=<n>          set a value to pass via callouts
+             callout_fail=<n>[:<m>]    control callout failure
+             callout_none              do not supply a callout function
+             copy=<number or name>     copy captured substring
+             dfa                       use pcre2_dfa_match()
+             find_limits               find match and recursion limits
+             get=<number or name>      extract captured substring
+             getall                    extract all captured substrings
+         /g  global                    global matching
+             jitstack=<n>              set size of JIT stack
+             jitverify                 verify JIT usage
+             mark                      show mark values
+             match_limit=<n>           set a match limit
+             memory                    show memory usage
+             newline=<type>            set newline type
+             offset=<n>                set starting offset
+             ovector=<n>               set size of output vector
+             recursion_limit=<n>       set a recursion limit
+
+       The effects of these modifiers are described in the following sections.
+       FIXME: Give more examples.
+
+   Newline and \R handling
+
+       These  modifiers  set the newline and \R processing conventions for the
+       subject line, overriding any values that were set at compile  time  (as
+       described  above).   JIT  matching  is  disabled  if these settings are
+       changed at match time.
+
+   Showing more text
+
+       The aftertext modifier requests that as well  as  outputting  the  sub-
+       string  that  matched  the entire pattern, pcre2test should in addition
+       output the remainder of the subject string. This is  useful  for  tests
+       where  the  subject contains multiple copies of the same substring. The
+       allaftertext modifier requests the same action for captured  substrings
+       as  well  as  the main matched substring. In each case the remainder is
+       output on the following line with a plus character following  the  cap-
+       ture number.
+
+       The  allusedtext modifier requests that all the text that was consulted
+       during a successful pattern match be shown. This affects the output  if
+       there  is  a  lookbehind at the start of a match, or a lookahead at the
+       end, or if \K is used in the pattern. Characters that precede or follow
+       the  start  and  end of the actual match are indicated in the output by
+       '<' or '>' characters underneath them. Here is an example:
+
+         /(?<=pqr)abc(?=xyz)/
+             123pqrabcxyz456\=allusedtext
+          0: pqrabcxyz
+             <<<   >>>
+
+       This shows that the matched string is "abc",  with  the  preceding  and
+       following strings "pqr" and "xyz" also consulted during the match.
+
+   Showing the value of all capture groups
+
+       The allcaptures modifier requests that the values of all potential cap-
+       tured parentheses be output after a match. By default, only those up to
+       the highest one actually used in the match are output (corresponding to
+       the return code from pcre2_match()). Groups that did not take  part  in
+       the match are output as "<unset>".
+
+   Testing callouts
+
+       A  callout function is supplied when pcre2test calls the library match-
+       ing functions, unless callout_none is specified. If callout_capture  is
+       set, the current captured groups are output when a callout occurs.
+
+       The  callout_fail modifier can be given one or two numbers. If there is
+       only one number, 1 is returned instead of 0 when a callout of that num-
+       ber  is  reached.  If two numbers are given, 1 is returned when callout
+       <n> is reached for the <m>th time.
+
+       The callout_data modifier can be given an unsigned or a  negative  num-
+       ber.   Any  value  other than zero is used as a return from pcre2test's
+       callout function.
+
+   Testing substring extraction functions
+
+       The copy  and  get  modifiers  can  be  used  to  test  the  pcre2_sub-
+       string_copy_xxx() and pcre2_substring_get_xxx() functions.  They can be
+       given more than once, and each can specify a group name or number,  for
+       example:
+
+          abcd\=copy=1,copy=3,get=G1
+
+       If  the  #subject  command  is  used to set default copy and get lists,
+       these can be unset by specifying a negative number for numbered  groups
+       and an empty name for named groups.
+
+       The  getall  modifier  tests pcre2_substring_list_get(), which extracts
+       all captured substrings.
+
+       If the subject line is successfully matched, the  substrings  extracted
+       by  the  convenience  functions  are  output  with C, G, or L after the
+       string number instead of a colon. This is in  addition  to  the  normal
+       full  list.  The string length (that is, the return from the extraction
+       function) is given in parentheses after each substring.
+
+   Finding all matches in a string
+
+       Searching for all possible matches within a subject can be requested by
+       the  global or /altglobal modifier. After finding a match, the matching
+       function is called again to search the remainder of  the  subject.  The
+       difference  between  global  and  altglobal is that the former uses the
+       start_offset argument to pcre2_match() or  pcre2_dfa_match()  to  start
+       searching  at  a new point within the entire string (which is what Perl
+       does), whereas the latter passes over a shortened substring. This makes
+       a difference to the matching process if the pattern begins with a look-
+       behind assertion (including \b or \B).
+
+       If an empty string  is  matched,  the  next  match  is  done  with  the
+       PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
+       for another, non-empty, match at the same point in the subject. If this
+       match  fails,  the  start  offset  is advanced, and the normal match is
+       retried. This imitates the way Perl handles such cases when  using  the
+       /g  modifier  or  the  split()  function. Normally, the start offset is
+       advanced by one character, but if  the  newline  convention  recognizes
+       CRLF  as  a newline, and the current character is CR followed by LF, an
+       advance of two is used.
+
+   Setting the JIT stack size
+
+       The jitstack modifier provides a way of setting the maximum stack  size
+       that  is  used  by the just-in-time optimization code. It is ignored if
+       JIT optimization is not being used. Providing a stack  that  is  larger
+       than the default 32K is necessary only for very complicated patterns.
+
+   Setting match and recursion limits
+
+       The  match_limit and recursion_limit modifiers set the appropriate lim-
+       its in the match context. These values are ignored when the find_limits
+       modifier is specified.
+
+   Finding minimum limits
+
+       If  the  find_limits modifier is present, pcre2test calls pcre2_match()
+       several times, setting  different  values  in  the  match  context  via
+       pcre2_set_match_limit()  and pcre2_set_recursion_limit() until it finds
+       the minimum values for each parameter that allow pcre2_match() to  com-
+       plete without error.
+
+       The  match_limit number is a measure of the amount of backtracking that
+       takes place, and learning the minimum value  can  be  instructive.  For
+       most  simple  matches, the number is quite small, but for patterns with
+       very large numbers of matching possibilities, it can become large  very
+       quickly    with    increasing    length    of   subject   string.   The
+       recursion_limit  number  is  a  measure  of  how  much  stack  (or,  if
+       PCRE2  is  compiled with NO_RECURSE, how much heap) memory is needed to
+       complete the match attempt.
+
+   Showing MARK names
+
+
+       The mark modifier causes the names from backtracking control verbs that
+       are  returned from calls to pcre2_match() to be displayed. If a mark is
+       returned for a match, non-match, or partial match, pcre2test shows  it.
+       For  a  match, it is on a line by itself, tagged with "MK:". Otherwise,
+       it is added to the non-match message.
+
+   Showing memory usage
+
+       The memory modifier causes pcre2test to log all memory  allocation  and
+       freeing calls that occur during a match operation.
+
+   Setting a starting offset
+
+       The  offset  modifier  sets  an  offset  in the subject string at which
+       matching starts. Its value is a number of code units, not characters.
+
+   Setting the size of the output vector
+
+       The ovector modifier applies only to  the  subject  line  in  which  it
+       appears,  though  of  course  it can also be used to set a default in a
+       #subject command. It specifies the number of pairs of offsets that  are
+       available for storing matching information. The default is 15.
+
+
+THE ALTERNATIVE MATCHING FUNCTION
+
+       By  default,  pcre2test  uses  the  standard  PCRE2  matching function,
+       pcre2_match() to match each subject line. PCRE2 also supports an alter-
+       native  matching  function, pcre2_dfa_match(), which operates in a dif-
+       ferent way, and has some restrictions. The differences between the  two
+       functions are described in the pcre2matching documentation.
+
+       If  the dfa modifier is set, the alternative matching function is used.
+       This function finds all possible matches at a given point in  the  sub-
+       ject.  If,  however, the dfa_shortest modifier is set, processing stops
+       after the first match is found. This is always  the  shortest  possible
+       match.
+
+
+DEFAULT OUTPUT FROM pcre2test
+
+       This  section  describes  the output when the normal matching function,
+       pcre2_match(), is being used.
+
+       When a match succeeds, pcre2test outputs  the  list  of  captured  sub-
+       strings,  starting  with number 0 for the string that matched the whole
+       pattern.   Otherwise,  it  outputs  "No  match"  when  the  return   is
+       PCRE2_ERROR_NOMATCH,  or  "Partial  match:"  followed  by the partially
+       matching substring when the return is PCRE2_ERROR_PARTIAL.  (Note  that
+       this  is  the  entire  substring  that was inspected during the partial
+       match; it may include characters before the actual  match  start  if  a
+       lookbehind assertion, \K, \b, or \B was involved.)
+
+       For any other return, pcre2test outputs the PCRE2 negative error number
+       and a short descriptive phrase. If the error is  a  failed  UTF  string
+       check,  the offset of the start of the failing character and the reason
+       code are also output. Here is an example of  an  interactive  pcre2test
+       run.
+
+         $ pcre2test
+         PCRE2 version 9.00 2014-05-10
+
+           re> /^abc(\d+)/
+         data> abc123
+          0: abc123
+          1: 123
+         data> xyz
+         No match
+
+       Unset capturing substrings that are not followed by one that is set are
+       not returned by pcre2_match(), and are not shown by pcre2test.  In  the
+       following  example,  there  are  two capturing substrings, but when the
+       first data line is matched, the second, unset substring is  not  shown.
+       An  "internal" unset substring is shown as "<unset>", as for the second
+       data line.
+
+           re> /(a)|(b)/
+         data> a
+          0: a
+          1: a
+         data> b
+          0: b
+          1: <unset>
+          2: b
+
+       If the strings contain any non-printing characters, they are output  as
+       \xhh  escapes  if  the  value is less than 256 and UTF mode is not set.
+       Otherwise they are output as \x{hh...} escapes. See below for the defi-
+       nition  of  non-printing characters. If the /aftertext modifier is set,
+       the  output  for  substring  0 is followed by the rest of  the  subject
+       string, identified by "0+" like this:
+
+           re> /cat/aftertext
+         data> cataract
+          0: cat
+          0+ aract
+
+       If  global  matching  is  requested, the results of successive matching
+       attempts are output in sequence, like this:
+
+           re> /\Bi(\w\w)/g
+         data> Mississippi
+          0: iss
+          1: ss
+          0: iss
+          1: ss
+          0: ipp
+          1: pp
+
+       "No match" is output only if the first match attempt fails. Here is  an
+       example  of  a  failure  message (the offset 4 that is specified by the
+       offset modifier is past the end of the subject string):
+
+           re> /xyz/
+         data> xyz\=offset=4
+         Error -24 (bad offset value)
+
+       Note that whereas patterns can be continued over several lines (a plain
+       ">"  prompt  is used for continuations), subject lines may not. However
+       newlines can be included in a subject by means of the \n escape (or \r,
+       \r\n, etc., depending on the newline sequence setting).
+
+
+OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
+
+       When the alternative matching function, pcre2_dfa_match(), is used, the
+       output consists of a list of all the matches that start  at  the  first
+       point in the subject where there is at least one match. For example:
+
+           re> /(tang|tangerine|tan)/
+         data> yellow tangerine\=dfa
+          0: tangerine
+          1: tang
+          2: tan
+
+       (Using  the  normal  matching function on this data finds only "tang".)
+       The longest matching string is always given first (and numbered  zero).
+       After  a  PCRE2_ERROR_PARTIAL  return,  the output is "Partial match:",
+       followed by the partially matching substring. (Note that  this  is  the
+       entire  substring  that  was inspected during the partial match; it may
+       include characters before the actual match start if a lookbehind asser-
+       tion, \K, \b, or \B was involved.)
+
+       If global matching is requested, the search for further matches resumes
+       at the end of the longest match. For example:
+
+           re> /(tang|tangerine|tan)/g
+         data> yellow tangerine and tangy sultana\=dfa
+          0: tangerine
+          1: tang
+          2: tan
+          0: tang
+          1: tan
+          0: tan
+
+       The alternative matching function does not support  substring  capture,
+       so  the  modifiers  that are concerned with captured substrings are not
+       relevant.
+
+
+RESTARTING AFTER A PARTIAL MATCH
+
+       When the alternative matching function has given  the  PCRE2_ERROR_PAR-
+       TIAL return, indicating that the subject partially matched the pattern,
+       you can restart the match with additional subject data by means of  the
+       dfa_restart modifier. For example:
+
+           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+         data> 23ja\=ps,dfa
+         Partial match: 23ja
+         data> n05\=dfa,dfa_restart
+          0: n05
+
+       For  further  information  about partial matching, see the pcre2partial
+       documentation.
+
+
+CALLOUTS
+
+       If the pattern contains any callout requests, pcre2test's callout func-
+       tion  is  called  during  matching. This works with both matching func-
+       tions. By default, the called function displays the callout number, the
+       start  and  current  positions in the text at the callout time, and the
+       next pattern item to be tested. For example:
+
+         --->pqrabcdef
+           0    ^  ^     \d
+
+       This output indicates that  callout  number  0  occurred  for  a  match
+       attempt  starting  at  the fourth character of the subject string, when
+       the pointer was at the seventh character, and  when  the  next  pattern
+       item  was  \d.  Just  one circumflex is output if the start and current
+       positions are the same.
+
+       Callouts numbered 255 are assumed to be automatic callouts, inserted as
+       a  result  of the /auto_callout pattern modifier. In this case, instead
+       of showing the callout number, the offset in the pattern, preceded by a
+       plus, is output. For example:
+
+           re> /\d?[A-E]\*/auto_callout
+         data> E*
+         --->E*
+          +0 ^      \d?
+          +3 ^      [A-E]
+          +8 ^^     \*
+         +10 ^ ^
+          0: E*
+
+       If a pattern contains (*MARK) items, an additional line is output when-
+       ever a change of latest mark is passed to  the  callout  function.  For
+       example:
+
+           re> /a(*MARK:X)bc/auto_callout
+         data> abc
+         --->abc
+          +0 ^       a
+          +1 ^^      (*MARK:X)
+         +10 ^^      b
+         Latest Mark: X
+         +11 ^ ^     c
+         +12 ^  ^
+          0: abc
+
+       The  mark  changes between matching "a" and "b", but stays the same for
+       the rest of the match, so nothing more is output. If, as  a  result  of
+       backtracking,  the  mark  reverts to being unset, the text "<unset>" is
+       output.
+
+       The callout function in pcre2test returns zero (carry on  matching)  by
+       default,  but you can use a callout_fail modifier in a subject line (as
+       described above) to change this and other parameters of the callout.
+
+       Inserting callouts can be helpful when using pcre2test to check compli-
+       cated  regular expressions. For further information about callouts, see
+       the pcre2callout documentation.
+
+
+NON-PRINTING CHARACTERS
+
+       When pcre2test is outputting text in the compiled version of a pattern,
+       bytes  other  than 32-126 are always treated as non-printing characters
+       and are therefore shown as hex escapes.
+
+       When pcre2test is outputting text that is a matched part of  a  subject
+       string,  it behaves in the same way, unless a different locale has been
+       set for the pattern (using the /locale modifier).  In  this  case,  the
+       isprint()  function  is  used  to distinguish printing and non-printing
+       characters.
+
+
+SEE ALSO
+
+       pcre2(3), pcre16(3), pcre32(3), pcre2api(3), pcre2callout(3), pcre2jit,
+       pcre2matching(3), pcre2partial(3), pcre2pattern(3), pcre2precompile(3).
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 19 August 2014
+       Copyright (c) 1997-2014 University of Cambridge.
diff --git a/src/pcre2demo.c b/src/pcre2demo.c
index 6153ffa..8e37832 100644
--- a/src/pcre2demo.c
+++ b/src/pcre2demo.c
@@ -420,4 +420,4 @@ pcre2_code_free(re);
 return 0;
 }
 
-/* End of pcredemo.c */
+/* End of pcre2demo.c */
author	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2014-09-23 11:35:51 +0000
committer	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2014-09-23 11:35:51 +0000
commit	fd8438eb9b6bec69a456b69a7dece77aadc06a36 (patch)
tree	b0f09f3d92934ea3ad0570599c861891cf360362
parent	cf3d2f48e3a1281a47cd544cfd2457b8342037f9 (diff)
download	pcre2-fd8438eb9b6bec69a456b69a7dece77aadc06a36.tar.gz