More document tidies, pre-release.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@182 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-06-13 15:09:54 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-06-13 15:09:54 +0000
commit: 534f2ef23d3192cd74ec86f44c60ff5a7cb957a0 (patch)
tree: 5b29d0fe9b45bef3e8ae979251ddfcc9dbe3a39e
parent: a24e9c9aff88d3b9f6022cbdfee49d758cfde0f7 (diff)
download: pcre-534f2ef23d3192cd74ec86f44c60ff5a7cb957a0.tar.gz
23 files changed, 428 insertions, 258 deletions
diff --git a/ChangeLog b/ChangeLog
index e1197f4..fb2747f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,11 +40,11 @@ Version 7.2 13-June-07
 
     (f) \g{name} is another synonym - part of Perl 5.10's unification of
         reference syntax.
-        
+
     (g) (?| introduces a group in which the numbering of parentheses in each
-        alternative starts with the same number.  
-        
-    (h) \h, \H, \v, and \V match horizontal and vertical whitespace. 
+        alternative starts with the same number.
+
+    (h) \h, \H, \v, and \V match horizontal and vertical whitespace.
 
  7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and
     PCRE_INFO_JCHANGED.
@@ -59,15 +59,15 @@ Version 7.2 13-June-07
     bit of new cunning has reduced the workspace needed for groups with
     alternatives. The 1000-alternative test pattern now uses 12 bytes of
     workspace instead of running out of the 4096 that are available.
-    
+
 10. Inserted some missing (unsigned int) casts to get rid of compiler warnings.
 
 11. Applied patch from Google to remove an optimization that didn't quite work.
     The report of the bug said:
-    
+
       pcrecpp::RE("a*").FullMatch("aaa") matches, while
       pcrecpp::RE("a*?").FullMatch("aaa") does not, and
-      pcrecpp::RE("a*?\\z").FullMatch("aaa") does again. 
+      pcrecpp::RE("a*?\\z").FullMatch("aaa") does again.
 
 
 Version 7.1 24-Apr-07
diff --git a/NEWS b/NEWS
index 867a623..26d0999 100644
--- a/NEWS
+++ b/NEWS
@@ -22,17 +22,17 @@ Some more features from Perl 5.10 have been added:
   (?-n) and (?+n) relative references for recursion and subroutines.
 
   (?(-n) and (?(+n) relative references as conditions.
-  
+
   \k{name} and \g{name} are synonyms for \k<name>.
 
   \K to reset the start of the matched string; for example, (foo)\Kbar
   matches bar preceded by foo, but only sets bar as the matched string.
-  
-  (?| introduces a group where the capturing parentheses in each alternative 
-  start from the same number; for example, (?|(abc)|(xyz)) sets capturing 
+
+  (?| introduces a group where the capturing parentheses in each alternative
+  start from the same number; for example, (?|(abc)|(xyz)) sets capturing
   parentheses number 1 in both cases.
-  
-  \h, \H, \v, \V match horizontal and vertical whitespace, respectively. 
+
+  \h, \H, \v, \V match horizontal and vertical whitespace, respectively.
 
 
 Release 7.1 24-Apr-07
diff --git a/NON-UNIX-USE b/NON-UNIX-USE
index f2ead00..a10c704 100644
--- a/NON-UNIX-USE
+++ b/NON-UNIX-USE
@@ -45,7 +45,7 @@ The following are generic comments about building the PCRE C library "by hand".
 
     An alternative approach is not to edit config.h, but to use -D on the
     compiler command line to make any changes that you need.
-    
+
     NOTE: There have been occasions when the way in which certain parameters in
     config.h are used has changed between releases. (In the configure/make
     world, this is handled automatically.) When upgrading to a new release, you
@@ -165,10 +165,10 @@ On both MinGW and Cygwin, PCRE should build correctly using:
   ./configure && make && make install
 
 This should create two libraries called libpcre and libpcreposix, and, if you
-have enabled building the C++ wrapper, a third one called libpcrecpp. These are 
-independent libraries: when you like with libpcreposix or libpcrecpp you must 
-also link with libpcre, which contains the basic functions. (Some earlier 
-releases of PCRE included the basic libpcre functions in libpcreposix. This no 
+have enabled building the C++ wrapper, a third one called libpcrecpp. These are
+independent libraries: when you link with libpcreposix or libpcrecpp you must
+also link with libpcre, which contains the basic functions. (Some earlier
+releases of PCRE included the basic libpcre functions in libpcreposix. This no
 longer happens.)
 
 If you want to statically link your program against a non-dll .a file, you must
diff --git a/doc/html/pcre.html b/doc/html/pcre.html
index 5859b68..7b24f78 100644
--- a/doc/html/pcre.html
+++ b/doc/html/pcre.html
@@ -228,7 +228,11 @@ must use Unicode property tests such as \p{Nd}.
 low-valued characters.
 </P>
 <P>
-9. Case-insensitive matching applies only to characters whose values are less
+9. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
+(\h, \H, \v, and \V) do match all the appropriate Unicode characters.
+</P>
+<P>
+10. Case-insensitive matching applies only to characters whose values are less
 than 128, unless PCRE is built with Unicode property support. Even when Unicode
 property support is available, PCRE still uses its own character tables when
 checking the case of low-valued characters, so as not to degrade performance.
@@ -254,7 +258,7 @@ two digits 10, at the domain cam.ac.uk.
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 18 April 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcre_fullinfo.html b/doc/html/pcre_fullinfo.html
index ac24f8e..7cda0d3 100644
--- a/doc/html/pcre_fullinfo.html
+++ b/doc/html/pcre_fullinfo.html
@@ -42,12 +42,13 @@ The following information is available:
                               -1 for start of string
                                  or after newline, or
                               -2 otherwise
-  PCRE_INFO_FIRSTTABLE      Table of first bytes
-                              (after studying)
+  PCRE_INFO_FIRSTTABLE      Table of first bytes (after studying)
+  PCRE_INFO_JCHANGED        Return 1 if (?J) was used
   PCRE_INFO_LASTLITERAL     Literal last byte required
   PCRE_INFO_NAMECOUNT       Number of named subpatterns
   PCRE_INFO_NAMEENTRYSIZE   Size of name table entry
   PCRE_INFO_NAMETABLE       Pointer to name table
+  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried
   PCRE_INFO_OPTIONS         Option bits used for compilation
   PCRE_INFO_SIZE            Size of compiled pattern
   PCRE_INFO_STUDYSIZE       Size of study data
diff --git a/doc/html/pcreapi.html b/doc/html/pcreapi.html
index 7830ef8..da06476 100644
--- a/doc/html/pcreapi.html
+++ b/doc/html/pcreapi.html
@@ -658,7 +658,7 @@ out of use. To avoid confusion, they have not been re-used.
   26  malformed number or name after (?(
   27  conditional group contains more than two branches
   28  assertion expected after (?(
-  29  (?R or (?digits must be followed by )
+  29  (?R or (?[+-]digits must be followed by )
   30  unknown POSIX class name
   31  POSIX collating elements are not supported
   32  this version of PCRE is not compiled with PCRE_UTF8 support
@@ -686,6 +686,9 @@ out of use. To avoid confusion, they have not been re-used.
   54  DEFINE group contains more than one branch
   55  repeating a DEFINE group is not allowed
   56  inconsistent NEWLINE options"
+  57  \g is not followed by a braced name or an optionally braced
+        non-zero number
+  58  (?+ or (?- or (?(+ or (?(- must be followed by a non-zero number
 </PRE>
 </P>
 <br><a name="SEC9" href="#TOC1">STUDYING A PATTERN</a><br>
@@ -892,7 +895,7 @@ fourth argument should point to an <b>unsigned char *</b> variable.
 </pre>
 Return 1 if the (?J) option setting is used in the pattern, otherwise 0. The
 fourth argument should point to an <b>int</b> variable. The (?J) internal option
-setting changes the local PCRE_DUPNAMES value.
+setting changes the local PCRE_DUPNAMES option.
 <pre>
   PCRE_INFO_LASTLITERAL
 </pre>
@@ -1873,7 +1876,7 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC22" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 04 June 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcrebuild.html b/doc/html/pcrebuild.html
index 1284646..b2a013e 100644
--- a/doc/html/pcrebuild.html
+++ b/doc/html/pcrebuild.html
@@ -180,13 +180,18 @@ build a version of PCRE that works this way, add
 </pre>
 to the <b>configure</b> command. With this configuration, PCRE will use the
 <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
-management functions. Separate functions are provided because the usage is very
-predictable: the block sizes requested are always the same, and the blocks are
-always freed in reverse order. A calling program might be able to implement
-optimized functions that perform better than the standard <b>malloc()</b> and
-<b>free()</b> functions. PCRE runs noticeably more slowly when built in this
-way. This option affects only the <b>pcre_exec()</b> function; it is not
-relevant for the the <b>pcre_dfa_exec()</b> function.
+management functions. By default these point to <b>malloc()</b> and
+<b>free()</b>, but you can replace the pointers so that your own functions are
+used.
+</P>
+<P>
+Separate functions are provided rather than using <b>pcre_malloc</b> and
+<b>pcre_free</b> because the usage is very predictable: the block sizes
+requested are always the same, and the blocks are always freed in reverse
+order. A calling program might be able to implement optimized functions that
+perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
+slowly when built in this way. This option affects only the <b>pcre_exec()</b>
+function; it is not relevant for the <b>pcre_dfa_exec()</b> function.
 </P>
 <br><a name="SEC10" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
 <P>
@@ -260,7 +265,7 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC15" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 16 April 2007
+Last updated: 05 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcrecompat.html b/doc/html/pcrecompat.html
index 638658a..6e1089d 100644
--- a/doc/html/pcrecompat.html
+++ b/doc/html/pcrecompat.html
@@ -18,8 +18,8 @@ DIFFERENCES BETWEEN PCRE AND PERL
 <P>
 This document describes the differences in the ways that PCRE and Perl handle
 regular expressions. The differences described here are mainly with respect to
-Perl 5.8, though PCRE version 7.0 contains some features that are expected to
-be in the forthcoming Perl 5.10.
+Perl 5.8, though PCRE versions 7.0 and later contain some features that are
+expected to be in the forthcoming Perl 5.10.
 </P>
 <P>
 1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
@@ -111,8 +111,8 @@ meta-character matches only at the very end of the string.
 <br>
 <br>
 (c) If PCRE_EXTRA is set, a backslash followed by a letter with no special
-meaning is faulted. Otherwise, like Perl, the backslash is ignored. (Perl can
-be made to issue a warning.)
+meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored.
+(Perl can be made to issue a warning.)
 <br>
 <br>
 (d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
@@ -156,7 +156,7 @@ Cambridge CB2 3QH, England.
 REVISION
 </b><br>
 <P>
-Last updated: 06 March 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcrepattern.html b/doc/html/pcrepattern.html
index 15ccf85..8d603a1 100644
--- a/doc/html/pcrepattern.html
+++ b/doc/html/pcrepattern.html
@@ -24,19 +24,20 @@ man page, in case the conversion went wrong.
 <li><a name="TOC9" href="#SEC9">VERTICAL BAR</a>
 <li><a name="TOC10" href="#SEC10">INTERNAL OPTION SETTING</a>
 <li><a name="TOC11" href="#SEC11">SUBPATTERNS</a>
-<li><a name="TOC12" href="#SEC12">NAMED SUBPATTERNS</a>
-<li><a name="TOC13" href="#SEC13">REPETITION</a>
-<li><a name="TOC14" href="#SEC14">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
-<li><a name="TOC15" href="#SEC15">BACK REFERENCES</a>
-<li><a name="TOC16" href="#SEC16">ASSERTIONS</a>
-<li><a name="TOC17" href="#SEC17">CONDITIONAL SUBPATTERNS</a>
-<li><a name="TOC18" href="#SEC18">COMMENTS</a>
-<li><a name="TOC19" href="#SEC19">RECURSIVE PATTERNS</a>
-<li><a name="TOC20" href="#SEC20">SUBPATTERNS AS SUBROUTINES</a>
-<li><a name="TOC21" href="#SEC21">CALLOUTS</a>
-<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
-<li><a name="TOC23" href="#SEC23">AUTHOR</a>
-<li><a name="TOC24" href="#SEC24">REVISION</a>
+<li><a name="TOC12" href="#SEC12">DUPLICATE SUBPATTERN NUMBERS</a>
+<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
+<li><a name="TOC14" href="#SEC14">REPETITION</a>
+<li><a name="TOC15" href="#SEC15">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
+<li><a name="TOC16" href="#SEC16">BACK REFERENCES</a>
+<li><a name="TOC17" href="#SEC17">ASSERTIONS</a>
+<li><a name="TOC18" href="#SEC18">CONDITIONAL SUBPATTERNS</a>
+<li><a name="TOC19" href="#SEC19">COMMENTS</a>
+<li><a name="TOC20" href="#SEC20">RECURSIVE PATTERNS</a>
+<li><a name="TOC21" href="#SEC21">SUBPATTERNS AS SUBROUTINES</a>
+<li><a name="TOC22" href="#SEC22">CALLOUTS</a>
+<li><a name="TOC23" href="#SEC23">SEE ALSO</a>
+<li><a name="TOC24" href="#SEC24">AUTHOR</a>
+<li><a name="TOC25" href="#SEC25">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION DETAILS</a><br>
 <P>
@@ -270,8 +271,12 @@ following are always recognized:
 <pre>
   \d     any decimal digit
   \D     any character that is not a decimal digit
+  \h     any horizontal whitespace character
+  \H     any character that is not a horizontal whitespace character
   \s     any whitespace character
   \S     any character that is not a whitespace character
+  \v     any vertical whitespace character
+  \V     any character that is not a vertical whitespace character
   \w     any "word" character
   \W     any "non-word" character
 </pre>
@@ -287,9 +292,52 @@ there is no character to match.
 <P>
 For compatibility with Perl, \s does not match the VT character (code 11).
 This makes it different from the the POSIX "space" class. The \s characters
-are HT (9), LF (10), FF (12), CR (13), and space (32). (If "use locale;" is
+are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
 included in a Perl script, \s may match the VT character. In PCRE, it never
-does.)
+does.
+</P>
+<P>
+In UTF-8 mode, characters with values greater than 128 never match \d, \s, or
+\w, and always match \D, \S, and \W. This is true even when Unicode
+character property support is available. These sequences retain their original
+meanings from before UTF-8 support was available, mainly for efficiency
+reasons.
+</P>
+<P>
+The sequences \h, \H, \v, and \V are Perl 5.10 features. In contrast to the
+other sequences, these do match certain high-valued codepoints in UTF-8 mode.
+The horizontal space characters are:
+<pre>
+  U+0009     Horizontal tab
+  U+0020     Space
+  U+00A0     Non-break space
+  U+1680     Ogham space mark
+  U+180E     Mongolian vowel separator
+  U+2000     En quad
+  U+2001     Em quad
+  U+2002     En space
+  U+2003     Em space
+  U+2004     Three-per-em space
+  U+2005     Four-per-em space
+  U+2006     Six-per-em space
+  U+2007     Figure space
+  U+2008     Punctuation space
+  U+2009     Thin space
+  U+200A     Hair space
+  U+202F     Narrow no-break space
+  U+205F     Medium mathematical space
+  U+3000     Ideographic space
+</pre>
+The vertical space characters are:
+<pre>
+  U+000A     Linefeed
+  U+000B     Vertical tab
+  U+000C     Formfeed
+  U+000D     Carriage return
+  U+0085     Next line
+  U+2028     Line separator
+  U+2029     Paragraph separator
+</PRE>
 </P>
 <P>
 A "word" character is an underscore or any character less than 256 that is a
@@ -301,20 +349,15 @@ in the
 <a href="pcreapi.html"><b>pcreapi</b></a>
 page). For example, in a French locale such as "fr_FR" in Unix-like systems,
 or "french" in Windows, some character codes greater than 128 are used for
-accented letters, and these are matched by \w.
-</P>
-<P>
-In UTF-8 mode, characters with values greater than 128 never match \d, \s, or
-\w, and always match \D, \S, and \W. This is true even when Unicode
-character property support is available. The use of locales with Unicode is
-discouraged.
+accented letters, and these are matched by \w. The use of locales with Unicode
+is discouraged.
 </P>
 <br><b>
 Newline sequences
 </b><br>
 <P>
 Outside a character class, the escape sequence \R matches any Unicode newline
-sequence. This is an extension to Perl. In non-UTF-8 mode \R is equivalent to
+sequence. This is a Perl 5.10 feature. In non-UTF-8 mode \R is equivalent to
 the following:
 <pre>
   (?&#62;\r\n|\n|\x0b|\f|\r|\x85)
@@ -966,7 +1009,38 @@ from left to right, and options are not reset until the end of the subpattern
 is reached, an option setting in one branch does affect subsequent branches, so
 the above patterns match "SUNDAY" as well as "Saturday".
 </P>
-<br><a name="SEC12" href="#TOC1">NAMED SUBPATTERNS</a><br>
+<br><a name="SEC12" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
+<P>
+Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
+the same numbers for its capturing parentheses. Such a subpattern starts with
+(?| and is itself a non-capturing subpattern. For example, consider this
+pattern:
+<pre>
+  (?|(Sat)ur|(Sun))day
+</pre>
+Because the two alternatives are inside a (?| group, both sets of capturing
+parentheses are numbered one. Thus, when the pattern matches, you can look
+at captured substring number one, whichever alternative matched. This construct
+is useful when you want to capture part, but not all, of one of a number of
+alternatives. Inside a (?| group, parentheses are numbered as usual, but the
+number is reset at the start of each branch. The numbers of any capturing
+buffers that follow the subpattern start after the highest number used in any
+branch. The following example is taken from the Perl documentation.
+The numbers underneath show in which buffer the captured content will be
+stored.
+<pre>
+  # before  ---------------branch-reset----------- after
+  / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+  # 1            2         2  3        2     3     4
+</pre>
+A backreference or a recursive call to a numbered subpattern always refers to
+the first one in the pattern with the given number.
+</P>
+<P>
+An alternative approach to using this "branch reset" feature is to use
+duplicate named subpatterns, as described in the next section.
+</P>
+<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
 <P>
 Identifying capturing parentheses by number is simple, but it can be very hard
 to keep track of the numbers in complicated regular expressions. Furthermore,
@@ -1008,6 +1082,10 @@ abbreviation. This pattern (ignoring the line breaks) does the job:
   (?&#60;DN&#62;Sat)(?:urday)?
 </pre>
 There are five capturing substrings, but only one is ever set after a match.
+(An alternative way of solving this problem is to use a "branch reset"
+subpattern, as described in the previous section.)
+</P>
+<P>
 The convenience function for extracting the data by name returns the substring
 for the first (and in this example, the only) subpattern of that name that
 matched. This saves searching to find which numbered subpattern it was. If you
@@ -1017,7 +1095,7 @@ details of the interfaces for handling named subpatterns, see the
 <a href="pcreapi.html"><b>pcreapi</b></a>
 documentation.
 </P>
-<br><a name="SEC13" href="#TOC1">REPETITION</a><br>
+<br><a name="SEC14" href="#TOC1">REPETITION</a><br>
 <P>
 Repetition is specified by quantifiers, which can follow any of the following
 items:
@@ -1168,7 +1246,7 @@ example, after
 </pre>
 matches "aba" the value of the second captured substring is "b".
 <a name="atomicgroup"></a></P>
-<br><a name="SEC14" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
+<br><a name="SEC15" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
 <P>
 With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
 repetition, failure of what follows normally causes the repeated item to be
@@ -1267,7 +1345,7 @@ an atomic group, like this:
 </pre>
 sequences of non-digits cannot be broken, and failure happens quickly.
 <a name="backreferences"></a></P>
-<br><a name="SEC15" href="#TOC1">BACK REFERENCES</a><br>
+<br><a name="SEC16" href="#TOC1">BACK REFERENCES</a><br>
 <P>
 Outside a character class, a backslash followed by a digit greater than 0 (and
 possibly further digits) is a back reference to a capturing subpattern earlier
@@ -1380,7 +1458,7 @@ that the first iteration does not need to match the back reference. This can be
 done using alternation, as in the example above, or by a quantifier with a
 minimum of zero.
 <a name="bigassertions"></a></P>
-<br><a name="SEC16" href="#TOC1">ASSERTIONS</a><br>
+<br><a name="SEC17" href="#TOC1">ASSERTIONS</a><br>
 <P>
 An assertion is a test on the characters following or preceding the current
 matching point that does not actually consume any characters. The simple
@@ -1540,7 +1618,7 @@ preceded by "foo", while
 is another pattern that matches "foo" preceded by three digits and any three
 characters that are not "999".
 <a name="conditions"></a></P>
-<br><a name="SEC17" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
+<br><a name="SEC18" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
 <P>
 It is possible to cause the matching process to obey a subpattern
 conditionally or to choose between two alternative subpatterns, depending on
@@ -1678,7 +1756,7 @@ subject is matched against the first alternative; otherwise it is matched
 against the second. This pattern matches strings in one of the two forms
 dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
 <a name="comments"></a></P>
-<br><a name="SEC18" href="#TOC1">COMMENTS</a><br>
+<br><a name="SEC19" href="#TOC1">COMMENTS</a><br>
 <P>
 The sequence (?# marks the start of a comment that continues up to the next
 closing parenthesis. Nested parentheses are not permitted. The characters
@@ -1689,7 +1767,7 @@ If the PCRE_EXTENDED option is set, an unescaped # character outside a
 character class introduces a comment that continues to immediately after the
 next newline in the pattern.
 <a name="recursion"></a></P>
-<br><a name="SEC19" href="#TOC1">RECURSIVE PATTERNS</a><br>
+<br><a name="SEC20" href="#TOC1">RECURSIVE PATTERNS</a><br>
 <P>
 Consider the problem of matching a string in parentheses, allowing for
 unlimited nested parentheses. Without the use of recursion, the best that can
@@ -1819,7 +1897,7 @@ In this pattern, (?(R) is the start of a conditional subpattern, with two
 different alternatives for the recursive and non-recursive cases. The (?R) item
 is the actual recursive call.
 <a name="subpatternsassubroutines"></a></P>
-<br><a name="SEC20" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
+<br><a name="SEC21" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
 <P>
 If the syntax for a recursive subpattern reference (either by number or by
 name) is used outside the parentheses to which it refers, it operates like a
@@ -1859,7 +1937,7 @@ changed for different calls. For example, consider this pattern:
 It matches "abcabc". It does not match "abcABC" because the change of
 processing option does not affect the called subpattern.
 </P>
-<br><a name="SEC21" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC22" href="#TOC1">CALLOUTS</a><br>
 <P>
 Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
 code to be obeyed in the middle of matching a regular expression. This makes it
@@ -1894,11 +1972,11 @@ description of the interface to the callout function is given in the
 <a href="pcrecallout.html"><b>pcrecallout</b></a>
 documentation.
 </P>
-<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC23" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3), <b>pcre</b>(3).
 </P>
-<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC24" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@@ -1907,9 +1985,9 @@ University Computing Service
 Cambridge CB2 3QH, England.
 <br>
 </P>
-<br><a name="SEC24" href="#TOC1">REVISION</a><br>
+<br><a name="SEC25" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 29 May 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcreprecompile.html b/doc/html/pcreprecompile.html
index 0e4cb74..83da226 100644
--- a/doc/html/pcreprecompile.html
+++ b/doc/html/pcreprecompile.html
@@ -124,19 +124,9 @@ usual way.
 </P>
 <br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
 <P>
-The layout of the control block that is at the start of the data that makes up
-a compiled pattern was changed for release 5.0. If you have any saved patterns
-that were compiled with previous releases (not a facility that was previously
-advertised), you will have to recompile them for release 5.0 and above.
-</P>
-<P>
-If you have any saved patterns in UTF-8 mode that use \p or \P that were
-compiled with any release up to and including 6.4, you will have to recompile
-them for release 6.5 and above.
-</P>
-<P>
-All saved patterns from earlier releases must be recompiled for release 7.0 or
-higher, because there was an internal reorganization at that release.
+In general, it is safest to recompile all saved patterns when you update to a
+new PCRE release, though not all updates actually require this. Recompiling is
+definitely needed for release 7.2.
 </P>
 <br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
 <P>
@@ -149,7 +139,7 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 24 April 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcresample.html b/doc/html/pcresample.html
index b3c924d..44c5bfb 100644
--- a/doc/html/pcresample.html
+++ b/doc/html/pcresample.html
@@ -33,9 +33,10 @@ string. The logic is a little bit tricky because of the possibility of matching
 an empty string. Comments in the code explain what is going on.
 </P>
 <P>
-If PCRE is installed in the standard include and library directories for your
-system, you should be able to compile the demonstration program using this
-command:
+The demonstration program is automatically built if you use "./configure;make"
+to build PCRE. Otherwise, if PCRE is installed in the standard include and
+library directories for your system, you should be able to compile the
+demonstration program using this command:
 <pre>
   gcc -o pcredemo pcredemo.c -lpcre
 </pre>
@@ -87,7 +88,7 @@ Cambridge CB2 3QH, England.
 REVISION
 </b><br>
 <P>
-Last updated: 06 March 2007
+Last updated: 13 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/html/pcrestack.html b/doc/html/pcrestack.html
index 7236400..2cc7d26 100644
--- a/doc/html/pcrestack.html
+++ b/doc/html/pcrestack.html
@@ -83,7 +83,13 @@ PCRE to use heap memory instead of stack for remembering back-up points. This
 makes it run a lot more slowly, however. Details of how to do this are given in
 the
 <a href="pcrebuild.html"><b>pcrebuild</b></a>
-documentation.
+documentation. When built in this way, instead of using the stack, PCRE obtains
+and frees memory by calling the functions that are pointed to by the
+<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables. By default, these
+point to <b>malloc()</b> and <b>free()</b>, but you can replace the pointers to
+cause PCRE to use your own functions. Since the block sizes are always the
+same, and are always freed in reverse order, it may be possible to implement
+customized memory handlers that are more efficient than the standard functions.
 </P>
 <P>
 In Unix-like environments, there is not often a problem with the stack unless
@@ -139,7 +145,7 @@ Cambridge CB2 3QH, England.
 REVISION
 </b><br>
 <P>
-Last updated: 12 March 2007
+Last updated: 05 June 2007
 <br>
 Copyright &copy; 1997-2007 University of Cambridge.
 <br>
diff --git a/doc/pcre.3 b/doc/pcre.3
index 40aec35..f731b16 100644
--- a/doc/pcre.3
+++ b/doc/pcre.3
@@ -219,7 +219,7 @@ must use Unicode property tests such as \ep{Nd}.
 8. Similarly, characters that match the POSIX named character classes are all
 low-valued characters.
 .P
-9. However, the Perl 5.10 horizontal and vertical whitespace matching escapes 
+9. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
 (\eh, \eH, \ev, and \eV) do match all the appropriate Unicode characters.
 .P
 10. Case-insensitive matching applies only to characters whose values are less
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 601812c..e55cf01 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -197,7 +197,11 @@ UTF-8 AND UNICODE PROPERTY SUPPORT
        8. Similarly, characters that match the POSIX named  character  classes
        are all low-valued characters.
 
-       9.  Case-insensitive  matching  applies only to characters whose values
+       9.  However,  the Perl 5.10 horizontal and vertical whitespace matching
+       escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
+       acters.
+
+       10.  Case-insensitive  matching applies only to characters whose values
        are less than 128, unless PCRE is built with Unicode property  support.
        Even  when  Unicode  property support is available, PCRE still uses its
        own character tables when checking the case of  low-valued  characters,
@@ -222,7 +226,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 18 April 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -390,13 +394,17 @@ AVOIDING EXCESSIVE STACK USAGE
 
        to  the  configure  command. With this configuration, PCRE will use the
        pcre_stack_malloc and pcre_stack_free variables to call memory  manage-
-       ment  functions.  Separate  functions are provided because the usage is
-       very predictable: the block sizes requested are always  the  same,  and
-       the  blocks  are always freed in reverse order. A calling program might
-       be able to implement optimized functions that perform better  than  the
-       standard  malloc()  and  free()  functions.  PCRE  runs noticeably more
-       slowly when built in this way. This option affects only the pcre_exec()
-       function; it is not relevant for the the pcre_dfa_exec() function.
+       ment  functions. By default these point to malloc() and free(), but you
+       can replace the pointers so that your own functions are used.
+
+       Separate functions are  provided  rather  than  using  pcre_malloc  and
+       pcre_free  because  the  usage  is  very  predictable:  the block sizes
+       requested are always the same, and  the  blocks  are  always  freed  in
+       reverse  order.  A calling program might be able to implement optimized
+       functions that perform better  than  malloc()  and  free().  PCRE  runs
+       noticeably more slowly when built in this way. This option affects only
+       the pcre_exec() function; it is not relevant for the pcre_dfa_exec()
+       function.
 
 
 LIMITING PCRE RESOURCE USAGE
@@ -474,7 +482,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 16 April 2007
+       Last updated: 05 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -1259,7 +1267,7 @@ COMPILATION ERROR CODES
          26  malformed number or name after (?(
          27  conditional group contains more than two branches
          28  assertion expected after (?(
-         29  (?R or (?digits must be followed by )
+         29  (?R or (?[+-]digits must be followed by )
          30  unknown POSIX class name
          31  POSIX collating elements are not supported
          32  this version of PCRE is not compiled with PCRE_UTF8 support
@@ -1288,6 +1296,9 @@ COMPILATION ERROR CODES
          54  DEFINE group contains more than one branch
          55  repeating a DEFINE group is not allowed
          56  inconsistent NEWLINE options
+         57  \g is not followed by a braced name or an optionally braced
+               non-zero number
+         58  (?+ or (?- or (?(+ or (?(- must be followed by a non-zero number
 
 
 STUDYING A PATTERN
@@ -1480,7 +1491,7 @@ INFORMATION ABOUT A PATTERN
 
        Return  1  if the (?J) option setting is used in the pattern, otherwise
        0. The fourth argument should point to an int variable. The (?J) inter-
-       nal option setting changes the local PCRE_DUPNAMES value.
+       nal option setting changes the local PCRE_DUPNAMES option.
 
          PCRE_INFO_LASTLITERAL
 
@@ -2406,7 +2417,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 04 June 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -2593,8 +2604,8 @@ DIFFERENCES BETWEEN PCRE AND PERL
 
        This  document describes the differences in the ways that PCRE and Perl
        handle regular expressions. The differences described here  are  mainly
-       with  respect  to  Perl 5.8, though PCRE version 7.0 contains some fea-
-       tures that are expected to be in the forthcoming Perl 5.10.
+       with  respect  to  Perl 5.8, though PCRE versions 7.0 and later contain
+       some features that are expected to be in the forthcoming Perl 5.10.
 
        1. PCRE has only a subset of Perl's UTF-8 and Unicode support.  Details
        of  what  it does have are given in the section on UTF-8 support in the
@@ -2672,8 +2683,8 @@ DIFFERENCES BETWEEN PCRE AND PERL
        meta-character matches only at the very end of the string.
 
        (c) If PCRE_EXTRA is set, a backslash followed by a letter with no spe-
-       cial  meaning  is  faulted.  Otherwise,  like  Perl,  the  backslash is
-       ignored. (Perl can be made to issue a warning.)
+       cial meaning is faulted. Otherwise, like Perl, the backslash is quietly
+       ignored.  (Perl can be made to issue a warning.)
 
        (d) If PCRE_UNGREEDY is set, the greediness of the  repetition  quanti-
        fiers is inverted, that is, by default they are not greedy, but if fol-
@@ -2705,7 +2716,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 06 March 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -2938,8 +2949,12 @@ BACKSLASH
 
          \d     any decimal digit
          \D     any character that is not a decimal digit
+         \h     any horizontal whitespace character
+         \H     any character that is not a horizontal whitespace character
          \s     any whitespace character
          \S     any character that is not a whitespace character
+         \v     any vertical whitespace character
+         \V     any character that is not a vertical whitespace character
          \w     any "word" character
          \W     any "non-word" character
 
@@ -2954,9 +2969,49 @@ BACKSLASH
 
        For compatibility with Perl, \s does not match the VT  character  (code
        11).   This  makes  it  different  from the POSIX "space" class. The \s
-       characters are HT (9), LF (10), FF (12), CR (13), and space  (32).  (If
+       characters are HT (9), LF (10), FF (12), CR (13), and  space  (32).  If
        "use locale;" is included in a Perl script, \s may match the VT charac-
-       ter. In PCRE, it never does.)
+       ter. In PCRE, it never does.
+
+       In UTF-8 mode, characters with values greater than 128 never match  \d,
+       \s, or \w, and always match \D, \S, and \W. This is true even when Uni-
+       code character property support is available.  These  sequences  retain
+       their original meanings from before UTF-8 support was available, mainly
+       for efficiency reasons.
+
+       The sequences \h, \H, \v, and \V are Perl 5.10 features. In contrast to
+       the  other  sequences, these do match certain high-valued codepoints in
+       UTF-8 mode.  The horizontal space characters are:
+
+         U+0009     Horizontal tab
+         U+0020     Space
+         U+00A0     Non-break space
+         U+1680     Ogham space mark
+         U+180E     Mongolian vowel separator
+         U+2000     En quad
+         U+2001     Em quad
+         U+2002     En space
+         U+2003     Em space
+         U+2004     Three-per-em space
+         U+2005     Four-per-em space
+         U+2006     Six-per-em space
+         U+2007     Figure space
+         U+2008     Punctuation space
+         U+2009     Thin space
+         U+200A     Hair space
+         U+202F     Narrow no-break space
+         U+205F     Medium mathematical space
+         U+3000     Ideographic space
+
+       The vertical space characters are:
+
+         U+000A     Linefeed
+         U+000B     Vertical tab
+         U+000C     Formfeed
+         U+000D     Carriage return
+         U+0085     Next line
+         U+2028     Line separator
+         U+2029     Paragraph separator
 
        A "word" character is an underscore or any character less than 256 that
        is  a  letter  or  digit.  The definition of letters and digits is con-
@@ -2964,17 +3019,13 @@ BACKSLASH
        specific  matching is taking place (see "Locale support" in the pcreapi
        page). For example, in a French locale such  as  "fr_FR"  in  Unix-like
        systems,  or "french" in Windows, some character codes greater than 128
-       are used for accented letters, and these are matched by \w.
-
-       In UTF-8 mode, characters with values greater than 128 never match  \d,
-       \s, or \w, and always match \D, \S, and \W. This is true even when Uni-
-       code character property support is available. The use of  locales  with
-       Unicode is discouraged.
+       are used for accented letters, and these are matched by \w. The use  of
+       locales with Unicode is discouraged.
 
    Newline sequences
 
        Outside  a  character class, the escape sequence \R matches any Unicode
-       newline sequence. This is an extension to Perl. In non-UTF-8 mode \R is
+       newline sequence. This is a Perl 5.10 feature. In non-UTF-8 mode \R  is
        equivalent to the following:
 
          (?>\r\n|\n|\x0b|\f|\r|\x85)
@@ -3537,6 +3588,37 @@ SUBPATTERNS
        "Saturday".
 
 
+DUPLICATE SUBPATTERN NUMBERS
+
+       Perl 5.10 introduced a feature whereby each alternative in a subpattern
+       uses the same numbers for its capturing parentheses. Such a  subpattern
+       starts  with (?| and is itself a non-capturing subpattern. For example,
+       consider this pattern:
+
+         (?|(Sat)ur|(Sun))day
+
+       Because the two alternatives are inside a (?| group, both sets of  cap-
+       turing  parentheses  are  numbered one. Thus, when the pattern matches,
+       you can look at captured substring number  one,  whichever  alternative
+       matched.  This  construct  is useful when you want to capture part, but
+       not all, of one of a number of alternatives. Inside a (?| group, paren-
+       theses  are  numbered as usual, but the number is reset at the start of
+       each branch. The numbers of any capturing buffers that follow the  sub-
+       pattern  start after the highest number used in any branch. The follow-
+       ing example is taken from the Perl documentation.  The  numbers  under-
+       neath show in which buffer the captured content will be stored.
+
+         # before  ---------------branch-reset----------- after
+         / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+         # 1            2         2  3        2     3     4
+
+       A  backreference  or  a  recursive call to a numbered subpattern always
+       refers to the first one in the pattern with the given number.
+
+       An alternative approach to using this "branch reset" feature is to  use
+       duplicate named subpatterns, as described in the next section.
+
+
 NAMED SUBPATTERNS
 
        Identifying  capturing  parentheses  by number is simple, but it can be
@@ -3576,14 +3658,16 @@ NAMED SUBPATTERNS
          (?<DN>Sat)(?:urday)?
 
        There  are  five capturing substrings, but only one is ever set after a
-       match.  The convenience  function  for  extracting  the  data  by  name
-       returns  the  substring  for  the first (and in this example, the only)
-       subpattern of that name that matched.  This  saves  searching  to  find
-       which  numbered  subpattern  it  was. If you make a reference to a non-
-       unique named subpattern from elsewhere in the  pattern,  the  one  that
-       corresponds  to  the  lowest number is used. For further details of the
-       interfaces for handling named subpatterns, see the  pcreapi  documenta-
-       tion.
+       match.  (An alternative way of solving this problem is to use a "branch
+       reset" subpattern, as described in the previous section.)
+
+       The  convenience  function  for extracting the data by name returns the
+       substring for the first (and in this example, the only)  subpattern  of
+       that  name  that  matched.  This saves searching to find which numbered
+       subpattern it was. If you make a reference to a non-unique  named  sub-
+       pattern  from elsewhere in the pattern, the one that corresponds to the
+       lowest number is used. For further details of the interfaces  for  han-
+       dling named subpatterns, see the pcreapi documentation.
 
 
 REPETITION
@@ -4455,7 +4539,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 29 May 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -4786,19 +4870,9 @@ RE-USING A PRECOMPILED PATTERN
 
 COMPATIBILITY WITH DIFFERENT PCRE RELEASES
 
-       The layout of the control block that is at the start of the  data  that
-       makes  up  a  compiled pattern was changed for release 5.0. If you have
-       any saved patterns that were compiled with  previous  releases  (not  a
-       facility  that  was  previously advertised), you will have to recompile
-       them for release 5.0 and above.
-
-       If you have any saved patterns in UTF-8 mode that use  \p  or  \P  that
-       were  compiled  with any release up to and including 6.4, you will have
-       to recompile them for release 6.5 and above.
-
-       All saved patterns from earlier releases must be recompiled for release
-       7.0  or  higher,  because  there was an internal reorganization at that
-       release.
+       In general, it is safest to  recompile  all  saved  patterns  when  you
+       update  to  a new PCRE release, though not all updates actually require
+       this. Recompiling is definitely needed for release 7.2.
 
 
 AUTHOR
@@ -4810,7 +4884,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 24 April 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
@@ -5545,28 +5619,29 @@ PCRE SAMPLE PROGRAM
        bility  of  matching an empty string. Comments in the code explain what
        is going on.
 
-       If PCRE is installed in the standard include  and  library  directories
-       for  your  system, you should be able to compile the demonstration pro-
-       gram using this command:
+       The demonstration program is automatically built if you use  "./config-
+       ure;make"  to  build PCRE. Otherwise, if PCRE is installed in the stan-
+       dard include and library directories for your  system,  you  should  be
+       able to compile the demonstration program using this command:
 
          gcc -o pcredemo pcredemo.c -lpcre
 
-       If PCRE is installed elsewhere, you may need to add additional  options
-       to  the  command line. For example, on a Unix-like system that has PCRE
-       installed in /usr/local, you  can  compile  the  demonstration  program
+       If  PCRE is installed elsewhere, you may need to add additional options
+       to the command line. For example, on a Unix-like system that  has  PCRE
+       installed  in  /usr/local,  you  can  compile the demonstration program
        using a command like this:
 
          gcc -o pcredemo -I/usr/local/include pcredemo.c \
              -L/usr/local/lib -lpcre
 
-       Once  you  have  compiled the demonstration program, you can run simple
+       Once you have compiled the demonstration program, you  can  run  simple
        tests like this:
 
          ./pcredemo 'cat|dog' 'the cat sat on the mat'
          ./pcredemo -g 'cat|dog' 'the dog sat on the cat'
 
-       Note that there is a  much  more  comprehensive  test  program,  called
-       pcretest,  which  supports  many  more  facilities  for testing regular
+       Note  that  there  is  a  much  more comprehensive test program, called
+       pcretest, which supports  many  more  facilities  for  testing  regular
        expressions and the PCRE library. The pcredemo program is provided as a
        simple coding example.
 
@@ -5574,10 +5649,10 @@ PCRE SAMPLE PROGRAM
        the standard library directory, you may get an error like this when you
        try to run pcredemo:
 
-         ld.so.1:  a.out:  fatal:  libpcre.so.0:  open failed: No such file or
+         ld.so.1: a.out: fatal: libpcre.so.0: open failed:  No  such  file  or
        directory
 
-       This is caused by the way shared library support works  on  those  sys-
+       This  is  caused  by the way shared library support works on those sys-
        tems. You need to add
 
          -R/usr/local/lib
@@ -5594,7 +5669,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 06 March 2007
+       Last updated: 13 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 PCRESTACK(3)                                                      PCRESTACK(3)
@@ -5664,17 +5739,24 @@ PCRE DISCUSSION OF STACK USAGE
        In environments where stack memory is constrained, you  might  want  to
        compile  PCRE to use heap memory instead of stack for remembering back-
        up points. This makes it run a lot more slowly, however. Details of how
-       to do this are given in the pcrebuild documentation.
-
-       In  Unix-like environments, there is not often a problem with the stack
-       unless very long strings are involved,  though  the  default  limit  on
-       stack  size  varies  from system to system. Values from 8Mb to 64Mb are
+       to do this are given in the pcrebuild documentation. When built in this
+       way, instead of using the stack, PCRE obtains and frees memory by call-
+       ing  the  functions  that  are  pointed to by the pcre_stack_malloc and
+       pcre_stack_free variables. By default,  these  point  to  malloc()  and
+       free(),  but you can replace the pointers to cause PCRE to use your own
+       functions. Since the block sizes are always the same,  and  are  always
+       freed in reverse order, it may be possible to implement customized mem-
+       ory handlers that are more efficient than the standard functions.
+
+       In Unix-like environments, there is not often a problem with the  stack
+       unless  very  long  strings  are  involved, though the default limit on
+       stack size varies from system to system. Values from 8Mb  to  64Mb  are
        common. You can find your default limit by running the command:
 
          ulimit -s
 
-       Unfortunately, the effect of running out of  stack  is  often  SIGSEGV,
-       though  sometimes  a more explicit error message is given. You can nor-
+       Unfortunately,  the  effect  of  running out of stack is often SIGSEGV,
+       though sometimes a more explicit error message is given. You  can  nor-
        mally increase the limit on stack size by code such as this:
 
          struct rlimit rlim;
@@ -5682,21 +5764,21 @@ PCRE DISCUSSION OF STACK USAGE
          rlim.rlim_cur = 100*1024*1024;
          setrlimit(RLIMIT_STACK, &rlim);
 
-       This reads the current limits (soft and hard) using  getrlimit(),  then
-       attempts  to  increase  the  soft limit to 100Mb using setrlimit(). You
+       This  reads  the current limits (soft and hard) using getrlimit(), then
+       attempts to increase the soft limit to  100Mb  using  setrlimit().  You
        must do this before calling pcre_exec().
 
-       PCRE has an internal counter that can be used to  limit  the  depth  of
-       recursion,  and  thus cause pcre_exec() to give an error code before it
-       runs out of stack. By default, the limit is very  large,  and  unlikely
-       ever  to operate. It can be changed when PCRE is built, and it can also
+       PCRE  has  an  internal  counter that can be used to limit the depth of
+       recursion, and thus cause pcre_exec() to give an error code  before  it
+       runs  out  of  stack. By default, the limit is very large, and unlikely
+       ever to operate. It can be changed when PCRE is built, and it can  also
        be set when pcre_exec() is called. For details of these interfaces, see
        the pcrebuild and pcreapi documentation.
 
        As a very rough rule of thumb, you should reckon on about 500 bytes per
-       recursion. Thus, if you want to limit your  stack  usage  to  8Mb,  you
-       should  set  the  limit at 16000 recursions. A 64Mb stack, on the other
-       hand, can support around 128000 recursions. The pcretest  test  program
+       recursion.  Thus,  if  you  want  to limit your stack usage to 8Mb, you
+       should set the limit at 16000 recursions. A 64Mb stack,  on  the  other
+       hand,  can  support around 128000 recursions. The pcretest test program
        has a command line option (-S) that can be used to increase the size of
        its stack.
 
@@ -5710,7 +5792,7 @@ AUTHOR
 
 REVISION
 
-       Last updated: 12 March 2007
+       Last updated: 05 June 2007
        Copyright (c) 1997-2007 University of Cambridge.
 ------------------------------------------------------------------------------
 
diff --git a/doc/pcre_fullinfo.3 b/doc/pcre_fullinfo.3
index 08b770f..067a6a8 100644
--- a/doc/pcre_fullinfo.3
+++ b/doc/pcre_fullinfo.3
@@ -36,7 +36,7 @@ The following information is available:
   PCRE_INFO_NAMECOUNT       Number of named subpatterns
   PCRE_INFO_NAMEENTRYSIZE   Size of name table entry
   PCRE_INFO_NAMETABLE       Pointer to name table
-  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried 
+  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried
   PCRE_INFO_OPTIONS         Option bits used for compilation
   PCRE_INFO_SIZE            Size of compiled pattern
   PCRE_INFO_STUDYSIZE       Size of study data
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 0c976f4..bfa4beb 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -676,7 +676,7 @@ out of use. To avoid confusion, they have not been re-used.
   54  DEFINE group contains more than one branch
   55  repeating a DEFINE group is not allowed
   56  inconsistent NEWLINE options
-  57  \g is not followed by a braced name or an optionally braced 
+  57  \eg is not followed by a braced name or an optionally braced
         non-zero number
   58  (?+ or (?- or (?(+ or (?(- must be followed by a non-zero number
 .
diff --git a/doc/pcrebuild.3 b/doc/pcrebuild.3
index 7f10911..c111bf4 100644
--- a/doc/pcrebuild.3
+++ b/doc/pcrebuild.3
@@ -166,7 +166,7 @@ to the \fBconfigure\fP command. With this configuration, PCRE will use the
 \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP variables to call memory
 management functions. By default these point to \fBmalloc()\fP and
 \fBfree()\fP, but you can replace the pointers so that your own functions are
-used. 
+used.
 .P
 Separate functions are provided rather than using \fBpcre_malloc\fP and
 \fBpcre_free\fP because the usage is very predictable: the block sizes
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 1acb7d7..09e2da0 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -260,14 +260,14 @@ parenthesized subpatterns.
 Another use of backslash is for specifying generic character types. The
 following are always recognized:
 .sp
-  \ed     any decimal digit 
+  \ed     any decimal digit
   \eD     any character that is not a decimal digit
   \eh     any horizontal whitespace character
-  \eH     any character that is not a horizontal whitespace character  
+  \eH     any character that is not a horizontal whitespace character
   \es     any whitespace character
   \eS     any character that is not a whitespace character
   \ev     any vertical whitespace character
-  \eV     any character that is not a vertical whitespace character  
+  \eV     any character that is not a vertical whitespace character
   \ew     any "word" character
   \eW     any "non-word" character
 .sp
@@ -287,11 +287,11 @@ does.
 .P
 In UTF-8 mode, characters with values greater than 128 never match \ed, \es, or
 \ew, and always match \eD, \eS, and \eW. This is true even when Unicode
-character property support is available. These sequences retain their original 
-meanings from before UTF-8 support was available, mainly for efficiency 
+character property support is available. These sequences retain their original
+meanings from before UTF-8 support was available, mainly for efficiency
 reasons.
 .P
-The sequences \eh, \eH, \ev, and \eV are Perl 5.10 features. In contrast to the 
+The sequences \eh, \eH, \ev, and \eV are Perl 5.10 features. In contrast to the
 other sequences, these do match certain high-valued codepoints in UTF-8 mode.
 The horizontal space characters are:
 .sp
@@ -1001,28 +1001,28 @@ the above patterns match "SUNDAY" as well as "Saturday".
 .SH "DUPLICATE SUBPATTERN NUMBERS"
 .rs
 .sp
-Perl 5.10 introduced a feature whereby each alternative in a subpattern uses 
-the same numbers for its capturing parentheses. Such a subpattern starts with 
-(?| and is itself a non-capturing subpattern. For example, consider this 
+Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
+the same numbers for its capturing parentheses. Such a subpattern starts with
+(?| and is itself a non-capturing subpattern. For example, consider this
 pattern:
 .sp
   (?|(Sat)ur|(Sun))day
-.sp   
-Because the two alternatives are inside a (?| group, both sets of capturing 
-parentheses are numbered one. Thus, when the pattern matches, you can look 
-at captured substring number one, whichever alternative matched. This construct 
-is useful when you want to capture part, but not all, of one of a number of 
-alternatives. Inside a (?| group, parentheses are numbered as usual, but the 
+.sp
+Because the two alternatives are inside a (?| group, both sets of capturing
+parentheses are numbered one. Thus, when the pattern matches, you can look
+at captured substring number one, whichever alternative matched. This construct
+is useful when you want to capture part, but not all, of one of a number of
+alternatives. Inside a (?| group, parentheses are numbered as usual, but the
 number is reset at the start of each branch. The numbers of any capturing
-buffers that follow the subpattern start after the highest number used in any 
-branch. The following example is taken from the Perl documentation. 
+buffers that follow the subpattern start after the highest number used in any
+branch. The following example is taken from the Perl documentation.
 The numbers underneath show in which buffer the captured content will be
 stored.
 .sp
   # before  ---------------branch-reset----------- after
   / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
   # 1            2         2  3        2     3     4
-.sp   
+.sp
 A backreference or a recursive call to a numbered subpattern always refers to
 the first one in the pattern with the given number.
 .P
@@ -1079,7 +1079,7 @@ abbreviation. This pattern (ignoring the line breaks) does the job:
   (?<DN>Sat)(?:urday)?
 .sp
 There are five capturing substrings, but only one is ever set after a match.
-(An alternative way of solving this problem is to use a "branch reset" 
+(An alternative way of solving this problem is to use a "branch reset"
 subpattern, as described in the previous section.)
 .P
 The convenience function for extracting the data by name returns the substring
diff --git a/doc/pcreprecompile.3 b/doc/pcreprecompile.3
index e0ff922..aa52542 100644
--- a/doc/pcreprecompile.3
+++ b/doc/pcreprecompile.3
@@ -117,8 +117,8 @@ usual way.
 .SH "COMPATIBILITY WITH DIFFERENT PCRE RELEASES"
 .rs
 .sp
-In general, it is safest to recompile all saved patterns when you update to a 
-new PCRE release, though not all updates actually require this. Recompiling is 
+In general, it is safest to recompile all saved patterns when you update to a
+new PCRE release, though not all updates actually require this. Recompiling is
 definitely needed for release 7.2.
 .
 .
diff --git a/doc/pcrestack.3 b/doc/pcrestack.3
index 1c5955c..7e9bfc9 100644
--- a/doc/pcrestack.3
+++ b/doc/pcrestack.3
@@ -76,8 +76,8 @@ documentation. When built in this way, instead of using the stack, PCRE obtains
 and frees memory by calling the functions that are pointed to by the
 \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP variables. By default, these
 point to \fBmalloc()\fP and \fBfree()\fP, but you can replace the pointers to
-cause PCRE to use your own functions. Since the block sizes are always the 
-same, and are always freed in reverse order, it may be possible to implement 
+cause PCRE to use your own functions. Since the block sizes are always the
+same, and are always freed in reverse order, it may be possible to implement
 customized memory handlers that are more efficient than the standard functions.
 .P
 In Unix-like environments, there is not often a problem with the stack unless
diff --git a/pcre_compile.c b/pcre_compile.c
index bdc4120..c191539 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -2026,7 +2026,7 @@ switch(op_code)
 
     case ESC_W:
     return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
-    
+
     case ESC_h:
     case ESC_H:
     switch(item)
@@ -2053,8 +2053,8 @@ switch(op_code)
       return -next != ESC_h;
       default:
       return -next == ESC_h;
-      }    
-      
+      }
+
     case ESC_v:
     case ESC_V:
     switch(item)
@@ -2069,7 +2069,7 @@ switch(op_code)
       return -next != ESC_v;
       default:
       return -next == ESC_v;
-      }    
+      }
 
     default:
     return FALSE;
@@ -2093,20 +2093,20 @@ switch(op_code)
 
   case OP_NOT_HSPACE:
   return next == -ESC_h;
-  
+
   /* Can't have \S in here because VT matches \S (Perl anomaly) */
-  case OP_VSPACE:  
+  case OP_VSPACE:
   return next == -ESC_V || next == -ESC_d || next == -ESC_w;
 
   case OP_NOT_VSPACE:
-  return next == -ESC_v;  
+  return next == -ESC_v;
 
   case OP_WORDCHAR:
   return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
 
   case OP_NOT_WORDCHAR:
   return next == -ESC_w || next == -ESC_d;
-  
+
   default:
   return FALSE;
   }
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 120c2f6..87f9746 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -63,7 +63,7 @@ applications. */
 
 /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
 into others, under special conditions. A gap of 20 between the blocks should be
-enough. The resulting opcodes don't have to be less than 256 because they are 
+enough. The resulting opcodes don't have to be less than 256 because they are
 never stored, so we push them well clear of the normal opcodes. */
 
 #define OP_PROP_EXTRA       300
@@ -585,9 +585,9 @@ for (;;)
           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
           case OP_NOT_HSPACE:
-          case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; 
+          case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
           case OP_NOT_VSPACE:
-          case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; 
+          case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
           default: break;
           }
         }
@@ -1105,7 +1105,7 @@ for (;;)
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x000a:
@@ -1116,15 +1116,15 @@ for (;;)
           case 0x2028:
           case 0x2029:
           OK = TRUE;
-          break; 
+          break;
 
           default:
           OK = FALSE;
-          break;  
+          break;
           }
 
         if (OK == (d == OP_VSPACE))
-          { 
+          {
           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1144,7 +1144,7 @@ for (;;)
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x09:      /* HT */
@@ -1168,14 +1168,14 @@ for (;;)
           case 0x3000:    /* IDEOGRAPHIC SPACE */
           OK = TRUE;
           break;
-          
+
           default:
           OK = FALSE;
           break;
           }
-           
+
         if (OK == (d == OP_HSPACE))
-          {          
+          {
           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1346,7 +1346,7 @@ for (;;)
       ADD_ACTIVE(state_offset + 2, 0);
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x000a:
@@ -1358,13 +1358,13 @@ for (;;)
           case 0x2029:
           OK = TRUE;
           break;
-          
+
           default:
           OK = FALSE;
           break;
           }
         if (OK == (d == OP_VSPACE))
-          {          
+          {
           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
             {
@@ -1392,7 +1392,7 @@ for (;;)
       ADD_ACTIVE(state_offset + 2, 0);
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x09:      /* HT */
@@ -1416,14 +1416,14 @@ for (;;)
           case 0x3000:    /* IDEOGRAPHIC SPACE */
           OK = TRUE;
           break;
-           
+
           default:
           OK = FALSE;
           break;
           }
-           
+
         if (OK == (d == OP_HSPACE))
-          {          
+          {
           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
             {
@@ -1574,7 +1574,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x000a:
@@ -1586,13 +1586,13 @@ for (;;)
           case 0x2029:
           OK = TRUE;
           break;
-          
+
           default:
           OK = FALSE;
           }
-           
+
         if (OK == (d == OP_VSPACE))
-          {         
+          {
           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1616,7 +1616,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        BOOL OK; 
+        BOOL OK;
         switch (c)
           {
           case 0x09:      /* HT */
@@ -1640,14 +1640,14 @@ for (;;)
           case 0x3000:    /* IDEOGRAPHIC SPACE */
           OK = TRUE;
           break;
-           
+
           default:
           OK = FALSE;
           break;
           }
-           
+
         if (OK == (d == OP_HSPACE))
-          {          
+          {
           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1771,8 +1771,8 @@ for (;;)
         case 0x2028:
         case 0x2029:
         break;
-         
-        default:  
+
+        default:
         ADD_NEW(state_offset + 1, 0);
         break;
         }
@@ -1791,7 +1791,7 @@ for (;;)
         case 0x2029:
         ADD_NEW(state_offset + 1, 0);
         break;
-        
+
         default: break;
         }
       break;
@@ -1820,8 +1820,8 @@ for (;;)
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
         case 0x3000:    /* IDEOGRAPHIC SPACE */
         break;
-         
-        default:  
+
+        default:
         ADD_NEW(state_offset + 1, 0);
         break;
         }
diff --git a/pcre_exec.c b/pcre_exec.c
index f5a2340..f62b5fc 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -2941,7 +2941,7 @@ for (;;)
             }
           }
         break;
-        
+
         case OP_HSPACE:
         for (i = 1; i <= min; i++)
           {
@@ -2973,7 +2973,7 @@ for (;;)
             }
           }
         break;
-        
+
         case OP_NOT_VSPACE:
         for (i = 1; i <= min; i++)
           {
@@ -2993,7 +2993,7 @@ for (;;)
             }
           }
         break;
-        
+
         case OP_VSPACE:
         for (i = 1; i <= min; i++)
           {
@@ -3009,7 +3009,7 @@ for (;;)
             case 0x85:      /* NEL */
             case 0x2028:    /* LINE SEPARATOR */
             case 0x2029:    /* PARAGRAPH SEPARATOR */
-            break; 
+            break;
             }
           }
         break;
@@ -3150,7 +3150,7 @@ for (;;)
             case 0x09:      /* HT */
             case 0x20:      /* SPACE */
             case 0xa0:      /* NBSP */
-            break; 
+            break;
             }
           }
         break;
@@ -3184,7 +3184,7 @@ for (;;)
             case 0x0c:      /* FF */
             case 0x0d:      /* CR */
             case 0x85:      /* NEL */
-            break; 
+            break;
             }
           }
         break;
@@ -3845,16 +3845,16 @@ for (;;)
           break;
 
           case OP_NOT_HSPACE:
-          case OP_HSPACE: 
+          case OP_HSPACE:
           for (i = min; i < max; i++)
             {
-            BOOL gotspace; 
+            BOOL gotspace;
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
             switch(c)
-              {  
-              default: gotspace = FALSE; break; 
+              {
+              default: gotspace = FALSE; break;
               case 0x09:      /* HT */
               case 0x20:      /* SPACE */
               case 0xa0:      /* NBSP */
@@ -3875,7 +3875,7 @@ for (;;)
               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
               case 0x3000:    /* IDEOGRAPHIC SPACE */
               gotspace = TRUE;
-              break;  
+              break;
               }
             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
             eptr += len;
@@ -3883,16 +3883,16 @@ for (;;)
           break;
 
           case OP_NOT_VSPACE:
-          case OP_VSPACE: 
+          case OP_VSPACE:
           for (i = min; i < max; i++)
             {
-            BOOL gotspace; 
+            BOOL gotspace;
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
-              default: gotspace = FALSE; break;   
+              default: gotspace = FALSE; break;
               case 0x0a:      /* LF */
               case 0x0b:      /* VT */
               case 0x0c:      /* FF */
@@ -3903,7 +3903,7 @@ for (;;)
               gotspace = TRUE;
               break;
               }
-            if (gotspace == (ctype == OP_NOT_VSPACE)) break;     
+            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
             eptr += len;
             }
           break;
@@ -4040,7 +4040,7 @@ for (;;)
             if (eptr >= md->end_subject) break;
             c = *eptr;
             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
-            eptr++; 
+            eptr++;
             }
           break;
 
@@ -4050,7 +4050,7 @@ for (;;)
             if (eptr >= md->end_subject) break;
             c = *eptr;
             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
-            eptr++; 
+            eptr++;
             }
           break;
 
@@ -4061,7 +4061,7 @@ for (;;)
             c = *eptr;
             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
               break;
-            eptr++; 
+            eptr++;
             }
           break;
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-06-13 15:09:54 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-06-13 15:09:54 +0000
commit	534f2ef23d3192cd74ec86f44c60ff5a7cb957a0 (patch)
tree	5b29d0fe9b45bef3e8ae979251ddfcc9dbe3a39e
parent	a24e9c9aff88d3b9f6022cbdfee49d758cfde0f7 (diff)
download	pcre-534f2ef23d3192cd74ec86f44c60ff5a7cb957a0.tar.gz