From 000f53cf12bbfb4f658d2417a96e03cb3c0b97cc Mon Sep 17 00:00:00 2001 From: ph10 Date: Tue, 28 May 2013 09:13:59 +0000 Subject: Final source file tidies for 8.33 release. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1335 2f5784b3-3f2a-0410-8824-cb99058d5e15 --- ChangeLog | 35 ++++++++------- NEWS | 2 +- PrepareRelease | 2 - configure.ac | 4 +- doc/html/index.html | 14 +++--- doc/html/pcre.html | 4 +- doc/html/pcreapi.html | 2 +- doc/html/pcrebuild.html | 8 ++-- doc/html/pcrecallout.html | 8 ++-- doc/html/pcrecompat.html | 4 +- doc/html/pcrejit.html | 2 +- doc/html/pcrepartial.html | 16 +++---- doc/html/pcrepattern.html | 108 ++++++++++++++++++++++---------------------- doc/html/pcresyntax.html | 4 +- doc/html/pcretest.html | 26 +++++------ doc/html/pcreunicode.html | 4 +- doc/pcre.3 | 8 ++-- doc/pcre.txt | 76 +++++++++++++++---------------- doc/pcreapi.3 | 2 +- doc/pcrebuild.3 | 12 ++--- doc/pcrecallout.3 | 8 ++-- doc/pcrecompat.3 | 4 +- doc/pcrejit.3 | 2 +- doc/pcrepartial.3 | 16 +++---- doc/pcrepattern.3 | 112 +++++++++++++++++++++++----------------------- doc/pcresyntax.3 | 4 +- doc/pcretest.1 | 26 +++++------ doc/pcreunicode.3 | 4 +- pcre_string_utils.c | 2 +- pcre_xclass.c | 20 ++++----- pcregrep.c | 10 ++--- pcretest.c | 6 +-- 32 files changed, 277 insertions(+), 278 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3805304..21cada4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ ChangeLog for PCRE ------------------ -Version 8.33 28-April-2013 +Version 8.33 28-May-2013 -------------------------- 1. Added 'U' to some constants that are compared to unsigned integers, to @@ -36,8 +36,8 @@ Version 8.33 28-April-2013 9. Optimizing fast_forward_start_bits in JIT. -10. Adding experimental support for callouts in JIT, and fixing some - issues revealed during this work. Namely: +10. Adding support for callouts in JIT, and fixing some issues revealed + during this work. Namely: (a) Unoptimized capturing brackets incorrectly reset on backtrack. 
@@ -125,7 +125,8 @@ Version 8.33 28-April-2013 have been moved to test 1, because either Perl or PCRE has changed, and these tests are now compatible. -32. Control verbs are handled in the same way in JIT and interpreter. +32. Backtracking control verbs are now handled in the same way in JIT and + interpreter. 33. An opening parenthesis in a MARK/PRUNE/SKIP/THEN name in a pattern that contained a forward subroutine reference caused a compile error. @@ -148,23 +149,23 @@ Version 8.33 28-April-2013 39. Try madvise first before posix_madvise. 40. Change 7 for PCRE 7.9 made it impossible for pcregrep to find empty lines - with a pattern such as ^$. It has taken 4 years for anybody to notice! The - original change locked out all matches of empty strings. This has been - changed so that one match of an empty string per line is recognized. - Subsequent searches on the same line (for colouring or for --only-matching, - for example) do not recognize empty strings. - -41. Applied a user patch to fix a number of spelling mistakes in comments. + with a pattern such as ^$. It has taken 4 years for anybody to notice! The + original change locked out all matches of empty strings. This has been + changed so that one match of an empty string per line is recognized. + Subsequent searches on the same line (for colouring or for --only-matching, + for example) do not recognize empty strings. + +41. Applied a user patch to fix a number of spelling mistakes in comments. 42. Data lines longer than 65536 caused pcretest to crash. -43. Clarified the data type for length and startoffset arguments for pcre_exec - and pcre_dfa_exec in the function-specific man pages, where they were - explicitly stated to be in bytes, never having been updated. I also added +43. Clarified the data type for length and startoffset arguments for pcre_exec + and pcre_dfa_exec in the function-specific man pages, where they were + explicitly stated to be in bytes, never having been updated. 
I also added some clarification to the pcreapi man page. - + 44. A call to pcre_dfa_exec() with an output vector size less than 2 caused - a segmentation fault. + a segmentation fault. Version 8.32 30-November-2012 @@ -1674,7 +1675,7 @@ Version 7.9 11-Apr-09 7. A pattern that could match an empty string could cause pcregrep to loop; it doesn't make sense to accept an empty string match in pcregrep, so I have locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this - seems to be how GNU grep behaves. [But see later change 40 for release + seems to be how GNU grep behaves. [But see later change 40 for release 8.33.] 8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at diff --git a/NEWS b/NEWS index cc384a6..2ff69cb 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ News about PCRE releases ------------------------ -Release 8.33 28-April-2013 +Release 8.33 28-May-2013 -------------------------- A number of bugs are fixed, and some performance improvements have been made. diff --git a/PrepareRelease b/PrepareRelease index f87758e..9891e08 100755 --- a/PrepareRelease +++ b/PrepareRelease @@ -245,8 +245,6 @@ files="\ pcre_stringpiece_unittest.cc \ perltest.pl \ ucp.h \ - ucpinternal.h \ - ucptable.h \ makevp.bat \ pcre.def \ libpcre.def \ diff --git a/configure.ac b/configure.ac index 1482292..e70ceae 100644 --- a/configure.ac +++ b/configure.ac @@ -10,8 +10,8 @@ dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre_major, [8]) m4_define(pcre_minor, [33]) -m4_define(pcre_prerelease, [-RC1]) -m4_define(pcre_date, [2013-04-28]) +m4_define(pcre_prerelease, []) +m4_define(pcre_date, [2013-05-28]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. 
diff --git a/doc/html/index.html b/doc/html/index.html index 887f4d7..352c55d 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -1,10 +1,10 @@ - +--> PCRE specification @@ -96,7 +96,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit functions.

- +
@@ -162,7 +162,7 @@ functions. - + diff --git a/doc/html/pcre.html b/doc/html/pcre.html index dde5626..692f651 100644 --- a/doc/html/pcre.html +++ b/doc/html/pcre.html @@ -100,9 +100,9 @@ function makes it possible for a client to discover which features are available. The features themselves are described in the pcrebuild page. Documentation about building PCRE for various operating systems can be -found in the +found in the README -and +and NON-AUTOTOOLS_BUILD files in the source distribution.

diff --git a/doc/html/pcreapi.html b/doc/html/pcreapi.html index 34fa096..2a00d9b 100644 --- a/doc/html/pcreapi.html +++ b/doc/html/pcreapi.html @@ -1995,7 +1995,7 @@ If startoffset is negative or greater than the length of the subject, pcre_exec() returns PCRE_ERROR_BADOFFSET. When the starting offset is zero, the search for a match starts at the beginning of the subject, and this is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point -to the start of a character, or the end of the subject (in UTF-32 mode, one +to the start of a character, or the end of the subject (in UTF-32 mode, one data unit equals one character, so all offsets are valid). Unlike the pattern string, the subject may contain binary zeroes.

diff --git a/doc/html/pcrebuild.html b/doc/html/pcrebuild.html index 9830c3e..03c8cbe 100644 --- a/doc/html/pcrebuild.html +++ b/doc/html/pcrebuild.html @@ -40,7 +40,7 @@ man page, in case the conversion went wrong.
BUILDING PCRE

-PCRE is distributed with a configure script that can be used to build the +PCRE is distributed with a configure script that can be used to build the library in Unix-like environments using the applications known as Autotools. Also in the distribution are files to support building using CMake instead of configure. The text file @@ -51,7 +51,7 @@ systems. There is a lot more information about building PCRE without using Autotools (including information about using CMake and building "by hand") in the text file called NON-AUTOTOOLS-BUILD. -You should consult this file as well as the +You should consult this file as well as the README file if you are building in a non-Unix-like environment.

@@ -66,8 +66,8 @@ using the GUI facility of cmake-gui if you are using CMake instead of configure to build PCRE.

-If you are not using Autotools or CMake, option selection can be done by -editing the config.h file, or by passing parameter settings to the +If you are not using Autotools or CMake, option selection can be done by +editing the config.h file, or by passing parameter settings to the compiler, as described in NON-AUTOTOOLS-BUILD.

diff --git a/doc/html/pcrecallout.html b/doc/html/pcrecallout.html index da111d7..7233bb6 100644 --- a/doc/html/pcrecallout.html +++ b/doc/html/pcrecallout.html @@ -64,14 +64,14 @@ it is processed as if it were

Notice that there is a callout before and after each parenthesis and -alternation bar. If the pattern contains a conditional group whose condition is -an assertion, an automatic callout is inserted immediately before the +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the condition. Such a callout may also be inserted explicitly, for example:
   (?(?C9)(?=a)ab|de)
 
-This applies only to assertion conditions (because they are themselves -independent groups). +This applies only to assertion conditions (because they are themselves +independent groups).

Automatic callouts can be used for tracking the progress of pattern matching. diff --git a/doc/html/pcrecompat.html b/doc/html/pcrecompat.html index cf18559..14e20c5 100644 --- a/doc/html/pcrecompat.html +++ b/doc/html/pcrecompat.html @@ -116,7 +116,7 @@ triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE, but there are examples where it differs.

-12. Most backtracking verbs in assertions have their normal actions. They are +12. Most backtracking verbs in assertions have their normal actions. They are not confined to the assertion.

@@ -142,7 +142,7 @@ Perl allows white space between ( and ? but PCRE never does, even if the PCRE_EXTENDED option is set.

-16. In PCRE, the upper/lower case character properties Lu and Ll are not +16. In PCRE, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \p{Lu} always matches an upper case letter. I think Perl has changed in this respect; in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all diff --git a/doc/html/pcrejit.html b/doc/html/pcrejit.html index 3202506..210f1da 100644 --- a/doc/html/pcrejit.html +++ b/doc/html/pcrejit.html @@ -173,7 +173,7 @@ PCRE_PARTIAL_SOFT.

The only unsupported pattern items are \C (match a single data unit) when -running in a UTF mode, and a callout immediately before an assertion condition +running in a UTF mode, and a callout immediately before an assertion condition in a conditional group.


RETURN VALUES FROM JIT EXECUTION
diff --git a/doc/html/pcrepartial.html b/doc/html/pcrepartial.html index 1ae06bb..98d34f0 100644 --- a/doc/html/pcrepartial.html +++ b/doc/html/pcrepartial.html @@ -95,8 +95,8 @@ of the subject. If there are at least two slots in the offsets vector when a partial match is returned, the first slot is set to the offset of the earliest character that was inspected. For convenience, the second offset points to the end of the -subject so that a substring can easily be identified. If there are at least -three slots in the offsets vector, the third slot is set to the offset of the +subject so that a substring can easily be identified. If there are at least +three slots in the offsets vector, the third slot is set to the offset of the character where matching started.

@@ -110,7 +110,7 @@ inspected while carrying out the match. For example, consider this pattern: This pattern matches "123", but only if it is preceded by "abc". If the subject string is "xyzabc12", the first two offsets after a partial match are for the substring "abc12", because all these characters were inspected. However, the -third offset is set to 6, because that is the offset where matching began. +third offset is set to 6, because that is the offset where matching began.

What happens when a partial match is identified depends on which of the two @@ -337,8 +337,8 @@ processing time is needed.

Note: If the pattern contains lookbehind assertions, or \K, or starts with \b or \B, the string that is returned for a partial match includes -characters that precede the start of what would be returned for a complete -match, because it contains all the characters that were inspected during the +characters that precede the start of what would be returned for a complete +match, because it contains all the characters that were inspected during the partial match.


ISSUES WITH MULTI-SEGMENT MATCHING
@@ -369,8 +369,8 @@ characters should be retained.) From release 8.33, there is a more accurate way of deciding which characters to retain. Instead of subtracting the length of the longest lookbehind from the earliest inspected character (offsets[0]), the match start position -(offsets[2]) should be used, and the next match attempt started at the -offsets[2] character by setting the startoffset argument of +(offsets[2]) should be used, and the next match attempt started at the +offsets[2] character by setting the startoffset argument of pcre_exec() or pcre_dfa_exec().

@@ -380,7 +380,7 @@ and 5. This indicates that the matching process that gave a partial match started at offset 5, but the characters "123a" were all inspected. The maximum lookbehind for that pattern is 3, so taking that away from 5 shows that we need only keep "123a", and the next match attempt can be started at offset 3 (that -is, at "a") when further characters have been added. When the match start is +is, at "a") when further characters have been added. When the match start is not the earliest inspected character, pcretest shows it explicitly:

     re> "(?<=123)abc"
diff --git a/doc/html/pcrepattern.html b/doc/html/pcrepattern.html
index 064b8dc..7e837e5 100644
--- a/doc/html/pcrepattern.html
+++ b/doc/html/pcrepattern.html
@@ -74,7 +74,7 @@ page.
 


SPECIAL START-OF-PATTERN ITEMS

-A number of options that can be passed to pcre_compile() can also be set +A number of options that can be passed to pcre_compile() can also be set by special items at the start of a pattern. These are not Perl-compatible, but are provided to make these options accessible to pattern writers who are not able to change the program that processes the pattern. Any number of these @@ -107,7 +107,7 @@ places below. There is also a summary of features in the page.

-Some applications that allow their users to supply patterns may wish to +Some applications that allow their users to supply patterns may wish to restrict them to non-UTF data for security reasons. If the PCRE_NEVER_UTF option is set at compile time, (*UTF) etc. are not allowed, and their appearance causes an error. @@ -180,23 +180,23 @@ convention. Setting match and recursion limits

-The caller of pcre_exec() can set a limit on the number of times the -internal match() function is called and on the maximum depth of +The caller of pcre_exec() can set a limit on the number of times the +internal match() function is called and on the maximum depth of recursive calls. These facilities are provided to catch runaway matches that are provoked by patterns with huge matching trees (a typical example is a pattern with nested unlimited repeats) and to avoid running out of system stack by too much recursion. When one of these limits is reached, pcre_exec() -gives an error return. The limits can also be set by items at the start of the +gives an error return. The limits can also be set by items at the start of the pattern of the form

   (*LIMIT_MATCH=d)
   (*LIMIT_RECURSION=d)
 
-where d is any number of decimal digits. However, the value of the setting must -be less than the value set by the caller of pcre_exec() for it to have -any effect. In other words, the pattern writer can lower the limit set by the -programmer, but not raise it. If there is more than one setting of one of these -limits, the lower value is used. +where d is any number of decimal digits. However, the value of the setting must +be less than the value set by the caller of pcre_exec() for it to have +any effect. In other words, the pattern writer can lower the limit set by the +programmer, but not raise it. If there is more than one setting of one of these +limits, the lower value is used.


EBCDIC CHARACTER CODES

@@ -848,7 +848,7 @@ Unicode table.

Specifying caseless matching does not affect these escape sequences. For -example, \p{Lu} always matches only upper case letters. This is different from +example, \p{Lu} always matches only upper case letters. This is different from the behaviour of current versions of Perl.

@@ -914,7 +914,7 @@ PCRE's additional properties As well as the standard Unicode properties described above, PCRE supports four more that make it possible to convert traditional escape sequences such as \w and \s and POSIX character classes to use Unicode properties. PCRE uses these -non-standard, non-Perl properties internally when PCRE_UCP is set. However, +non-standard, non-Perl properties internally when PCRE_UCP is set. However, they may also be used explicitly. These properties are:

   Xan   Any alphanumeric character
@@ -929,13 +929,13 @@ Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
 same characters as Xan, plus underscore.
 

-There is another non-standard property, Xuc, which matches any character that -can be represented by a Universal Character Name in C++ and other programming -languages. These are the characters $, @, ` (grave accent), and all characters -with Unicode code points greater than or equal to U+00A0, except for the -surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are excluded. (Universal Character Names are of the form \uHHHH or \UHHHHHHHH -where H is a hexadecimal digit. Note that the Xuc property does not match these +where H is a hexadecimal digit. Note that the Xuc property does not match these sequences but the characters that they represent.)


@@ -1410,7 +1410,7 @@ above. There are also the (*UTF8), (*UTF16),(*UTF32), and (*UCP) leading sequences that can be used to set UTF and Unicode property modes; they are equivalent to setting the PCRE_UTF8, PCRE_UTF16, PCRE_UTF32 and the PCRE_UCP options, respectively. The (*UTF) sequence is a generic version that can be -used with any of the libraries. However, the application can set the +used with any of the libraries. However, the application can set the PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences.


SUBPATTERNS
@@ -2005,7 +2005,7 @@ except that it does not cause the current matching position to be changed. Assertion subpatterns are not capturing subpatterns. If such an assertion contains capturing subpatterns within it, these are counted for the purposes of numbering the capturing subpatterns in the whole pattern. However, substring -capturing is carried out only for positive assertions. (Perl sometimes, but not +capturing is carried out only for positive assertions. (Perl sometimes, but not always, does do capturing in negative assertions.)

@@ -2666,8 +2666,8 @@ explicit callout may also be set at this position, as in this example:

   (?(?C9)(?=a)abc|def)
 
-Note that this applies only to assertion conditions, not to other types of -condition. +Note that this applies only to assertion conditions, not to other types of +condition.

During matching, when PCRE reaches a callout point, the external function is @@ -2690,7 +2690,7 @@ remarks apply to the PCRE features described in this section.

The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. They are generally of the form -(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving +(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving differently depending on whether or not a name is present. A name is any sequence of characters that does not include a closing parenthesis. The maximum length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit @@ -2702,15 +2702,15 @@ Any number of these verbs may occur in a pattern. Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using one of the traditional matching functions, because these use a backtracking algorithm. With the -exception of (*FAIL), which behaves like a failing negative assertion, the +exception of (*FAIL), which behaves like a failing negative assertion, the backtracking control verbs cause an error if encountered by a DFA matching function.

-The behaviour of these verbs in +The behaviour of these verbs in repeated groups, assertions, -and in +and in subpatterns called as subroutines (whether or not recursively) is documented below.

@@ -2748,7 +2748,7 @@ followed by a name. This verb causes the match to end successfully, skipping the remainder of the pattern. However, when it is inside a subpattern that is called as a subroutine, only that subpattern is ended successfully. Matching then continues -at the outer level. If (*ACCEPT) in triggered in a positive assertion, the +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the assertion succeeds; in a negative assertion, the assertion fails.

@@ -2840,7 +2840,7 @@ Verbs that act after backtracking The following verbs do nothing when they are encountered. Matching continues with what follows, but if there is no subsequent match, causing a backtrack to the verb, a failure is forced. That is, backtracking cannot pass to the left of -the verb. However, when one of these verbs appears inside an atomic group or an +the verb. However, when one of these verbs appears inside an atomic group or an assertion that is true, its effect is confined to that group, because once the group has been matched, there is never any backtracking into it. In this situation, backtracking can "jump back" to the left of the entire atomic group @@ -2850,13 +2850,13 @@ applies in subroutine calls.)

These verbs differ in exactly what kind of failure occurs when backtracking reaches them. The behaviour described below is what happens when the verb is -not in a subroutine or an assertion. Subsequent sections cover these special +not in a subroutine or an assertion. Subsequent sections cover these special cases.

   (*COMMIT)
 
This verb, which may not be followed by a name, causes the whole match to fail -outright if there is a later matching failure that causes backtracking to reach +outright if there is a later matching failure that causes backtracking to reach it. Even if the pattern is unanchored, no further attempts to find a match by advancing the starting point take place. If (*COMMIT) is the only backtracking verb that is encountered, once it has been passed pcre_exec() is @@ -2871,8 +2871,8 @@ recently passed (*MARK) in the path is passed back when (*COMMIT) forces a match failure.

-If there is more than one backtracking verb in a pattern, a different one that -follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a match does not always guarantee that a match must be at this starting point.

@@ -2906,7 +2906,7 @@ expressed in any other way. In an anchored pattern (*PRUNE) has the same effect as (*COMMIT).

-The behaviour of (*PRUNE:NAME) is the not the same as (*MARK:NAME)(*PRUNE). +The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is remembered for passing back to the caller. However, (*SKIP:NAME) searches only for names set with (*MARK).

@@ -2929,7 +2929,7 @@ instead of skipping on to "c".
 
   (*SKIP:NAME)
 
-When (*SKIP) has an associated name, its behaviour is modified. When it is +When (*SKIP) has an associated name, its behaviour is modified. When it is triggered, the previous path through the pattern is searched for the most recent (*MARK) that has the same name. If one is found, the "bumpalong" advance is to the subject position that corresponds to that (*MARK) instead of to where @@ -2937,12 +2937,12 @@ is to the subject position that corresponds to that (*MARK) instead of to where (*SKIP) is ignored.

-Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME).

   (*THEN) or (*THEN:NAME)
 
-This verb causes a skip to the next innermost alternative when backtracking +This verb causes a skip to the next innermost alternative when backtracking reaches it. That is, it cancels any further backtracking within the current alternative. Its name comes from the observation that it can be used for a pattern-based if-then-else block: @@ -2957,7 +2957,7 @@ more alternatives, so there is a backtrack to whatever came before the entire group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).

-The behaviour of (*THEN:NAME) is the not the same as (*MARK:NAME)(*THEN). +The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is remembered for passing back to the caller. However, (*SKIP:NAME) searches only for names set with (*MARK).

@@ -3017,8 +3017,8 @@ etc. are complex pattern fragments:
   (A(*COMMIT)B(*THEN)C|ABD)
 
-If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to -fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes the next alternative (ABD) to be tried. This behaviour is consistent, but is not always the same as Perl's. It means that if two or more backtracking verbs appear in succession, all the the last of them has no effect. Consider this @@ -3026,21 +3026,21 @@ example:
   ...(*COMMIT)(*PRUNE)...
 
-If there is a matching failure to the right, backtracking onto (*PRUNE) cases -it to be triggered, and its action is taken. There can never be a backtrack -onto (*COMMIT). +If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT).


Backtracking verbs in repeated groups

-PCRE differs from Perl in its handling of backtracking verbs in repeated +PCRE differs from Perl in its handling of backtracking verbs in repeated groups. For example, consider:

   /(a(*COMMIT)b)+ac/
 
-If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in -the second repeat of the group acts. +If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in +the second repeat of the group acts.


Backtracking verbs in assertions @@ -3049,8 +3049,8 @@ Backtracking verbs in assertions (*FAIL) in an assertion has its normal effect: it forces an immediate backtrack.

-(*ACCEPT) in a positive assertion causes the assertion to succeed without any -further processing. In a negative assertion, (*ACCEPT) causes the assertion to +(*ACCEPT) in a positive assertion causes the assertion to succeed without any +further processing. In a negative assertion, (*ACCEPT) causes the assertion to fail without any further processing.

@@ -3062,17 +3062,17 @@ the assertion.

Negative assertions are, however, different, in order to ensure that changing a positive assertion into a negative assertion changes its result. Backtracking -into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true, -without considering any further alternative branches in the assertion. +into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true, +without considering any further alternative branches in the assertion. Backtracking into (*THEN) causes it to skip to the next enclosing alternative -within the assertion (the normal behaviour), but if the assertion does not have +within the assertion (the normal behaviour), but if the assertion does not have such an alternative, (*THEN) behaves like (*PRUNE).


Backtracking verbs in subroutines

-These behaviours occur whether or not the subpattern is called recursively. +These behaviours occur whether or not the subpattern is called recursively. Perl's treatment of subroutines is different in some cases.

@@ -3080,8 +3080,8 @@ Perl's treatment of subroutines is different in some cases. an immediate backtrack.

-(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to -succeed without any further processing. Matching then continues after the +(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the subroutine call.

@@ -3090,7 +3090,7 @@ the subroutine match to fail.

(*THEN) skips to the next alternative in the innermost enclosing group within -the subpattern that has alternatives. If there is no such group within the +the subpattern that has alternatives. If there is no such group within the subpattern, (*THEN) causes the subroutine match to fail.


SEE ALSO
diff --git a/doc/html/pcresyntax.html b/doc/html/pcresyntax.html index ea1c2bd..b32e8b1 100644 --- a/doc/html/pcresyntax.html +++ b/doc/html/pcresyntax.html @@ -151,8 +151,8 @@ PCRE_UCP option. Xan Alphanumeric: union of properties L and N Xps POSIX space: property Z or tab, NL, VT, FF, CR Xsp Perl space: property Z or tab, NL, FF, CR - Xuc Univerally-named character: one that can be - represented by a Universal Character Name + Xuc Universally-named character: one that can be + represented by a Universal Character Name Xwd Perl word: property Xan or underscore

diff --git a/doc/html/pcretest.html b/doc/html/pcretest.html index bc50ec5..17d1ade 100644 --- a/doc/html/pcretest.html +++ b/doc/html/pcretest.html @@ -67,11 +67,11 @@ but without much justification.
INPUT DATA FORMAT

Input to pcretest is processed line by line, either by calling the C -library's fgets() function, or via the libreadline library (see -below). In Unix-like environments, fgets() treats any bytes other than -newline as data characters. However, in some Windows environments character 26 -(hex 1A) causes an immediate end of file, and no further data is read. For -maximum portability, therefore, it is safest to use only ASCII characters in +library's fgets() function, or via the libreadline library (see +below). In Unix-like environments, fgets() treats any bytes other than +newline as data characters. However, in some Windows environments character 26 +(hex 1A) causes an immediate end of file, and no further data is read. For +maximum portability, therefore, it is safest to use only ASCII characters in pcretest input files.


PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
@@ -120,7 +120,7 @@ internal form is output after compilation.

-C Output the version number of the PCRE library, and all available information -about the optional features that are included, and then exit with zero exit +about the optional features that are included, and then exit with zero exit code. All other options are ignored.

@@ -132,14 +132,14 @@ following options output the value and set the exit code as indicated: ebcdic-nl the code for LF (= NL) in an EBCDIC environment: 0x15 or 0x25 0 if used in an ASCII environment - exit code is always 0 + exit code is always 0 linksize the configured internal link size (2, 3, or 4) - exit code is set to the link size + exit code is set to the link size newline the default newline setting: CR, LF, CRLF, ANYCRLF, or ANY - exit code is always 0 + exit code is always 0

-The following options output 1 for true or 0 for false, and set the exit code +The following options output 1 for true or 0 for false, and set the exit code to the same value:
   ebcdic     compiled for an EBCDIC environment
@@ -148,10 +148,10 @@ to the same value:
   pcre32     the 32-bit library was built
   pcre8      the 8-bit library was built
   ucp        Unicode property support is available
-  utf        UTF-8 and/or UTF-16 and/or UTF-32 support 
+  utf        UTF-8 and/or UTF-16 and/or UTF-32 support
                is available
 
-If an unknown option is given, an error message is output; the exit code is 0. +If an unknown option is given, an error message is output; the exit code is 0.

-d @@ -339,7 +339,7 @@ fall into several groups that are described in detail in the following sections.

   /8              set UTF mode
-  /9              set PCRE_NEVER_UTF (locks out UTF mode) 
+  /9              set PCRE_NEVER_UTF (locks out UTF mode)
   /?              disable UTF validity check
   /+              show remainder of subject after match
   /=              show all captures (not just those that are set)
diff --git a/doc/html/pcreunicode.html b/doc/html/pcreunicode.html
index 1b05b19..ab36bc6 100644
--- a/doc/html/pcreunicode.html
+++ b/doc/html/pcreunicode.html
@@ -85,8 +85,8 @@ place. From release 7.3 of PCRE, the check is according the rules of RFC 3629,
 which are themselves derived from the Unicode specification. Earlier releases
 of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
 values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
-to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called 
-"non-character" code points are no longer excluded because Unicode corrigendum 
+to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called
+"non-character" code points are no longer excluded because Unicode corrigendum
 #9 makes it clear that they should not be.)
 

diff --git a/doc/pcre.3 b/doc/pcre.3 index dc62501..c2f9164 100644 --- a/doc/pcre.3 +++ b/doc/pcre.3 @@ -96,15 +96,15 @@ available. The features themselves are described in the \fBpcrebuild\fP .\" page. Documentation about building PCRE for various operating systems can be -found in the +found in the .\" HTML .\" -\fBREADME\fP +\fBREADME\fP .\" -and +and .\" HTML .\" -\fBNON-AUTOTOOLS_BUILD\fP +\fBNON-AUTOTOOLS_BUILD\fP .\" files in the source distribution. .P diff --git a/doc/pcre.txt b/doc/pcre.txt index 9613e0e..193203e 100644 --- a/doc/pcre.txt +++ b/doc/pcre.txt @@ -180,8 +180,8 @@ REVISION Last updated: 13 May 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE(3) Library Functions Manual PCRE(3) @@ -512,8 +512,8 @@ REVISION Last updated: 12 May 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE(3) Library Functions Manual PCRE(3) @@ -841,8 +841,8 @@ REVISION Last updated: 12 May 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREBUILD(3) Library Functions Manual PCREBUILD(3) @@ -1344,8 +1344,8 @@ REVISION Last updated: 12 May 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREMATCHING(3) Library Functions Manual PCREMATCHING(3) @@ -1553,8 +1553,8 @@ REVISION Last updated: 08 January 2012 Copyright (c) 1997-2012 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREAPI(3) Library Functions Manual PCREAPI(3) @@ -4196,8 +4196,8 @@ REVISION Last updated: 12 May 2013 Copyright (c) 1997-2013 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRECALLOUT(3) Library Functions Manual PCRECALLOUT(3) @@ -4414,8 +4414,8 @@ REVISION Last updated: 03 March 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRECOMPAT(3) Library Functions Manual PCRECOMPAT(3) @@ -4603,8 +4603,8 @@ REVISION Last updated: 19 March 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREPATTERN(3) Library Functions Manual PCREPATTERN(3) @@ -7438,8 +7438,8 @@ REVISION Last updated: 26 April 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRESYNTAX(3) Library Functions Manual PCRESYNTAX(3) @@ -7822,8 +7822,8 @@ REVISION Last updated: 26 April 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREUNICODE(3) Library Functions Manual PCREUNICODE(3) @@ -8041,8 +8041,8 @@ REVISION Last updated: 27 February 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREJIT(3) Library Functions Manual PCREJIT(3) @@ -8454,8 +8454,8 @@ REVISION Last updated: 17 March 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREPARTIAL(3) Library Functions Manual PCREPARTIAL(3) @@ -8929,8 +8929,8 @@ REVISION Last updated: 20 February 2013 Copyright (c) 1997-2013 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREPRECOMPILE(3) Library Functions Manual PCREPRECOMPILE(3) @@ -9064,8 +9064,8 @@ REVISION Last updated: 24 June 2012 Copyright (c) 1997-2012 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCREPERFORM(3) Library Functions Manual PCREPERFORM(3) @@ -9234,8 +9234,8 @@ REVISION Last updated: 25 August 2012 Copyright (c) 1997-2012 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCREPOSIX(3) Library Functions Manual PCREPOSIX(3) @@ -9499,8 +9499,8 @@ REVISION Last updated: 09 January 2012 Copyright (c) 1997-2012 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRECPP(3) Library Functions Manual PCRECPP(3) @@ -9842,8 +9842,8 @@ REVISION Last updated: 08 January 2012 ------------------------------------------------------------------------------ - - + + PCRESAMPLE(3) Library Functions Manual PCRESAMPLE(3) @@ -9991,8 +9991,8 @@ REVISION Last updated: 04 May 2012 Copyright (c) 1997-2012 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRESTACK(3) Library Functions Manual PCRESTACK(3) @@ -10177,5 +10177,5 @@ REVISION Last updated: 24 June 2012 Copyright (c) 1997-2012 University of Cambridge. ------------------------------------------------------------------------------ - - + + diff --git a/doc/pcreapi.3 b/doc/pcreapi.3 index ac111f1..4718c7f 100644 --- a/doc/pcreapi.3 +++ b/doc/pcreapi.3 @@ -2003,7 +2003,7 @@ If \fIstartoffset\fP is negative or greater than the length of the subject, \fBpcre_exec()\fP returns PCRE_ERROR_BADOFFSET. When the starting offset is zero, the search for a match starts at the beginning of the subject, and this is by far the most common case. In UTF-8 or UTF-16 mode, the offset must point -to the start of a character, or the end of the subject (in UTF-32 mode, one +to the start of a character, or the end of the subject (in UTF-32 mode, one data unit equals one character, so all offsets are valid). Unlike the pattern string, the subject may contain binary zeroes. 
.P diff --git a/doc/pcrebuild.3 b/doc/pcrebuild.3 index 5767d0e..403f2ae 100644 --- a/doc/pcrebuild.3 +++ b/doc/pcrebuild.3 @@ -6,13 +6,13 @@ PCRE - Perl-compatible regular expressions .SH "BUILDING PCRE" .rs .sp -PCRE is distributed with a \fBconfigure\fP script that can be used to build the +PCRE is distributed with a \fBconfigure\fP script that can be used to build the library in Unix-like environments using the applications known as Autotools. Also in the distribution are files to support building using \fBCMake\fP instead of \fBconfigure\fP. The text file .\" HTML .\" -\fBREADME\fP +\fBREADME\fP .\" contains general information about building with Autotools (some of which is repeated below), and also has some comments about building on various operating @@ -23,10 +23,10 @@ hand") in the text file called .\" \fBNON-AUTOTOOLS-BUILD\fP. .\" -You should consult this file as well as the +You should consult this file as well as the .\" HTML .\" -\fBREADME\fP +\fBREADME\fP .\" file if you are building in a non-Unix-like environment. . @@ -42,8 +42,8 @@ same options can be selected in both Unix-like and non-Unix-like environments using the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead of \fBconfigure\fP to build PCRE. .P -If you are not using Autotools or \fBCMake\fP, option selection can be done by -editing the \fBconfig.h\fP file, or by passing parameter settings to the +If you are not using Autotools or \fBCMake\fP, option selection can be done by +editing the \fBconfig.h\fP file, or by passing parameter settings to the compiler, as described in .\" HTML .\" diff --git a/doc/pcrecallout.3 b/doc/pcrecallout.3 index 19baf23..79e2bb9 100644 --- a/doc/pcrecallout.3 +++ b/doc/pcrecallout.3 @@ -41,14 +41,14 @@ it is processed as if it were (?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) .sp Notice that there is a callout before and after each parenthesis and -alternation bar. 
If the pattern contains a conditional group whose condition is -an assertion, an automatic callout is inserted immediately before the +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the condition. Such a callout may also be inserted explicitly, for example: .sp (?(?C9)(?=a)ab|de) .sp -This applies only to assertion conditions (because they are themselves -independent groups). +This applies only to assertion conditions (because they are themselves +independent groups). .P Automatic callouts can be used for tracking the progress of pattern matching. The diff --git a/doc/pcrecompat.3 b/doc/pcrecompat.3 index 755c9ec..2c109ea 100644 --- a/doc/pcrecompat.3 +++ b/doc/pcrecompat.3 @@ -103,7 +103,7 @@ A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE, but there are examples where it differs. .P -12. Most backtracking verbs in assertions have their normal actions. They are +12. Most backtracking verbs in assertions have their normal actions. They are not confined to the assertion. .P 13. There are some differences that are concerned with the settings of captured @@ -125,7 +125,7 @@ between the ( and ? at the start of a subpattern. If the /x modifier is set, Perl allows white space between ( and ? but PCRE never does, even if the PCRE_EXTENDED option is set. .P -16. In PCRE, the upper/lower case character properties Lu and Ll are not +16. In PCRE, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \ep{Lu} always matches an upper case letter. 
I think Perl has changed in this respect; in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all diff --git a/doc/pcrejit.3 b/doc/pcrejit.3 index 65e50de..341403f 100644 --- a/doc/pcrejit.3 +++ b/doc/pcrejit.3 @@ -152,7 +152,7 @@ PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. .P The only unsupported pattern items are \eC (match a single data unit) when -running in a UTF mode, and a callout immediately before an assertion condition +running in a UTF mode, and a callout immediately before an assertion condition in a conditional group. . . diff --git a/doc/pcrepartial.3 b/doc/pcrepartial.3 index a860825..4041838 100644 --- a/doc/pcrepartial.3 +++ b/doc/pcrepartial.3 @@ -69,8 +69,8 @@ of the subject. If there are at least two slots in the offsets vector when a partial match is returned, the first slot is set to the offset of the earliest character that was inspected. For convenience, the second offset points to the end of the -subject so that a substring can easily be identified. If there are at least -three slots in the offsets vector, the third slot is set to the offset of the +subject so that a substring can easily be identified. If there are at least +three slots in the offsets vector, the third slot is set to the offset of the character where matching started. .P For the majority of patterns, the contents of the first and third slots will be @@ -83,7 +83,7 @@ inspected while carrying out the match. For example, consider this pattern: This pattern matches "123", but only if it is preceded by "abc". If the subject string is "xyzabc12", the first two offsets after a partial match are for the substring "abc12", because all these characters were inspected. However, the -third offset is set to 6, because that is the offset where matching began. +third offset is set to 6, because that is the offset where matching began. 
.P What happens when a partial match is identified depends on which of the two partial matching options are set. @@ -311,8 +311,8 @@ processing time is needed. .P \fBNote:\fP If the pattern contains lookbehind assertions, or \eK, or starts with \eb or \eB, the string that is returned for a partial match includes -characters that precede the start of what would be returned for a complete -match, because it contains all the characters that were inspected during the +characters that precede the start of what would be returned for a complete +match, because it contains all the characters that were inspected during the partial match. . . @@ -342,8 +342,8 @@ characters should be retained.) From release 8.33, there is a more accurate way of deciding which characters to retain. Instead of subtracting the length of the longest lookbehind from the earliest inspected character (\fIoffsets[0]\fP), the match start position -(\fIoffsets[2]\fP) should be used, and the next match attempt started at the -\fIoffsets[2]\fP character by setting the \fIstartoffset\fP argument of +(\fIoffsets[2]\fP) should be used, and the next match attempt started at the +\fIoffsets[2]\fP character by setting the \fIstartoffset\fP argument of \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. .P For example, if the pattern "(?<=123)abc" is partially @@ -352,7 +352,7 @@ and 5. This indicates that the matching process that gave a partial match started at offset 5, but the characters "123a" were all inspected. The maximum lookbehind for that pattern is 3, so taking that away from 5 shows that we need only keep "123a", and the next match attempt can be started at offset 3 (that -is, at "a") when further characters have been added. When the match start is +is, at "a") when further characters have been added. 
When the match start is not the earliest inspected character, \fBpcretest\fP shows it explicitly: .sp re> "(?<=123)abc" diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index 50a1ab0..3971101 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -37,7 +37,7 @@ page. .SH "SPECIAL START-OF-PATTERN ITEMS" .rs .sp -A number of options that can be passed to \fBpcre_compile()\fP can also be set +A number of options that can be passed to \fBpcre_compile()\fP can also be set by special items at the start of a pattern. These are not Perl-compatible, but are provided to make these options accessible to pattern writers who are not able to change the program that processes the pattern. Any number of these @@ -71,7 +71,7 @@ places below. There is also a summary of features in the .\" page. .P -Some applications that allow their users to supply patterns may wish to +Some applications that allow their users to supply patterns may wish to restrict them to non-UTF data for security reasons. If the PCRE_NEVER_UTF option is set at compile time, (*UTF) etc. are not allowed, and their appearance causes an error. @@ -151,23 +151,23 @@ convention. .SS "Setting match and recursion limits" .rs .sp -The caller of \fBpcre_exec()\fP can set a limit on the number of times the -internal \fBmatch()\fP function is called and on the maximum depth of +The caller of \fBpcre_exec()\fP can set a limit on the number of times the +internal \fBmatch()\fP function is called and on the maximum depth of recursive calls. These facilities are provided to catch runaway matches that are provoked by patterns with huge matching trees (a typical example is a pattern with nested unlimited repeats) and to avoid running out of system stack by too much recursion. When one of these limits is reached, \fBpcre_exec()\fP -gives an error return. The limits can also be set by items at the start of the +gives an error return. 
The limits can also be set by items at the start of the pattern of the form .sp (*LIMIT_MATCH=d) (*LIMIT_RECURSION=d) .sp -where d is any number of decimal digits. However, the value of the setting must -be less than the value set by the caller of \fBpcre_exec()\fP for it to have -any effect. In other words, the pattern writer can lower the limit set by the -programmer, but not raise it. If there is more than one setting of one of these -limits, the lower value is used. +where d is any number of decimal digits. However, the value of the setting must +be less than the value set by the caller of \fBpcre_exec()\fP for it to have +any effect. In other words, the pattern writer can lower the limit set by the +programmer, but not raise it. If there is more than one setting of one of these +limits, the lower value is used. . . .SH "EBCDIC CHARACTER CODES" @@ -846,7 +846,7 @@ Instead, this property is assumed for any code point that is not in the Unicode table. .P Specifying caseless matching does not affect these escape sequences. For -example, \ep{Lu} always matches only upper case letters. This is different from +example, \ep{Lu} always matches only upper case letters. This is different from the behaviour of current versions of Perl. .P Matching characters by Unicode property is not fast, because PCRE has to do a @@ -907,7 +907,7 @@ the "mark" property always have the "extend" grapheme breaking property. As well as the standard Unicode properties described above, PCRE supports four more that make it possible to convert traditional escape sequences such as \ew and \es and POSIX character classes to use Unicode properties. PCRE uses these -non-standard, non-Perl properties internally when PCRE_UCP is set. However, +non-standard, non-Perl properties internally when PCRE_UCP is set. However, they may also be used explicitly. 
These properties are: .sp Xan Any alphanumeric character @@ -921,13 +921,13 @@ carriage return, and any other character that has the Z (separator) property. Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the same characters as Xan, plus underscore. .P -There is another non-standard property, Xuc, which matches any character that -can be represented by a Universal Character Name in C++ and other programming -languages. These are the characters $, @, ` (grave accent), and all characters -with Unicode code points greater than or equal to U+00A0, except for the -surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH -where H is a hexadecimal digit. Note that the Xuc property does not match these +where H is a hexadecimal digit. Note that the Xuc property does not match these sequences but the characters that they represent.) . . @@ -1410,7 +1410,7 @@ above. There are also the (*UTF8), (*UTF16),(*UTF32), and (*UCP) leading sequences that can be used to set UTF and Unicode property modes; they are equivalent to setting the PCRE_UTF8, PCRE_UTF16, PCRE_UTF32 and the PCRE_UCP options, respectively. The (*UTF) sequence is a generic version that can be -used with any of the libraries. However, the application can set the +used with any of the libraries. However, the application can set the PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences. . . @@ -2020,7 +2020,7 @@ except that it does not cause the current matching position to be changed. 
Assertion subpatterns are not capturing subpatterns. If such an assertion contains capturing subpatterns within it, these are counted for the purposes of numbering the capturing subpatterns in the whole pattern. However, substring -capturing is carried out only for positive assertions. (Perl sometimes, but not +capturing is carried out only for positive assertions. (Perl sometimes, but not always, does do capturing in negative assertions.) .P For compatibility with Perl, assertion subpatterns may be repeated; though @@ -2691,8 +2691,8 @@ explicit callout may also be set at this position, as in this example: .sp (?(?C9)(?=a)abc|def) .sp -Note that this applies only to assertion conditions, not to other types of -condition. +Note that this applies only to assertion conditions, not to other types of +condition. .P During matching, when PCRE reaches a callout point, the external function is called. It is provided with the number of the callout, the position in the @@ -2718,7 +2718,7 @@ remarks apply to the PCRE features described in this section. .P The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. They are generally of the form -(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving +(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving differently depending on whether or not a name is present. A name is any sequence of characters that does not include a closing parenthesis. The maximum length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit @@ -2729,20 +2729,20 @@ Any number of these verbs may occur in a pattern. Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using one of the traditional matching functions, because these use a backtracking algorithm. 
With the -exception of (*FAIL), which behaves like a failing negative assertion, the +exception of (*FAIL), which behaves like a failing negative assertion, the backtracking control verbs cause an error if encountered by a DFA matching function. .P -The behaviour of these verbs in +The behaviour of these verbs in .\" HTML .\" -repeated groups, +repeated groups, .\" .\" HTML .\" -assertions, +assertions, .\" -and in +and in .\" HTML .\" subpatterns called as subroutines @@ -2788,7 +2788,7 @@ followed by a name. This verb causes the match to end successfully, skipping the remainder of the pattern. However, when it is inside a subpattern that is called as a subroutine, only that subpattern is ended successfully. Matching then continues -at the outer level. If (*ACCEPT) in triggered in a positive assertion, the +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the assertion succeeds; in a negative assertion, the assertion fails. .P If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For @@ -2883,7 +2883,7 @@ to ensure that the match is always attempted. The following verbs do nothing when they are encountered. Matching continues with what follows, but if there is no subsequent match, causing a backtrack to the verb, a failure is forced. That is, backtracking cannot pass to the left of -the verb. However, when one of these verbs appears inside an atomic group or an +the verb. However, when one of these verbs appears inside an atomic group or an assertion that is true, its effect is confined to that group, because once the group has been matched, there is never any backtracking into it. In this situation, backtracking can "jump back" to the left of the entire atomic group @@ -2892,13 +2892,13 @@ applies in subroutine calls.) .P These verbs differ in exactly what kind of failure occurs when backtracking reaches them. The behaviour described below is what happens when the verb is -not in a subroutine or an assertion.
Subsequent sections cover these special +not in a subroutine or an assertion. Subsequent sections cover these special cases. .sp (*COMMIT) .sp This verb, which may not be followed by a name, causes the whole match to fail -outright if there is a later matching failure that causes backtracking to reach +outright if there is a later matching failure that causes backtracking to reach it. Even if the pattern is unanchored, no further attempts to find a match by advancing the starting point take place. If (*COMMIT) is the only backtracking verb that is encountered, once it has been passed \fBpcre_exec()\fP is @@ -2912,8 +2912,8 @@ dynamic anchor, or "I've started, so I must finish." The name of the most recently passed (*MARK) in the path is passed back when (*COMMIT) forces a match failure. .P -If there is more than one backtracking verb in a pattern, a different one that -follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a match does not always guarantee that a match must be at this starting point. .P Note that (*COMMIT) at the start of a pattern is not the same as an anchor, @@ -2945,7 +2945,7 @@ possessive quantifier, but there are some uses of (*PRUNE) that cannot be expressed in any other way. In an anchored pattern (*PRUNE) has the same effect as (*COMMIT). .P -The behaviour of (*PRUNE:NAME) is the not the same as (*MARK:NAME)(*PRUNE). +The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is remembered for passing back to the caller. However, (*SKIP:NAME) searches only for names set with (*MARK). .sp @@ -2968,19 +2968,19 @@ instead of skipping on to "c". .sp (*SKIP:NAME) .sp -When (*SKIP) has an associated name, its behaviour is modified. When it is +When (*SKIP) has an associated name, its behaviour is modified.
When it is triggered, the previous path through the pattern is searched for the most recent (*MARK) that has the same name. If one is found, the "bumpalong" advance is to the subject position that corresponds to that (*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with a matching name is found, the (*SKIP) is ignored. .P -Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME). .sp (*THEN) or (*THEN:NAME) .sp -This verb causes a skip to the next innermost alternative when backtracking +This verb causes a skip to the next innermost alternative when backtracking reaches it. That is, it cancels any further backtracking within the current alternative. Its name comes from the observation that it can be used for a pattern-based if-then-else block: @@ -2994,7 +2994,7 @@ succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no more alternatives, so there is a backtrack to whatever came before the entire group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). .P -The behaviour of (*THEN:NAME) is the not the same as (*MARK:NAME)(*THEN). +The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is remembered for passing back to the caller. However, (*SKIP:NAME) searches only for names set with (*MARK). .P @@ -3051,8 +3051,8 @@ etc. are complex pattern fragments: .sp (A(*COMMIT)B(*THEN)C|ABD) .sp -If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to -fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes the next alternative (ABD) to be tried.
This behaviour is consistent, but is not always the same as Perl's. It means that if two or more backtracking verbs appear in succession, all the the last of them has no effect. Consider this @@ -3060,22 +3060,22 @@ example: .sp ...(*COMMIT)(*PRUNE)... .sp -If there is a matching failure to the right, backtracking onto (*PRUNE) cases -it to be triggered, and its action is taken. There can never be a backtrack -onto (*COMMIT). +If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). . . .\" HTML .SS "Backtracking verbs in repeated groups" .rs .sp -PCRE differs from Perl in its handling of backtracking verbs in repeated +PCRE differs from Perl in its handling of backtracking verbs in repeated groups. For example, consider: .sp /(a(*COMMIT)b)+ac/ .sp -If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in -the second repeat of the group acts. +If the subject is "abac", Perl matches, but PCRE fails because the (*COMMIT) in +the second repeat of the group acts. . . .\" HTML @@ -3084,8 +3084,8 @@ the second repeat of the group acts. .sp (*FAIL) in an assertion has its normal effect: it forces an immediate backtrack. .P -(*ACCEPT) in a positive assertion causes the assertion to succeed without any -further processing. In a negative assertion, (*ACCEPT) causes the assertion to +(*ACCEPT) in a positive assertion causes the assertion to succeed without any +further processing. In a negative assertion, (*ACCEPT) causes the assertion to fail without any further processing. .P The other backtracking verbs are not treated specially if they appear in a @@ -3095,10 +3095,10 @@ the assertion. .P Negative assertions are, however, different, in order to ensure that changing a positive assertion into a negative assertion changes its result.
Backtracking -into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true, -without considering any further alternative branches in the assertion. +into (*COMMIT), (*SKIP), or (*PRUNE) causes a negative assertion to be true, +without considering any further alternative branches in the assertion. Backtracking into (*THEN) causes it to skip to the next enclosing alternative -within the assertion (the normal behaviour), but if the assertion does not have +within the assertion (the normal behaviour), but if the assertion does not have such an alternative, (*THEN) behaves like (*PRUNE). . . @@ -3106,21 +3106,21 @@ such an alternative, (*THEN) behaves like (*PRUNE). .SS "Backtracking verbs in subroutines" .rs .sp -These behaviours occur whether or not the subpattern is called recursively. +These behaviours occur whether or not the subpattern is called recursively. Perl's treatment of subroutines is different in some cases. .P (*FAIL) in a subpattern called as a subroutine has its normal effect: it forces an immediate backtrack. .P -(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to -succeed without any further processing. Matching then continues after the +(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the subroutine call. .P (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause the subroutine match to fail. .P (*THEN) skips to the next alternative in the innermost enclosing group within -the subpattern that has alternatives. If there is no such group within the +the subpattern that has alternatives. If there is no such group within the subpattern, (*THEN) causes the subroutine match to fail. . . diff --git a/doc/pcresyntax.3 b/doc/pcresyntax.3 index c7b92cf..399bbe2 100644 --- a/doc/pcresyntax.3 +++ b/doc/pcresyntax.3 @@ -116,8 +116,8 @@ PCRE_UCP option. 
Xan Alphanumeric: union of properties L and N Xps POSIX space: property Z or tab, NL, VT, FF, CR Xsp Perl space: property Z or tab, NL, FF, CR - Xuc Universally-named character: one that can be - represented by a Universal Character Name + Xuc Universally-named character: one that can be + represented by a Universal Character Name Xwd Perl word: property Xan or underscore . . diff --git a/doc/pcretest.1 b/doc/pcretest.1 index 2fb121d..b71c897 100644 --- a/doc/pcretest.1 +++ b/doc/pcretest.1 @@ -44,11 +44,11 @@ but without much justification. .rs .sp Input to \fBpcretest\fP is processed line by line, either by calling the C -library's \fBfgets()\fP function, or via the \fBlibreadline\fP library (see -below). In Unix-like environments, \fBfgets()\fP treats any bytes other than -newline as data characters. However, in some Windows environments character 26 -(hex 1A) causes an immediate end of file, and no further data is read. For -maximum portability, therefore, it is safest to use only ASCII characters in +library's \fBfgets()\fP function, or via the \fBlibreadline\fP library (see +below). In Unix-like environments, \fBfgets()\fP treats any bytes other than +newline as data characters. However, in some Windows environments character 26 +(hex 1A) causes an immediate end of file, and no further data is read. For +maximum portability, therefore, it is safest to use only ASCII characters in \fBpcretest\fP input files. . . @@ -96,7 +96,7 @@ internal form is output after compilation. .TP 10 \fB-C\fP Output the version number of the PCRE library, and all available information -about the optional features that are included, and then exit with zero exit +about the optional features that are included, and then exit with zero exit code. All other options are ignored.
.TP 10 \fB-C\fP \fIoption\fP @@ -107,14 +107,14 @@ following options output the value and set the exit code as indicated: ebcdic-nl the code for LF (= NL) in an EBCDIC environment: 0x15 or 0x25 0 if used in an ASCII environment - exit code is always 0 + exit code is always 0 linksize the configured internal link size (2, 3, or 4) - exit code is set to the link size + exit code is set to the link size newline the default newline setting: CR, LF, CRLF, ANYCRLF, or ANY - exit code is always 0 + exit code is always 0 .sp -The following options output 1 for true or 0 for false, and set the exit code +The following options output 1 for true or 0 for false, and set the exit code to the same value: .sp ebcdic compiled for an EBCDIC environment @@ -123,10 +123,10 @@ to the same value: pcre32 the 32-bit library was built pcre8 the 8-bit library was built ucp Unicode property support is available - utf UTF-8 and/or UTF-16 and/or UTF-32 support + utf UTF-8 and/or UTF-16 and/or UTF-32 support is available .sp -If an unknown option is given, an error message is output; the exit code is 0. +If an unknown option is given, an error message is output; the exit code is 0. .TP 10 \fB-d\fP Behave as if each pattern has the \fB/D\fP (debug) modifier; the internal @@ -298,7 +298,7 @@ fall into several groups that are described in detail in the following sections. .sp \fB/8\fP set UTF mode - \fB/9\fP set PCRE_NEVER_UTF (locks out UTF mode) + \fB/9\fP set PCRE_NEVER_UTF (locks out UTF mode) \fB/?\fP disable UTF validity check \fB/+\fP show remainder of subject after match \fB/=\fP show all captures (not just those that are set) diff --git a/doc/pcreunicode.3 b/doc/pcreunicode.3 index 917ea5b..cb5e526 100644 --- a/doc/pcreunicode.3 +++ b/doc/pcreunicode.3 @@ -84,8 +84,8 @@ place. From release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are themselves derived from the Unicode specification.
Earlier releases of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0 -to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called -"non-character" code points are no longer excluded because Unicode corrigendum +to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called +"non-character" code points are no longer excluded because Unicode corrigendum #9 makes it clear that they should not be.) .P Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, diff --git a/pcre_string_utils.c b/pcre_string_utils.c index 3ad3825..10b53d5 100644 --- a/pcre_string_utils.c +++ b/pcre_string_utils.c @@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. */ -/* This module contains internal functions for comparing and finding the length +/* This module contains internal functions for comparing and finding the length of strings for different data item sizes. */ diff --git a/pcre_xclass.c b/pcre_xclass.c index ddc2844..d777acb 100644 --- a/pcre_xclass.c +++ b/pcre_xclass.c @@ -179,20 +179,20 @@ while ((t = *data++) != XCL_END) == (t == XCL_PROP)) return !negated; break; - + case PT_UCNC: - if (c < 0xa0) - { - if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + if (c < 0xa0) + { + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP)) return !negated; - } - else - { - if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP)) + } + else + { + if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP)) return !negated; - } - break; + } + break; /* This should never occur, but compilers may mutter if there is no default. */ diff --git a/pcregrep.c b/pcregrep.c index 1d20733..6402348 100644 --- a/pcregrep.c +++ b/pcregrep.c @@ -1378,7 +1378,7 @@ to find all possible matches. 
Arguments: matchptr the start of the subject length the length of the subject to match - options options for pcre_exec + options options for pcre_exec startoffset where to start matching offsets the offsets vector to fill in mrc address of where to put the result of pcre_exec() @@ -1389,7 +1389,7 @@ Returns: TRUE if there was a match */ static BOOL -match_patterns(char *matchptr, size_t length, unsigned int options, +match_patterns(char *matchptr, size_t length, unsigned int options, int startoffset, int *offsets, int *mrc) { int i; @@ -1540,7 +1540,7 @@ while (ptr < endptr) int endlinelength; int mrc = 0; int startoffset = 0; - unsigned int options = 0; + unsigned int options = 0; BOOL match; char *matchptr = ptr; char *t = ptr; @@ -1630,8 +1630,8 @@ while (ptr < endptr) /* Run through all the patterns until one matches or there is an error other than NOMATCH. This code is in a subroutine so that it can be re-used for - finding subsequent matches when colouring matched lines. After finding one - match, set PCRE_NOTEMPTY to disable any further matches of null strings in + finding subsequent matches when colouring matched lines. After finding one + match, set PCRE_NOTEMPTY to disable any further matches of null strings in this line. */ match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc); diff --git a/pcretest.c b/pcretest.c index 20dc0f1..57ee041 100644 --- a/pcretest.c +++ b/pcretest.c @@ -4412,7 +4412,7 @@ while (!done) #ifndef NOUTF /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create invalid input to pcre_exec, you must use \x?? or \x{} sequences. */ - + if (use_utf) { pcre_uint8 *q; @@ -4430,7 +4430,7 @@ while (!done) #ifdef SUPPORT_VALGRIND /* Mark the dbuffer as addressable but undefined again.
*/ - + if (dbuffer != NULL) { VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE); @@ -4439,7 +4439,7 @@ while (!done) /* Allocate a buffer to hold the data line; len+1 is an upper bound on the number of pcre_uchar units that will be needed. */ - + while (dbuffer == NULL || (size_t)len >= dbuffer_size) { dbuffer_size *= 2; -- cgit v1.2.1

pcre_assign_jit_stack   Assign stack for JIT matching
pcre_maketables   Build character tables in current locale
pcre_pattern_to_host_byte_order   Convert compiled pattern to host byte order if necessary