diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-30 15:55:18 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-30 15:55:18 +0000 |
commit | d0fc62ee8e85255467ef8541458df6e7f4e01cef (patch) | |
tree | 7bf7b93cbd42c94a0d7d292e72d0c144e75b3dc6 | |
parent | 28ed4f58e289c711ec3494e9f512da278506ece9 (diff) | |
download | pcre-d0fc62ee8e85255467ef8541458df6e7f4e01cef.tar.gz |
Trailing spaces.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@142 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | RunGrepTest | 2 | ||||
-rw-r--r-- | doc/html/pcreapi.html | 27 | ||||
-rw-r--r-- | doc/html/pcrepattern.html | 7 | ||||
-rw-r--r-- | doc/pcre.txt | 48 | ||||
-rw-r--r-- | doc/pcreapi.3 | 8 | ||||
-rw-r--r-- | pcregrep.c | 8 | ||||
-rw-r--r-- | pcretest.c | 20 |
10 files changed, 78 insertions, 56 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5731bd5..49f1715 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,7 +254,7 @@ IF(UNIX) ELSE(UNIX) IF(WIN32) ADD_TEST(test1 ${CMAKE_SOURCE_DIR}/RunTest.bat ${CMAKE_SOURCE_DIR}) - ENDIF(WIN32) + ENDIF(WIN32) ENDIF(UNIX) # Installation @@ -126,14 +126,14 @@ Version 7.1 12-Mar-07 16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub" in an attempt to make files that differ only in their line terminators compare equal. This works on Linux. - + 17. Under certain error circumstances pcregrep might try to free random memory as it exited. This is now fixed, thanks to valgrind. - + 19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string - "abc\r\n\r\n", it found an unwanted second match after the second \r. This - was because its rules for how to advance for /g after matching an empty - string did not allow for this case. They now check for it specially. + "abc\r\n\r\n", it found an unwanted second match after the second \r. This + was because its rules for how to advance for /g after matching an empty + string did not allow for this case. They now check for it specially. Version 7.0 19-Dec-06 @@ -489,7 +489,7 @@ is output to say why. If running this test produces instances of the error in the comparison output, it means that locale is not available on your system, despite being listed by "locale". This does not mean that PCRE is broken. -[If you are trying to run this test on Windows, you may be able to get it to +[If you are trying to run this test on Windows, you may be able to get it to work by changing "fr_FR" to "french" everywhere it occurs.] The fourth test checks the UTF-8 support. It is not run automatically unless diff --git a/RunGrepTest b/RunGrepTest index 131ceeb..5f73798 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -33,7 +33,7 @@ done if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then srcdir=. -fi +fi # Check for the availability of UTF-8 support diff --git a/doc/html/pcreapi.html b/doc/html/pcreapi.html index f0eb67e..ce393a8 100644 --- a/doc/html/pcreapi.html +++ b/doc/html/pcreapi.html @@ -737,20 +737,27 @@ bytes is created. <a name="localesupport"></a></P> <br><a name="SEC10" href="#TOC1">LOCALE SUPPORT</a><br> <P> -PCRE handles caseless matching, and determines whether characters are letters +PCRE handles caseless matching, and determines whether characters are letters, digits, or whatever, by reference to a set of tables, indexed by character value. When running in UTF-8 mode, this applies only to characters with codes less than 128. Higher-valued codes never match escapes such as \w or \d, but can be tested with \p if PCRE is built with Unicode character property -support. The use of locales with Unicode is discouraged. +support. The use of locales with Unicode is discouraged. If you are handling +characters with codes greater than 128, you should either use UTF-8 and +Unicode, or use locales, but not try to mix the two. </P> <P> -An internal set of tables is created in the default C locale when PCRE is -built. This is used when the final argument of <b>pcre_compile()</b> is NULL, -and is sufficient for many applications. An alternative set of tables can, -however, be supplied. These may be created in a different locale from the -default. As more and more applications change to using Unicode, the need for -this locale support is expected to die away. +PCRE contains an internal set of tables that are used when the final argument +of <b>pcre_compile()</b> is NULL. These are sufficient for many applications. +Normally, the internal tables recognize only ASCII characters. However, when +PCRE is built, it is possible to cause the internal tables to be rebuilt in the +default "C" locale of the local system, which may cause them to be different. +</P> +<P> +The internal tables can always be overridden by tables supplied by the +application that calls PCRE. These may be created in a different locale from +the default. As more and more applications change to using Unicode, the need +for this locale support is expected to die away. </P> <P> External tables are built by calling the <b>pcre_maketables()</b> function, @@ -764,6 +771,10 @@ the following code could be used: tables = pcre_maketables(); re = pcre_compile(..., tables); </pre> +The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +</P> +<P> When <b>pcre_maketables()</b> runs, the tables are built in memory that is obtained via <b>pcre_malloc</b>. It is the caller's responsibility to ensure that the memory containing the tables remains available for as long as it is diff --git a/doc/html/pcrepattern.html b/doc/html/pcrepattern.html index 6846df0..6cde7ad 100644 --- a/doc/html/pcrepattern.html +++ b/doc/html/pcrepattern.html @@ -297,8 +297,9 @@ place (see <a href="pcreapi.html#localesupport">"Locale support"</a> in the <a href="pcreapi.html"><b>pcreapi</b></a> -page). For example, in the "fr_FR" (French) locale, some character codes -greater than 128 are used for accented letters, and these are matched by \w. +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 128 are used for +accented letters, and these are matched by \w. </P> <P> In UTF-8 mode, characters with values greater than 128 never match \d, \s, or @@ -756,7 +757,7 @@ example [\x{100}-\x{2ff}]. If a range that includes letters is used when caseless matching is set, it matches the letters in either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if character -tables for the "fr_FR" locale are in use, [\xc8-\xcb] matches accented E +tables for a French locale are in use, [\xc8-\xcb] matches accented E characters in both cases. In UTF-8 mode, PCRE supports the concept of case for characters with values greater than 128 only when it is compiled with Unicode property support. diff --git a/doc/pcre.txt b/doc/pcre.txt index 40b09a6..f554049 100644 --- a/doc/pcre.txt +++ b/doc/pcre.txt @@ -1323,31 +1323,41 @@ STUDYING A PATTERN LOCALE SUPPORT PCRE handles caseless matching, and determines whether characters are - letters digits, or whatever, by reference to a set of tables, indexed + letters, digits, or whatever, by reference to a set of tables, indexed by character value. When running in UTF-8 mode, this applies only to characters with codes less than 128. Higher-valued codes never match escapes such as \w or \d, but can be tested with \p if PCRE is built with Unicode character property support. The use of locales with Uni- - code is discouraged. - - An internal set of tables is created in the default C locale when PCRE - is built. This is used when the final argument of pcre_compile() is - NULL, and is sufficient for many applications. An alternative set of - tables can, however, be supplied. These may be created in a different - locale from the default. As more and more applications change to using - Unicode, the need for this locale support is expected to die away. - - External tables are built by calling the pcre_maketables() function, - which has no arguments, in the relevant locale. The result can then be - passed to pcre_compile() or pcre_exec() as often as necessary. For - example, to build and use tables that are appropriate for the French - locale (where accented characters with values greater than 128 are + code is discouraged. If you are handling characters with codes greater + than 128, you should either use UTF-8 and Unicode, or use locales, but + not try to mix the two. + + PCRE contains an internal set of tables that are used when the final + argument of pcre_compile() is NULL. These are sufficient for many + applications. Normally, the internal tables recognize only ASCII char- + acters. However, when PCRE is built, it is possible to cause the inter- + nal tables to be rebuilt in the default "C" locale of the local system, + which may cause them to be different. + + The internal tables can always be overridden by tables supplied by the + application that calls PCRE. These may be created in a different locale + from the default. As more and more applications change to using Uni- + code, the need for this locale support is expected to die away. + + External tables are built by calling the pcre_maketables() function, + which has no arguments, in the relevant locale. The result can then be + passed to pcre_compile() or pcre_exec() as often as necessary. For + example, to build and use tables that are appropriate for the French + locale (where accented characters with values greater than 128 are treated as letters), the following code could be used: setlocale(LC_CTYPE, "fr_FR"); tables = pcre_maketables(); re = pcre_compile(..., tables); + The locale name "fr_FR" is used on Linux and other Unix-like systems; + if you are using Windows, the name for the French locale is "french". + When pcre_maketables() runs, the tables are built in memory that is obtained via pcre_malloc. It is the caller's responsibility to ensure that the memory containing the tables remains available for as long as @@ -2918,9 +2928,9 @@ BACKSLASH is a letter or digit. The definition of letters and digits is con- trolled by PCRE's low-valued character tables, and may vary if locale- specific matching is taking place (see "Locale support" in the pcreapi - page). For example, in the "fr_FR" (French) locale, some character - codes greater than 128 are used for accented letters, and these are - matched by \w. + page). For example, in a French locale such as "fr_FR" in Unix-like + systems, or "french" in Windows, some character codes greater than 128 + are used for accented letters, and these are matched by \w. In UTF-8 mode, characters with values greater than 128 never match \d, \s, or \w, and always match \D, \S, and \W. This is true even when Uni- @@ -3289,7 +3299,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES If a range that includes letters is used when caseless matching is set, it matches the letters in either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if - character tables for the "fr_FR" locale are in use, [\xc8-\xcb] matches + character tables for a French locale are in use, [\xc8-\xcb] matches accented E characters in both cases. In UTF-8 mode, PCRE supports the concept of case for characters with values greater than 128 only when it is compiled with Unicode property support. diff --git a/doc/pcreapi.3 b/doc/pcreapi.3 index 02c1016..1163fc7 100644 --- a/doc/pcreapi.3 +++ b/doc/pcreapi.3 @@ -734,13 +734,13 @@ digits, or whatever, by reference to a set of tables, indexed by character value. When running in UTF-8 mode, this applies only to characters with codes less than 128. Higher-valued codes never match escapes such as \ew or \ed, but can be tested with \ep if PCRE is built with Unicode character property -support. The use of locales with Unicode is discouraged. If you are handling -characters with codes greater than 128, you should either use UTF-8 and +support. The use of locales with Unicode is discouraged. If you are handling +characters with codes greater than 128, you should either use UTF-8 and Unicode, or use locales, but not try to mix the two. .P PCRE contains an internal set of tables that are used when the final argument of \fBpcre_compile()\fP is NULL. These are sufficient for many applications. -Normally, the internal tables recognize only ASCII characters. However, when +Normally, the internal tables recognize only ASCII characters. However, when PCRE is built, it is possible to cause the internal tables to be rebuilt in the default "C" locale of the local system, which may cause them to be different. .P @@ -760,7 +760,7 @@ the following code could be used: tables = pcre_maketables(); re = pcre_compile(..., tables); .sp -The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +The locale name "fr_FR" is used on Linux and other Unix-like systems; if you are using Windows, the name for the French locale is "french". .P When \fBpcre_maketables()\fP runs, the tables are built in memory that is @@ -1408,11 +1408,11 @@ sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, suffix[process_options]); pattern_list[pattern_count] = pcre_compile(buffer, options, &error, &errptr, pcretables); -if (pattern_list[pattern_count] != NULL) +if (pattern_list[pattern_count] != NULL) { - pattern_count++; + pattern_count++; return TRUE; - } + } /* Handle compile errors */ @@ -1947,7 +1947,7 @@ for (j = 0; j < pattern_count; j++) fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); goto EXIT2; } - hint_count++; + hint_count++; } /* If there are include or exclude patterns, compile them. */ @@ -1972,7 +1972,7 @@ while (!done) for (;; gmatched++) /* Loop for /g or /G */ { - int gany_fudge; + int gany_fudge; if (timeitm > 0) { register int i; @@ -2256,17 +2256,17 @@ while (!done) what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the same point. If this fails (picked up above) we advance to the next - character. - + character. + Yet more complication arises in the case when the newline option is - "any" and a pattern in multiline mode has to match at the start of a - line. If a previous match was at the end of a line, and advance of one - character just passes the \r, whereas we should prefer the longer newline + "any" and a pattern in multiline mode has to match at the start of a + line. If a previous match was at the end of a line, and advance of one + character just passes the \r, whereas we should prefer the longer newline sequence, as does the code in pcre_exec(). So we fudge it. */ g_notempty = 0; - gany_fudge = 0; - + gany_fudge = 0; + if (use_offsets[0] == use_offsets[1]) { if (use_offsets[0] == len) break; @@ -2274,9 +2274,9 @@ while (!done) if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 && (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY && use_offsets[0] < len - 1 && - bptr[use_offsets[0]] == '\r' && + bptr[use_offsets[0]] == '\r' && bptr[use_offsets[0]+1] == '\n') - gany_fudge = 1; + gany_fudge = 1; } /* For /g, update the start offset, leaving the rest alone */ |