summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-03-30 15:55:18 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-03-30 15:55:18 +0000
commit d0fc62ee8e85255467ef8541458df6e7f4e01cef (patch)
tree 7bf7b93cbd42c94a0d7d292e72d0c144e75b3dc6
parent 28ed4f58e289c711ec3494e9f512da278506ece9 (diff)
download pcre-d0fc62ee8e85255467ef8541458df6e7f4e01cef.tar.gz
Trailing spaces.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@142 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- ChangeLog 10
-rw-r--r-- README 2
-rwxr-xr-x RunGrepTest 2
-rw-r--r-- doc/html/pcreapi.html 27
-rw-r--r-- doc/html/pcrepattern.html 7
-rw-r--r-- doc/pcre.txt 48
-rw-r--r-- doc/pcreapi.3 8
-rw-r--r-- pcregrep.c 8
-rw-r--r-- pcretest.c 20
10 files changed, 78 insertions, 56 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5731bd5..49f1715 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -254,7 +254,7 @@ IF(UNIX)
ELSE(UNIX)
IF(WIN32)
ADD_TEST(test1 ${CMAKE_SOURCE_DIR}/RunTest.bat ${CMAKE_SOURCE_DIR})
- ENDIF(WIN32)
+ ENDIF(WIN32)
ENDIF(UNIX)
# Installation
diff --git a/ChangeLog b/ChangeLog
index da3397e..3ba3ac5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -126,14 +126,14 @@ Version 7.1 12-Mar-07
16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub"
in an attempt to make files that differ only in their line terminators
compare equal. This works on Linux.
-
+
17. Under certain error circumstances pcregrep might try to free random memory
as it exited. This is now fixed, thanks to valgrind.
-
+
19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
- "abc\r\n\r\n", it found an unwanted second match after the second \r. This
- was because its rules for how to advance for /g after matching an empty
- string did not allow for this case. They now check for it specially.
+ "abc\r\n\r\n", it found an unwanted second match after the second \r. This
+ was because its rules for how to advance for /g after matching an empty
+ string did not allow for this case. They now check for it specially.
Version 7.0 19-Dec-06
diff --git a/README b/README
index 177a7e6..6044d64 100644
--- a/README
+++ b/README
@@ -489,7 +489,7 @@ is output to say why. If running this test produces instances of the error
in the comparison output, it means that locale is not available on your system,
despite being listed by "locale". This does not mean that PCRE is broken.
-[If you are trying to run this test on Windows, you may be able to get it to
+[If you are trying to run this test on Windows, you may be able to get it to
work by changing "fr_FR" to "french" everywhere it occurs.]
The fourth test checks the UTF-8 support. It is not run automatically unless
diff --git a/RunGrepTest b/RunGrepTest
index 131ceeb..5f73798 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -33,7 +33,7 @@ done
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
srcdir=.
-fi
+fi
# Check for the availability of UTF-8 support
diff --git a/doc/html/pcreapi.html b/doc/html/pcreapi.html
index f0eb67e..ce393a8 100644
--- a/doc/html/pcreapi.html
+++ b/doc/html/pcreapi.html
@@ -737,20 +737,27 @@ bytes is created.
<a name="localesupport"></a></P>
<br><a name="SEC10" href="#TOC1">LOCALE SUPPORT</a><br>
<P>
-PCRE handles caseless matching, and determines whether characters are letters
+PCRE handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character
value. When running in UTF-8 mode, this applies only to characters with codes
less than 128. Higher-valued codes never match escapes such as \w or \d, but
can be tested with \p if PCRE is built with Unicode character property
-support. The use of locales with Unicode is discouraged.
+support. The use of locales with Unicode is discouraged. If you are handling
+characters with codes greater than 128, you should either use UTF-8 and
+Unicode, or use locales, but not try to mix the two.
</P>
<P>
-An internal set of tables is created in the default C locale when PCRE is
-built. This is used when the final argument of <b>pcre_compile()</b> is NULL,
-and is sufficient for many applications. An alternative set of tables can,
-however, be supplied. These may be created in a different locale from the
-default. As more and more applications change to using Unicode, the need for
-this locale support is expected to die away.
+PCRE contains an internal set of tables that are used when the final argument
+of <b>pcre_compile()</b> is NULL. These are sufficient for many applications.
+Normally, the internal tables recognize only ASCII characters. However, when
+PCRE is built, it is possible to cause the internal tables to be rebuilt in the
+default "C" locale of the local system, which may cause them to be different.
+</P>
+<P>
+The internal tables can always be overridden by tables supplied by the
+application that calls PCRE. These may be created in a different locale from
+the default. As more and more applications change to using Unicode, the need
+for this locale support is expected to die away.
</P>
<P>
External tables are built by calling the <b>pcre_maketables()</b> function,
@@ -764,6 +771,10 @@ the following code could be used:
tables = pcre_maketables();
re = pcre_compile(..., tables);
</pre>
+The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
+are using Windows, the name for the French locale is "french".
+</P>
+<P>
When <b>pcre_maketables()</b> runs, the tables are built in memory that is
obtained via <b>pcre_malloc</b>. It is the caller's responsibility to ensure
that the memory containing the tables remains available for as long as it is
diff --git a/doc/html/pcrepattern.html b/doc/html/pcrepattern.html
index 6846df0..6cde7ad 100644
--- a/doc/html/pcrepattern.html
+++ b/doc/html/pcrepattern.html
@@ -297,8 +297,9 @@ place (see
<a href="pcreapi.html#localesupport">"Locale support"</a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a>
-page). For example, in the "fr_FR" (French) locale, some character codes
-greater than 128 are used for accented letters, and these are matched by \w.
+page). For example, in a French locale such as "fr_FR" in Unix-like systems,
+or "french" in Windows, some character codes greater than 128 are used for
+accented letters, and these are matched by \w.
</P>
<P>
In UTF-8 mode, characters with values greater than 128 never match \d, \s, or
@@ -756,7 +757,7 @@ example [\x{100}-\x{2ff}].
If a range that includes letters is used when caseless matching is set, it
matches the letters in either case. For example, [W-c] is equivalent to
[][\\^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if character
-tables for the "fr_FR" locale are in use, [\xc8-\xcb] matches accented E
+tables for a French locale are in use, [\xc8-\xcb] matches accented E
characters in both cases. In UTF-8 mode, PCRE supports the concept of case for
characters with values greater than 128 only when it is compiled with Unicode
property support.
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 40b09a6..f554049 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -1323,31 +1323,41 @@ STUDYING A PATTERN
LOCALE SUPPORT
PCRE handles caseless matching, and determines whether characters are
- letters digits, or whatever, by reference to a set of tables, indexed
+ letters, digits, or whatever, by reference to a set of tables, indexed
by character value. When running in UTF-8 mode, this applies only to
characters with codes less than 128. Higher-valued codes never match
escapes such as \w or \d, but can be tested with \p if PCRE is built
with Unicode character property support. The use of locales with Uni-
- code is discouraged.
-
- An internal set of tables is created in the default C locale when PCRE
- is built. This is used when the final argument of pcre_compile() is
- NULL, and is sufficient for many applications. An alternative set of
- tables can, however, be supplied. These may be created in a different
- locale from the default. As more and more applications change to using
- Unicode, the need for this locale support is expected to die away.
-
- External tables are built by calling the pcre_maketables() function,
- which has no arguments, in the relevant locale. The result can then be
- passed to pcre_compile() or pcre_exec() as often as necessary. For
- example, to build and use tables that are appropriate for the French
- locale (where accented characters with values greater than 128 are
+ code is discouraged. If you are handling characters with codes greater
+ than 128, you should either use UTF-8 and Unicode, or use locales, but
+ not try to mix the two.
+
+ PCRE contains an internal set of tables that are used when the final
+ argument of pcre_compile() is NULL. These are sufficient for many
+ applications. Normally, the internal tables recognize only ASCII char-
+ acters. However, when PCRE is built, it is possible to cause the inter-
+ nal tables to be rebuilt in the default "C" locale of the local system,
+ which may cause them to be different.
+
+ The internal tables can always be overridden by tables supplied by the
+ application that calls PCRE. These may be created in a different locale
+ from the default. As more and more applications change to using Uni-
+ code, the need for this locale support is expected to die away.
+
+ External tables are built by calling the pcre_maketables() function,
+ which has no arguments, in the relevant locale. The result can then be
+ passed to pcre_compile() or pcre_exec() as often as necessary. For
+ example, to build and use tables that are appropriate for the French
+ locale (where accented characters with values greater than 128 are
treated as letters), the following code could be used:
setlocale(LC_CTYPE, "fr_FR");
tables = pcre_maketables();
re = pcre_compile(..., tables);
+ The locale name "fr_FR" is used on Linux and other Unix-like systems;
+ if you are using Windows, the name for the French locale is "french".
+
When pcre_maketables() runs, the tables are built in memory that is
obtained via pcre_malloc. It is the caller's responsibility to ensure
that the memory containing the tables remains available for as long as
@@ -2918,9 +2928,9 @@ BACKSLASH
is a letter or digit. The definition of letters and digits is con-
trolled by PCRE's low-valued character tables, and may vary if locale-
specific matching is taking place (see "Locale support" in the pcreapi
- page). For example, in the "fr_FR" (French) locale, some character
- codes greater than 128 are used for accented letters, and these are
- matched by \w.
+ page). For example, in a French locale such as "fr_FR" in Unix-like
+ systems, or "french" in Windows, some character codes greater than 128
+ are used for accented letters, and these are matched by \w.
In UTF-8 mode, characters with values greater than 128 never match \d,
\s, or \w, and always match \D, \S, and \W. This is true even when Uni-
@@ -3289,7 +3299,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES
If a range that includes letters is used when caseless matching is set,
it matches the letters in either case. For example, [W-c] is equivalent
to [][\\^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if
- character tables for the "fr_FR" locale are in use, [\xc8-\xcb] matches
+ character tables for a French locale are in use, [\xc8-\xcb] matches
accented E characters in both cases. In UTF-8 mode, PCRE supports the
concept of case for characters with values greater than 128 only when
it is compiled with Unicode property support.
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 02c1016..1163fc7 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -734,13 +734,13 @@ digits, or whatever, by reference to a set of tables, indexed by character
value. When running in UTF-8 mode, this applies only to characters with codes
less than 128. Higher-valued codes never match escapes such as \ew or \ed, but
can be tested with \ep if PCRE is built with Unicode character property
-support. The use of locales with Unicode is discouraged. If you are handling
-characters with codes greater than 128, you should either use UTF-8 and
+support. The use of locales with Unicode is discouraged. If you are handling
+characters with codes greater than 128, you should either use UTF-8 and
Unicode, or use locales, but not try to mix the two.
.P
PCRE contains an internal set of tables that are used when the final argument
of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
-Normally, the internal tables recognize only ASCII characters. However, when
+Normally, the internal tables recognize only ASCII characters. However, when
PCRE is built, it is possible to cause the internal tables to be rebuilt in the
default "C" locale of the local system, which may cause them to be different.
.P
@@ -760,7 +760,7 @@ the following code could be used:
tables = pcre_maketables();
re = pcre_compile(..., tables);
.sp
-The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
+The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french".
.P
When \fBpcre_maketables()\fP runs, the tables are built in memory that is
diff --git a/pcregrep.c b/pcregrep.c
index 79596f5..837bc76 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -1408,11 +1408,11 @@ sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
suffix[process_options]);
pattern_list[pattern_count] =
pcre_compile(buffer, options, &error, &errptr, pcretables);
-if (pattern_list[pattern_count] != NULL)
+if (pattern_list[pattern_count] != NULL)
{
- pattern_count++;
+ pattern_count++;
return TRUE;
- }
+ }
/* Handle compile errors */
@@ -1947,7 +1947,7 @@ for (j = 0; j < pattern_count; j++)
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
goto EXIT2;
}
- hint_count++;
+ hint_count++;
}
/* If there are include or exclude patterns, compile them. */
diff --git a/pcretest.c b/pcretest.c
index 405e9cf..0f3017d 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1972,7 +1972,7 @@ while (!done)
for (;; gmatched++) /* Loop for /g or /G */
{
- int gany_fudge;
+ int gany_fudge;
if (timeitm > 0)
{
register int i;
@@ -2256,17 +2256,17 @@ while (!done)
what Perl's /g options does. This turns out to be rather cunning. First
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
same point. If this fails (picked up above) we advance to the next
- character.
-
+ character.
+
Yet more complication arises in the case when the newline option is
- "any" and a pattern in multiline mode has to match at the start of a
- line. If a previous match was at the end of a line, and advance of one
- character just passes the \r, whereas we should prefer the longer newline
+ "any" and a pattern in multiline mode has to match at the start of a
+ line. If a previous match was at the end of a line, and advance of one
+ character just passes the \r, whereas we should prefer the longer newline
sequence, as does the code in pcre_exec(). So we fudge it. */
g_notempty = 0;
- gany_fudge = 0;
-
+ gany_fudge = 0;
+
if (use_offsets[0] == use_offsets[1])
{
if (use_offsets[0] == len) break;
@@ -2274,9 +2274,9 @@ while (!done)
if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&
(((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
use_offsets[0] < len - 1 &&
- bptr[use_offsets[0]] == '\r' &&
+ bptr[use_offsets[0]] == '\r' &&
bptr[use_offsets[0]+1] == '\n')
- gany_fudge = 1;
+ gany_fudge = 1;
}
/* For /g, update the start offset, leaving the rest alone */