summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:41:13 +0000
committer nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:41:13 +0000
commit 2550303b1f255c525d802f94d9c4411a0ccc630f (patch)
tree ec4504b6ce58f106752b7306810fbc35b7263be2
parent 9413dc1ec018ad717d506487968b1f4c2b778e3f (diff)
download pcre-2550303b1f255c525d802f94d9c4411a0ccc630f.tar.gz
Load pcre-6.4 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@85 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog22
-rw-r--r--Makefile.in9
-rw-r--r--NON-UNIX-USE2
-rw-r--r--config.h.in (renamed from config.in)2
-rwxr-xr-xconfigure12
-rw-r--r--configure.in10
-rw-r--r--pcre.h.in (renamed from pcre.in)0
-rw-r--r--pcre_compile.c18
-rw-r--r--pcre_config.c2
-rw-r--r--pcre_dfa_exec.c40
-rw-r--r--pcre_exec.c34
-rw-r--r--pcre_fullinfo.c2
-rw-r--r--pcre_info.c2
-rw-r--r--pcre_internal.h19
-rw-r--r--pcre_ord2utf8.c2
-rw-r--r--pcre_printint.src (renamed from pcre_printint.c)15
-rw-r--r--pcre_refcount.c2
-rw-r--r--pcre_study.c2
-rw-r--r--pcre_tables.c7
-rw-r--r--pcre_try_flipped.c2
-rw-r--r--pcre_ucp_findchar.c114
-rw-r--r--pcre_valid_utf8.c2
-rw-r--r--pcre_version.c2
-rw-r--r--pcre_xclass.c4
-rw-r--r--pcregrep.c8
-rw-r--r--pcreposix.c8
-rw-r--r--pcretest.c77
-rw-r--r--testdata/grepoutput4
-rw-r--r--testdata/testoutput12
-rw-r--r--testdata/testoutput22
-rw-r--r--testdata/testoutput32
-rw-r--r--testdata/testoutput42
-rw-r--r--testdata/testoutput52
-rw-r--r--testdata/testoutput62
-rw-r--r--testdata/testoutput72
-rw-r--r--testdata/testoutput82
-rw-r--r--testdata/testoutput92
-rw-r--r--ucp.h2
-rw-r--r--ucp_findchar.c160
39 files changed, 316 insertions, 287 deletions
diff --git a/ChangeLog b/ChangeLog
index a3c95f4..a04153a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,28 @@
ChangeLog for PCRE
------------------
+Version 6.4 05-Sep-05
+---------------------
+
+ 1. Change 6.0/10/(l) to pcregrep introduced a bug that caused separator lines
+ "--" to be printed when multiple files were scanned, even when none of the
+ -A, -B, or -C options were used. This is not compatible with Gnu grep, so I
+ consider it to be a bug, and have restored the previous behaviour.
+
+ 2. A couple of code tidies to get rid of compiler warnings.
+
+ 3. The pcretest program used to cheat by referring to symbols in the library
+ whose names begin with _pcre_. These are internal symbols that are not
+ really supposed to be visible externally, and in some environments it is
+ possible to suppress them. The cheating is now confined to including
+ certain files from the library's source, which is a bit cleaner.
+
+ 4. Renamed pcre.in as pcre.h.in to go with pcrecpp.h.in; it also makes the
+ file's purpose clearer.
+
+ 5. Reorganized pcre_ucp_findchar().
+
+
Version 6.3 15-Aug-05
---------------------
diff --git a/Makefile.in b/Makefile.in
index 8a2d004..fbb3199 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -105,7 +105,7 @@ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) -c $(CFLAGS) -I. -I$(top_srcdir) $(N
LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) -c $(CXXFLAGS) -I. -I$(top_srcdir) $(NEWLINE) $(LINK_SIZE) $(MATCH_LIMIT) $(NO_RECURSE) $(EBCDIC)
@ON_WINDOWS@LINK = $(CC) $(LDFLAGS) -I. -I$(top_srcdir) -L.libs
@NOT_ON_WINDOWS@LINK = $(LIBTOOL) --mode=link $(CC) $(CFLAGS) $(LDFLAGS) -I. -I$(top_srcdir)
-LINKLIB = $(LIBTOOL) --mode=link $(CC) -export-symbols-regex '^[^_]|__?pcre_.*utf8|__?pcre_printint' $(LDFLAGS) -I. -I$(top_srcdir)
+LINKLIB = $(LIBTOOL) --mode=link $(CC) -export-symbols-regex '^[^_]' $(LDFLAGS) -I. -I$(top_srcdir)
LINK_FOR_BUILD = $(LIBTOOL) --mode=link $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) -I. -I$(top_srcdir)
@ON_WINDOWS@CXXLINK = $(CXX) $(LDFLAGS) -I. -I$(top_srcdir) -L.libs
@NOT_ON_WINDOWS@CXXLINK = $(LIBTOOL) --mode=link $(CXX) $(LDFLAGS) -I. -I$(top_srcdir)
@@ -131,7 +131,6 @@ OBJ = pcre_chartables.@OBJEXT@ \
pcre_info.@OBJEXT@ \
pcre_maketables.@OBJEXT@ \
pcre_ord2utf8.@OBJEXT@ \
- pcre_printint.@OBJEXT@ \
pcre_refcount.@OBJEXT@ \
pcre_study.@OBJEXT@ \
pcre_tables.@OBJEXT@ \
@@ -153,7 +152,6 @@ LOBJ = pcre_chartables.lo \
pcre_info.lo \
pcre_maketables.lo \
pcre_ord2utf8.lo \
- pcre_printint.lo \
pcre_refcount.lo \
pcre_study.lo \
pcre_tables.lo \
@@ -269,11 +267,6 @@ pcre_ord2utf8.@OBJEXT@: Makefile config.h pcre.h \
@$(LTCOMPILE) $(UTF8) $(UCP) $(POSIX_MALLOC_THRESHOLD) \
$(top_srcdir)/pcre_ord2utf8.c
-pcre_printint.@OBJEXT@: Makefile config.h pcre.h \
- $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_printint.c
- @$(LTCOMPILE) $(UTF8) $(UCP) $(POSIX_MALLOC_THRESHOLD) \
- $(top_srcdir)/pcre_printint.c
-
pcre_refcount.@OBJEXT@: Makefile config.h pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_refcount.c
@$(LTCOMPILE) $(UTF8) $(UCP) $(POSIX_MALLOC_THRESHOLD) \
diff --git a/NON-UNIX-USE b/NON-UNIX-USE
index b7fe430..fc02ba1 100644
--- a/NON-UNIX-USE
+++ b/NON-UNIX-USE
@@ -46,7 +46,7 @@ configure.in.
write pcre.h
(3) Compile dftables.c as a stand-alone program, and then run it with
-the single argument "chartables.c". This generates a set of standard
+the single argument "pcre_chartables.c". This generates a set of standard
character tables and writes them to that file.
rem Mark Tetrode's commands
diff --git a/config.in b/config.h.in
index fc17ddd..a6d1451 100644
--- a/config.in
+++ b/config.h.in
@@ -23,7 +23,7 @@ this can be done via --enable-ebcdic. */
before the definition of an exported function, define this macro to contain the
relevant magic. It apears at the start of every exported function. */
-#define EXPORT
+#define PCRE_EXPORT
/* Define to empty if the "const" keyword does not work. */
diff --git a/configure b/configure
index d04f8c1..e178aa5 100755
--- a/configure
+++ b/configure
@@ -1501,13 +1501,13 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
- ac_config_headers="$ac_config_headers config.h:config.in"
+ ac_config_headers="$ac_config_headers config.h"
PCRE_MAJOR=6
-PCRE_MINOR=3
-PCRE_DATE=15-Aug-2005
+PCRE_MINOR=4
+PCRE_DATE=05-Sep-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
@@ -20872,7 +20872,7 @@ _ACEOF
fi
- ac_config_files="$ac_config_files Makefile pcre.h:pcre.in pcre-config:pcre-config.in libpcre.pc:libpcre.pc.in pcrecpp.h:pcrecpp.h.in pcre_stringpiece.h:pcre_stringpiece.h.in RunGrepTest:RunGrepTest.in RunTest:RunTest.in"
+ ac_config_files="$ac_config_files Makefile pcre.h:pcre.h.in pcre-config:pcre-config.in libpcre.pc:libpcre.pc.in pcrecpp.h:pcrecpp.h.in pcre_stringpiece.h:pcre_stringpiece.h.in RunGrepTest:RunGrepTest.in RunTest:RunTest.in"
ac_config_commands="$ac_config_commands default"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@@ -21403,7 +21403,7 @@ do
case "$ac_config_target" in
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
- "pcre.h" ) CONFIG_FILES="$CONFIG_FILES pcre.h:pcre.in" ;;
+ "pcre.h" ) CONFIG_FILES="$CONFIG_FILES pcre.h:pcre.h.in" ;;
"pcre-config" ) CONFIG_FILES="$CONFIG_FILES pcre-config:pcre-config.in" ;;
"libpcre.pc" ) CONFIG_FILES="$CONFIG_FILES libpcre.pc:libpcre.pc.in" ;;
"pcrecpp.h" ) CONFIG_FILES="$CONFIG_FILES pcrecpp.h:pcrecpp.h.in" ;;
@@ -21411,7 +21411,7 @@ do
"RunGrepTest" ) CONFIG_FILES="$CONFIG_FILES RunGrepTest:RunGrepTest.in" ;;
"RunTest" ) CONFIG_FILES="$CONFIG_FILES RunTest:RunTest.in" ;;
"default" ) CONFIG_COMMANDS="$CONFIG_COMMANDS default" ;;
- "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h:config.in" ;;
+ "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
*) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
{ (exit 1); exit 1; }; };;
diff --git a/configure.in b/configure.in
index a7ea54d..e011a8a 100644
--- a/configure.in
+++ b/configure.in
@@ -15,10 +15,10 @@ dnl A safety precaution
AC_PREREQ(2.57)
-dnl Arrange to build config.h from config.in. Note that pcre.h is
+dnl Arrange to build config.h from config.h.in. Note that pcre.h is
dnl built differently, as it is just a "substitution" file.
dnl Manual says this macro should come right after AC_INIT.
-AC_CONFIG_HEADER(config.h:config.in)
+AC_CONFIG_HEADER(config.h)
dnl Provide the current PCRE version information. Do not use numbers
dnl with leading zeros for the minor version, as they end up in a C
@@ -27,8 +27,8 @@ dnl digits for minor numbers less than 10. There are unlikely to be
dnl that many releases anyway.
PCRE_MAJOR=6
-PCRE_MINOR=3
-PCRE_DATE=15-Aug-2005
+PCRE_MINOR=4
+PCRE_DATE=05-Sep-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
dnl Default values for miscellaneous macros
@@ -287,4 +287,4 @@ if test "x$enable_shared" = "xno" ; then
fi
dnl This must be last; it determines what files are written as well as config.h
-AC_OUTPUT(Makefile pcre.h:pcre.in pcre-config:pcre-config.in libpcre.pc:libpcre.pc.in pcrecpp.h:pcrecpp.h.in pcre_stringpiece.h:pcre_stringpiece.h.in RunGrepTest:RunGrepTest.in RunTest:RunTest.in,[chmod a+x RunTest RunGrepTest pcre-config])
+AC_OUTPUT(Makefile pcre.h:pcre.h.in pcre-config:pcre-config.in libpcre.pc:libpcre.pc.in pcrecpp.h:pcrecpp.h.in pcre_stringpiece.h:pcre_stringpiece.h.in RunGrepTest:RunGrepTest.in RunTest:RunTest.in,[chmod a+x RunTest RunGrepTest pcre-config])
diff --git a/pcre.in b/pcre.h.in
index be1546c..be1546c 100644
--- a/pcre.in
+++ b/pcre.h.in
diff --git a/pcre_compile.c b/pcre_compile.c
index 2289952..9850399 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -45,6 +45,15 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
+/* When DEBUG is defined, we need the pcre_printint() function, which is also
+used by pcretest. DEBUG is not defined when building a production library. */
+
+#ifdef DEBUG
+#include "pcre_printint.src"
+#endif
+
+
+
/*************************************************
* Code parameters and static tables *
*************************************************/
@@ -3839,7 +3848,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
-EXPORT pcre *
+PCRE_EXPORT pcre *
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@@ -3847,7 +3856,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
-EXPORT pcre *
+PCRE_EXPORT pcre *
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{
@@ -5000,7 +5009,8 @@ if (reqbyte >= 0 &&
re->options |= PCRE_REQCHSET;
}
-/* Print out the compiled data for debugging */
+/* Print out the compiled data if debugging is enabled. This is never the
+case when building a production library. */
#ifdef DEBUG
@@ -5038,7 +5048,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
else printf("Req char = \\x%02x%s\n", ch, caseless);
}
-_pcre_printint(re, stdout);
+pcre_printint(re, stdout);
/* This check is done here in the debugging case so that the code that
was compiled can be seen. */
diff --git a/pcre_config.c b/pcre_config.c
index 04029a9..5538a70 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -58,7 +58,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
-EXPORT int
+PCRE_EXPORT int
pcre_config(int what, void *where)
{
switch (what)
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 7101570..c68f232 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -758,7 +758,7 @@ for (;;)
if (clen > 0)
{
int rqdtype, category;
- category = ucp_findchar(c, &chartype, &othercase);
+ category = _pcre_ucp_findchar(c, &chartype, &othercase);
rqdtype = code[1];
if (rqdtype >= 128)
{
@@ -865,7 +865,7 @@ for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }
if (clen > 0)
{
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[2];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
@@ -878,7 +878,7 @@ for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)
+ if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -887,7 +887,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
+ if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -911,7 +911,7 @@ for (;;)
ADD_ACTIVE(state_offset + 3, 0);
if (clen > 0)
{
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[2];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
@@ -932,7 +932,7 @@ for (;;)
QS2:
ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)
+ if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -941,7 +941,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
+ if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -958,7 +958,7 @@ for (;;)
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
int rqdtype = code[4];
if ((d == OP_PROP) ==
(rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))
@@ -978,7 +978,7 @@ for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
- if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)
+ if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -987,7 +987,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
+ if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1022,11 +1022,11 @@ for (;;)
/* If we have Unicode property support, we can use it to test the
other case of the character, if there is one. The result of
- ucp_findchar() is < 0 if the char isn't found, and othercase is
+ _pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is
returned as zero if there isn't another case. */
#ifdef SUPPORT_UCP
- if (ucp_findchar(c, &chartype, &othercase) < 0)
+ if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)
#endif
othercase = -1;
@@ -1050,7 +1050,7 @@ for (;;)
to wait for them to pass before continuing. */
case OP_EXTUNI:
- if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)
+ if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1058,7 +1058,7 @@ for (;;)
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
- if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break;
+ if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;
ncount++;
nptr += nclen;
}
@@ -1096,7 +1096,7 @@ for (;;)
if (utf8 && c >= 128)
{
#ifdef SUPPORT_UCP
- if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
+ if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
#endif /* SUPPORT_UCP */
}
else
@@ -1123,7 +1123,7 @@ for (;;)
if (utf8 && c >= 128)
{
#ifdef SUPPORT_UCP
- if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
+ if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
#endif /* SUPPORT_UCP */
}
else
@@ -1150,7 +1150,7 @@ for (;;)
if (utf8 && c >= 128)
{
#ifdef SUPPORT_UCP
- if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
+ if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;
#endif /* SUPPORT_UCP */
}
else
@@ -1181,7 +1181,7 @@ for (;;)
if (utf8 && c >= 128)
{
#ifdef SUPPORT_UCP
- if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
+ if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;
#endif /* SUPPORT_UCP */
}
else
@@ -1424,7 +1424,6 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
{
- const uschar *endcode;
int local_offsets[2];
int local_workspace[1000];
@@ -1446,7 +1445,6 @@ for (;;)
const uschar *end_subpattern = code;
int charcount = local_offsets[1] - local_offsets[0];
int next_state_offset, repeat_state_offset;
- BOOL is_repeated;
do { end_subpattern += GET(end_subpattern, 1); }
while (*end_subpattern == OP_ALT);
@@ -1613,7 +1611,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
-EXPORT int
+PCRE_EXPORT int
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
diff --git a/pcre_exec.c b/pcre_exec.c
index 65173e2..133b3a7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1354,7 +1354,7 @@ for (;;)
{
int chartype, rqdtype;
int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
rqdtype = *(++ecode);
ecode++;
@@ -1381,7 +1381,7 @@ for (;;)
{
int chartype;
int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -1390,7 +1390,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- category = ucp_findchar(c, &chartype, &othercase);
+ category = _pcre_ucp_findchar(c, &chartype, &othercase);
if (category != ucp_M) break;
eptr += len;
}
@@ -1841,7 +1841,7 @@ for (;;)
ecode += length;
/* If we have Unicode property support, we can use it to test the other
- case of the character, if there is one. The result of ucp_findchar() is
+ case of the character, if there is one. The result of _pcre_ucp_findchar() is
< 0 if the char isn't found, and othercase is returned as zero if there
isn't one. */
@@ -1850,7 +1850,7 @@ for (;;)
#ifdef SUPPORT_UCP
int chartype;
int othercase;
- if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)
+ if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -1920,7 +1920,7 @@ for (;;)
int othercase;
int chartype;
if ((ims & PCRE_CASELESS) != 0 &&
- ucp_findchar(fc, &chartype, &othercase) >= 0 &&
+ _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&
othercase > 0)
oclength = _pcre_ord2utf8(othercase, occhars);
#endif /* SUPPORT_UCP */
@@ -2439,7 +2439,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
GETCHARINC(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if ((*prop_test_variable == prop_test_against) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2453,7 +2453,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2462,7 +2462,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2632,7 +2632,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if ((*prop_test_variable == prop_test_against) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2649,7 +2649,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2658,7 +2658,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2790,7 +2790,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if ((*prop_test_variable == prop_test_against) == prop_fail_result)
break;
eptr+= len;
@@ -2816,7 +2816,7 @@ for (;;)
{
if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category == ucp_M) break;
while (eptr < md->end_subject)
{
@@ -2825,7 +2825,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2846,7 +2846,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);
if (prop_category != ucp_M) break;
eptr--;
}
@@ -3200,7 +3200,7 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
-EXPORT int
+PCRE_EXPORT int
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index ac80e65..cd1a9a4 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -61,7 +61,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
-EXPORT int
+PCRE_EXPORT int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{
diff --git a/pcre_info.c b/pcre_info.c
index 228949d..18741b1 100644
--- a/pcre_info.c
+++ b/pcre_info.c
@@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
or negative values on error
*/
-EXPORT int
+PCRE_EXPORT int
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;
diff --git a/pcre_internal.h b/pcre_internal.h
index 67c0186..a56f43c 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -42,12 +42,14 @@ POSSIBILITY OF SUCH DAMAGE.
modules, but which are not relevant to the exported API. This includes some
functions whose names all begin with "_pcre_". */
+#ifndef PCRE_INTERNAL_H
+#define PCRE_INTERNAL_H
/* Define DEBUG to get debugging output on stdout. */
-/****
+#if 0
#define DEBUG
-****/
+#endif
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
inline, and there are *still* stupid compilers about that don't like indented
@@ -112,14 +114,10 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */
typedef unsigned char uschar;
-/* Include the public PCRE header */
+/* Include the public PCRE header and the definitions of UCP character property
+values. */
#include "pcre.h"
-
-/* Include the (copy of) the public ucp header, changing the external name into
-a private one. This does no harm, even if we aren't compiling UCP support. */
-
-#define ucp_findchar _pcre_ucp_findchar
#include "ucp.h"
/* When compiling for use with the Virtual Pascal compiler, these functions
@@ -862,7 +860,7 @@ total length. */
#define tables_length (ctypes_offset + 256)
/* Layout of the UCP type table that translates property names into codes for
-ucp_findchar(). */
+_pcre_ucp_findchar(). */
typedef struct {
const char *name;
@@ -895,11 +893,12 @@ one of the exported public functions. They have to be "external" in the C
sense, but are not part of the PCRE public API. */
extern int _pcre_ord2utf8(int, uschar *);
-extern void _pcre_printint(pcre *, FILE *);
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
const pcre_study_data *, pcre_study_data *);
extern int _pcre_ucp_findchar(const int, int *, int *);
extern int _pcre_valid_utf8(const uschar *, int);
extern BOOL _pcre_xclass(int, const uschar *);
+#endif
+
/* End of pcre_internal.h */
diff --git a/pcre_ord2utf8.c b/pcre_ord2utf8.c
index f6a06fc..82fa3c7 100644
--- a/pcre_ord2utf8.c
+++ b/pcre_ord2utf8.c
@@ -59,7 +59,7 @@ Arguments:
Returns: number of characters placed in the buffer
*/
-EXPORT int
+PCRE_EXPORT int
_pcre_ord2utf8(int cvalue, uschar *buffer)
{
register int i, j;
diff --git a/pcre_printint.c b/pcre_printint.src
index d18f399..410f920 100644
--- a/pcre_printint.c
+++ b/pcre_printint.src
@@ -38,12 +38,15 @@ POSSIBILITY OF SUCH DAMAGE.
*/
-/* This module contains an PCRE private debugging function for printing out the
+/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
-local functions. */
+local functions. This source file is used in two places:
+(1) It is #included by pcre_compile.c when it is compiled in debugging mode
+(DEBUG defined in pcre_internal.h). It is not included in production compiles.
-#include "pcre_internal.h"
+(2) It is always #included by pcretest.c, which can be asked to print out a
+compiled regex for debugging purposes. */
static const char *OP_names[] = { OP_NAME_LIST };
@@ -121,8 +124,8 @@ return "??";
/* Make this function work for a regex with integers either byte order.
However, we assume that what we are passed is a compiled regex. */
-EXPORT void
-_pcre_printint(pcre *external_re, FILE *f)
+static void
+pcre_printint(pcre *external_re, FILE *f)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
@@ -448,4 +451,4 @@ for(;;)
}
}
-/* End of pcre_printint.c */
+/* End of pcre_printint.src */
diff --git a/pcre_refcount.c b/pcre_refcount.c
index 35a7ee8..79fde42 100644
--- a/pcre_refcount.c
+++ b/pcre_refcount.c
@@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
-EXPORT int
+PCRE_EXPORT int
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;
diff --git a/pcre_study.c b/pcre_study.c
index 7c10c04..58f2414 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -401,7 +401,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
-EXPORT pcre_extra *
+PCRE_EXPORT pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
diff --git a/pcre_tables.c b/pcre_tables.c
index 4f442ea..f91e881 100644
--- a/pcre_tables.c
+++ b/pcre_tables.c
@@ -39,7 +39,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains some fixed tables that are used by more than one of the
-PCRE code modules. */
+PCRE code modules. The tables are also #included by the pcretest program, which
+uses macros to change their names from _pcre_xxx to xxx, thereby avoiding name
+clashes with the library. */
#include "pcre_internal.h"
@@ -81,8 +83,7 @@ const uschar _pcre_utf8_table4[] = {
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* This table translates Unicode property names into code values for the
-ucp_findchar() function. It is used by pcretest as well as by the library
-functions. */
+_pcre_ucp_findchar() function. */
const ucp_type_table _pcre_utt[] = {
{ "C", 128 + ucp_C },
diff --git a/pcre_try_flipped.c b/pcre_try_flipped.c
index a07bb23..536eb05 100644
--- a/pcre_try_flipped.c
+++ b/pcre_try_flipped.c
@@ -94,7 +94,7 @@ Returns: the new block if is is indeed a byte-flipped regex
NULL if it is not
*/
-EXPORT real_pcre *
+PCRE_EXPORT real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
const pcre_study_data *study, pcre_study_data *internal_study)
{
diff --git a/pcre_ucp_findchar.c b/pcre_ucp_findchar.c
index 9f8de06..b9a4c97 100644
--- a/pcre_ucp_findchar.c
+++ b/pcre_ucp_findchar.c
@@ -41,13 +41,121 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module compiles code for supporting the use of Unicode character
properties. We use the (embryonic at the time of writing) UCP library, by
including some of its files, copies of which have been put in the PCRE
-distribution. There is a macro in pcre_internal.h that changes the name
-ucp_findchar into _pcre_ucp_findchar. */
+distribution. The actual search function is reproduced here, with its name
+changed. */
#include "pcre_internal.h"
-#include "ucp_findchar.c"
+#include "ucp.h" /* Category definitions */
+#include "ucpinternal.h" /* Internal table details */
+#include "ucptable.c" /* The table itself */
+
+/*************************************************
+* Search table and return data *
+*************************************************/
+
+/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
+character type is ucp_Lu, ucp_Nd, etc.
+
+Arguments:
+ c the character value
+ type_ptr the detailed character type is returned here
+ case_ptr for letters, the opposite case is returned here, if there
+ is one, else zero
+
+Returns: the character type category or -1 if not found
+*/
+
+PCRE_EXPORT int
+_pcre_ucp_findchar(const int c, int *type_ptr, int *case_ptr)
+{
+cnode *node = ucp_table;
+register int cc = c;
+int case_offset;
+
+for (;;)
+ {
+ register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
+ if (cc == d) break;
+ if (cc < d)
+ {
+ if ((node->f0 & f0_leftexists) == 0) return -1;
+ node ++;
+ }
+ else
+ {
+ register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
+ if (roffset == 0) return -1;
+ node += 1 << (roffset - 1);
+ }
+ }
+
+switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
+ {
+ case ucp_Cc:
+ case ucp_Cf:
+ case ucp_Cn:
+ case ucp_Co:
+ case ucp_Cs:
+ return ucp_C;
+ break;
+
+ case ucp_Ll:
+ case ucp_Lu:
+ case_offset = node->f2 & f2_casemask;
+ if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
+ *case_ptr = (case_offset == 0)? 0 : cc + case_offset;
+ return ucp_L;
+
+ case ucp_Lm:
+ case ucp_Lo:
+ case ucp_Lt:
+ *case_ptr = 0;
+ return ucp_L;
+ break;
+
+ case ucp_Mc:
+ case ucp_Me:
+ case ucp_Mn:
+ return ucp_M;
+ break;
+
+ case ucp_Nd:
+ case ucp_Nl:
+ case ucp_No:
+ return ucp_N;
+ break;
+
+ case ucp_Pc:
+ case ucp_Pd:
+ case ucp_Pe:
+ case ucp_Pf:
+ case ucp_Pi:
+ case ucp_Ps:
+ case ucp_Po:
+ return ucp_P;
+ break;
+
+ case ucp_Sc:
+ case ucp_Sk:
+ case ucp_Sm:
+ case ucp_So:
+ return ucp_S;
+ break;
+
+ case ucp_Zl:
+ case ucp_Zp:
+ case ucp_Zs:
+ return ucp_Z;
+ break;
+
+ default: /* "Should never happen" */
+ return -1;
+ break;
+ }
+}
+
/* End of pcre_ucp_findchar.c */
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index 72f0f5e..2fe3bbb 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -63,7 +63,7 @@ Returns: < 0 if the string is a valid UTF-8 string
>= 0 otherwise; the value is the offset of the bad byte
*/
-EXPORT int
+PCRE_EXPORT int
_pcre_valid_utf8(const uschar *string, int length)
{
register const uschar *p;
diff --git a/pcre_version.c b/pcre_version.c
index d296eea..2d3080a 100644
--- a/pcre_version.c
+++ b/pcre_version.c
@@ -52,7 +52,7 @@ string that identifies the PCRE version that is in use. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
-EXPORT const char *
+PCRE_EXPORT const char *
pcre_version(void)
{
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
diff --git a/pcre_xclass.c b/pcre_xclass.c
index 40d2654..4a4f895 100644
--- a/pcre_xclass.c
+++ b/pcre_xclass.c
@@ -60,7 +60,7 @@ Arguments:
Returns: TRUE if character matches, else FALSE
*/
-EXPORT BOOL
+PCRE_EXPORT BOOL
_pcre_xclass(int c, const uschar *data)
{
int t;
@@ -102,7 +102,7 @@ while ((t = *data++) != XCL_END)
{
int chartype, othercase;
int rqdtype = *data++;
- int category = ucp_findchar(c, &chartype, &othercase);
+ int category = _pcre_ucp_findchar(c, &chartype, &othercase);
if (rqdtype >= 128)
{
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
diff --git a/pcregrep.c b/pcregrep.c
index f6e5180..d390ae5 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE.
typedef int BOOL;
-#define VERSION "4.0 07-Jun-2005"
+#define VERSION "4.1 05-Sep-2005"
#define MAX_PATTERN_COUNT 100
#if BUFSIZ > 8192
@@ -531,9 +531,11 @@ while (ptr < endptr)
}
/* Now print the matching line(s); ensure we set hyphenpending at the end
- of the file. */
+ of the file if any context lines are being output. */
+
+ if (after_context > 0 || before_context > 0)
+ endhyphenpending = TRUE;
- endhyphenpending = TRUE;
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%d:", linenumber);
diff --git a/pcreposix.c b/pcreposix.c
index 06fd58b..c8f25ad 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -131,7 +131,7 @@ static const char *const pstring[] = {
* Translate error code to string *
*************************************************/
-EXPORT size_t
+PCRE_EXPORT size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
const char *message, *addmessage;
@@ -166,7 +166,7 @@ return length + addlength;
* Free store held by a regex *
*************************************************/
-EXPORT void
+PCRE_EXPORT void
regfree(regex_t *preg)
{
(pcre_free)(preg->re_pcre);
@@ -189,7 +189,7 @@ Returns: 0 on success
various non-zero codes on failure
*/
-EXPORT int
+PCRE_EXPORT int
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
@@ -225,7 +225,7 @@ ints. However, if the number of possible capturing brackets is small, use a
block of store on the stack, to reduce the use of malloc/free. The threshold is
in a macro that can be changed at configure time. */
-EXPORT int
+PCRE_EXPORT int
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
diff --git a/pcretest.c b/pcretest.c
index 9b63470..4a7c1fd 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -46,13 +46,34 @@ POSSIBILITY OF SUCH DAMAGE.
#define PCRE_SPY /* For Win32 build, import data, not export */
-/* We need the internal info for displaying the results of pcre_study() and
-other internal data; pcretest also uses some of the fixed tables, and generally
-has "inside information" compared to a program that strictly follows the PCRE
-API. */
+/* We include pcre_internal.h because we need the internal info for displaying
+the results of pcre_study() and we also need to know about the internal
+macros, structures, and other internal data values; pcretest has "inside
+information" compared to a program that strictly follows the PCRE API. */
#include "pcre_internal.h"
+/* We need access to the data tables that PCRE uses. So as not to have to keep
+two copies, we include the source file here, changing the names of the external
+symbols to prevent clashes. */
+
+#define _pcre_utf8_table1 utf8_table1
+#define _pcre_utf8_table1_size utf8_table1_size
+#define _pcre_utf8_table2 utf8_table2
+#define _pcre_utf8_table3 utf8_table3
+#define _pcre_utf8_table4 utf8_table4
+#define _pcre_utt utt
+#define _pcre_utt_size utt_size
+#define _pcre_OP_lengths OP_lengths
+
+#include "pcre_tables.c"
+
+/* We also need the pcre_printint() function for printing out compiled
+patterns. This function is in a separate file so that it can be included in
+pcre_compile.c when that module is compiled with debugging enabled. */
+
+#include "pcre_printint.src"
+
/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
@@ -68,6 +89,8 @@ to the DFA matcher (NODFA), and without the doublecheck of the old "info"
function (define NOINFOCHECK). */
+/* Other parameters */
+
#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
@@ -83,6 +106,8 @@ function (define NOINFOCHECK). */
#define DBUFFER_SIZE BUFFER_SIZE
+/* Static variables */
+
static FILE *outfile;
static int log_store = 0;
static int callout_count;
@@ -162,7 +187,7 @@ if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
/* i now has a value in the range 1-5 */
s = 6*i;
-d = (c & _pcre_utf8_table3[i]) << s;
+d = (c & utf8_table3[i]) << s;
for (j = 0; j < i; j++)
{
@@ -174,8 +199,8 @@ for (j = 0; j < i; j++)
/* Check that encoding was the correct unique one */
-for (j = 0; j < _pcre_utf8_table1_size; j++)
- if (d <= _pcre_utf8_table1[j]) break;
+for (j = 0; j < utf8_table1_size; j++)
+ if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);
/* Valid value */
@@ -189,6 +214,38 @@ return i+1;
/*************************************************
+* Convert character value to UTF-8 *
+*************************************************/
+
+/* This function takes an integer value in the range 0 - 0x7fffffff
+and encodes it as a UTF-8 character in 1 to 6 bytes.
+
+Arguments:
+ cvalue the character value
+ buffer pointer to buffer for result - at least 6 bytes long
+
+Returns: number of bytes placed in the buffer
+*/
+
+static int
+ord2utf8(int cvalue, uschar *buffer)
+{
+register int i, j;
+for (i = 0; i < utf8_table1_size; i++)  /* Find first table entry >= cvalue */
+ if (cvalue <= utf8_table1[i]) break;   /* i = number of trailing bytes */
+buffer += i;                            /* Bytes are written last-to-first */
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);    /* Low 6 bits, with the 0x80 bit set */
+ cvalue >>= 6;                          /* Discard the bits just stored */
+ }
+*buffer = utf8_table2[i] | cvalue;      /* First byte: table2 bits + remainder */
+return i + 1;                           /* Trailing bytes plus the first byte */
+}
+
+
+
+/*************************************************
* Print character string *
*************************************************/
@@ -198,7 +255,7 @@ chars without printing. */
static int pchars(unsigned char *p, int length, FILE *f)
{
-int c;
+int c = 0;
int yield = 0;
while (length-- > 0)
@@ -988,7 +1045,7 @@ while (!done)
if (do_debug)
{
fprintf(outfile, "------------------------------------------------------------------\n");
- _pcre_printint(re, outfile);
+ pcre_printint(re, outfile);
}
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
@@ -1290,7 +1347,7 @@ while (!done)
{
unsigned char buff8[8];
int ii, utn;
- utn = _pcre_ord2utf8(c, buff8);
+ utn = ord2utf8(c, buff8);
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
c = buff8[ii]; /* Last byte */
p = pt + 1;
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 64b060b..2ea9a91 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -1,4 +1,4 @@
-pcregrep version 4.0 07-Jun-2005 using PCRE version 6.3 15-Aug-2005
+pcregrep version 4.1 05-Sep-2005 using PCRE version 6.4 05-Sep-2005
---------------------------- Test 1 ------------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
@@ -17,7 +17,6 @@ PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput:10:This pattern is in lower case.
./testdata/grepinput:586:Check up on PATTERN near the end.
---
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern
---------------------------- Test 6 ------------------------------
@@ -25,7 +24,6 @@ PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case.
586:Check up on PATTERN near the end.
---
3:Here is the pattern again.
5:Pattern
---------------------------- Test 7 ------------------------------
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 6fc1e8f..bb7eb55 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/the quick brown fox/
the quick brown fox
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 21031f0..29dfa16 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/(a)b|/
Capturing subpattern count = 1
diff --git a/testdata/testoutput3 b/testdata/testoutput3
index 2dbfcd3..af5ac53 100644
--- a/testdata/testoutput3
+++ b/testdata/testoutput3
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/^[\w]+/
*** Failers
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index f153b70..41f888f 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/-- Do not use the \x{} construct except with patterns that have the --/
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index d2122f2..da697db 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/\x{100}/8DM
Memory allocation (code space): 10
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index a93af3e..1a8d96d 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/^\pC\pL\pM\pN\pP\pS\pZ</8
\x7f\x{c0}\x{30f}\x{660}\x{66c}\x{f01}\x{1680}<
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 878c8ff..eda8b16 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/abc/
abc
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index ac52ca3..99b375e 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/-- Do not use the \x{} construct except with patterns that have the --/
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 92174a0..2598a74 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1,4 +1,4 @@
-PCRE version 6.3 15-Aug-2005
+PCRE version 6.4 05-Sep-2005
/\pL\P{Nd}/8
AB
diff --git a/ucp.h b/ucp.h
index 955e104..b216e0e 100644
--- a/ucp.h
+++ b/ucp.h
@@ -53,8 +53,6 @@ enum {
ucp_Zs /* Space separator */
};
-extern int ucp_findchar(const int, int *, int *);
-
#endif
/* End of ucp.h */
diff --git a/ucp_findchar.c b/ucp_findchar.c
deleted file mode 100644
index 1f45117..0000000
--- a/ucp_findchar.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*************************************************
-* libucp - Unicode Property Table handler *
-*************************************************/
-
-/* Copyright (c) University of Cambridge 2004 */
-
-/* This little library provides a fast way of obtaining the basic Unicode
-properties of a character, using a compact binary tree that occupies less than
-100K bytes.
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-#include "ucp.h" /* Exported interface */
-#include "ucpinternal.h" /* Internal table details */
-#include "ucptable.c" /* The table itself */
-
-
-/* In some environments, external functions have to be preceded by some magic.
-In my world (Unix), they do not. Use a macro to deal with this. */
-
-#ifndef EXPORT
-#define EXPORT
-#endif
-
-
-
-/*************************************************
-* Search table and return data *
-*************************************************/
-
-/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
-character type is ucp_Lu, ucp_Nd, etc.
-
-Arguments:
- c the character value
- type_ptr the detailed character type is returned here
- case_ptr for letters, the opposite case is returned here, if there
- is one, else zero
-
-Returns: the character type category or -1 if not found
-*/
-
-EXPORT int
-ucp_findchar(const int c, int *type_ptr, int *case_ptr)
-{
-cnode *node = ucp_table;
-register int cc = c;
-int case_offset;
-
-for (;;)
- {
- register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
- if (cc == d) break;
- if (cc < d)
- {
- if ((node->f0 & f0_leftexists) == 0) return -1;
- node ++;
- }
- else
- {
- register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
- if (roffset == 0) return -1;
- node += 1 << (roffset - 1);
- }
- }
-
-switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
- {
- case ucp_Cc:
- case ucp_Cf:
- case ucp_Cn:
- case ucp_Co:
- case ucp_Cs:
- return ucp_C;
- break;
-
- case ucp_Ll:
- case ucp_Lu:
- case_offset = node->f2 & f2_casemask;
- if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
- *case_ptr = (case_offset == 0)? 0 : cc + case_offset;
- return ucp_L;
-
- case ucp_Lm:
- case ucp_Lo:
- case ucp_Lt:
- *case_ptr = 0;
- return ucp_L;
- break;
-
- case ucp_Mc:
- case ucp_Me:
- case ucp_Mn:
- return ucp_M;
- break;
-
- case ucp_Nd:
- case ucp_Nl:
- case ucp_No:
- return ucp_N;
- break;
-
- case ucp_Pc:
- case ucp_Pd:
- case ucp_Pe:
- case ucp_Pf:
- case ucp_Pi:
- case ucp_Ps:
- case ucp_Po:
- return ucp_P;
- break;
-
- case ucp_Sc:
- case ucp_Sk:
- case ucp_Sm:
- case ucp_So:
- return ucp_S;
- break;
-
- case ucp_Zl:
- case ucp_Zp:
- case ucp_Zs:
- return ucp_Z;
- break;
-
- default: /* "Should never happen" */
- return -1;
- break;
- }
-}
-
-/* End of ucp_findchar.c */