summary refs log tree commit diff
diff options
context:
space:
mode:
author zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-10 02:20:06 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-10 02:20:06 +0000
commit 669e6f0bbc3b07f6df3b0d0cafba3555e39e433c (patch)
tree 37c97c3fa732981cf8d2dbed54d27cca37fa8fac
parent 24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (diff)
download pcre-669e6f0bbc3b07f6df3b0d0cafba3555e39e433c.tar.gz
extending the 16 bit API, mode check, and fixes
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@795 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- Makefile.am 2
-rw-r--r-- pcre.h.in 22
-rw-r--r-- pcre16_dfa_exec.c 45
-rw-r--r-- pcre16_get.c 45
-rw-r--r-- pcre16_ord2utf16.c 2
-rw-r--r-- pcre_compile.c 110
-rw-r--r-- pcre_dfa_exec.c 72
-rw-r--r-- pcre_exec.c 7
-rw-r--r-- pcre_fullinfo.c 1
-rw-r--r-- pcre_get.c 125
-rw-r--r-- pcre_info.c 1
-rw-r--r-- pcre_internal.h 32
-rw-r--r-- pcre_jit_compile.c 2
-rw-r--r-- pcre_jit_test.c 107
-rw-r--r-- pcre_newline.c 4
-rw-r--r-- pcre_ord2utf8.c 4
-rw-r--r-- pcre_study.c 21
-rw-r--r-- pcre_valid_utf8.c 4
-rw-r--r-- pcre_xclass.c 31
-rw-r--r-- pcreposix.c 1
20 files changed, 484 insertions, 154 deletions
diff --git a/Makefile.am b/Makefile.am
index b64ccd5..ac2c675 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -212,8 +212,10 @@ libpcre16_la_SOURCES = \
pcre16_chartables.c \
pcre16_compile.c \
pcre16_config.c \
+ pcre16_dfa_exec.c \
pcre16_exec.c \
pcre16_fullinfo.c \
+ pcre16_get.c \
pcre16_info.c \
pcre16_jit_compile.c \
pcre16_newline.c \
diff --git a/pcre.h.in b/pcre.h.in
index ea11766..e83f2cf 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -166,6 +166,7 @@ compile-time only bits for runtime options, or vice versa. */
#define PCRE_ERROR_SHORTUTF8 (-25)
#define PCRE_ERROR_RECURSELOOP (-26)
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
+#define PCRE_ERROR_BADMODE (-28)
/* Specific error codes for UTF-8 validity checks */
@@ -357,29 +358,46 @@ PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre16_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
-PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
- int);
+PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre *, PCRE_SPTR16,
+ int *, int, PCRE_SPTR16, PCRE_SCHAR16 *, int);
+PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
+ char *, int);
+PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
+ PCRE_SCHAR16 *, int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int pcre16_dfa_exec(const pcre *, const pcre_extra *,
+ PCRE_SPTR16, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL int pcre16_exec(const pcre *, const pcre_extra *, PCRE_SPTR16,
int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre16_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
+PCRE_EXP_DECL int pcre16_get_named_substring(const pcre *, PCRE_SPTR16,
+ int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre *, PCRE_SPTR16);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
+PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre *, PCRE_SPTR16,
+ PCRE_SCHAR16 **, PCRE_SCHAR16 **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
+PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
+ PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
+PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
+ PCRE_SPTR16 **);
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL int pcre16_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
diff --git a/pcre16_dfa_exec.c b/pcre16_dfa_exec.c
new file mode 100644
index 0000000..dc6ea49
--- /dev/null
+++ b/pcre16_dfa_exec.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_dfa_exec.c"
+
+/* End of pcre16_dfa_exec.c */
diff --git a/pcre16_get.c b/pcre16_get.c
new file mode 100644
index 0000000..0b9bd61
--- /dev/null
+++ b/pcre16_get.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_get.c"
+
+/* End of pcre16_get.c */
diff --git a/pcre16_ord2utf16.c b/pcre16_ord2utf16.c
index 99bed29..c0d3ee5 100644
--- a/pcre16_ord2utf16.c
+++ b/pcre16_ord2utf16.c
@@ -87,7 +87,7 @@ return 2;
#else /* SUPPORT_UTF */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
-(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif /* SUPPORT_UTF */
}
diff --git a/pcre_compile.c b/pcre_compile.c
index 24a7b1c..3fa7c67 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -2357,7 +2357,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
actual length is stored in the compiled code, so we must update "code"
here. */
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
ccode = code += GET(code, 1);
goto CHECK_CLASS_REPEAT;
@@ -2367,7 +2367,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_NCLASS:
ccode = code + PRIV(OP_lengths)[OP_CLASS];
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
CHECK_CLASS_REPEAT:
#endif
@@ -2980,7 +2980,7 @@ the next item is a character. */
if (next >= 0) switch(op_code)
{
case OP_CHAR:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
@@ -2992,13 +2992,13 @@ if (next >= 0) switch(op_code)
high-valued characters. */
case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
#endif
if (c == next) return FALSE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf)
{
unsigned int othercase;
@@ -3011,7 +3011,7 @@ if (next >= 0) switch(op_code)
return (unsigned int)c != othercase;
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
return (c != cd->fcc[next]); /* Non-UTF-8 mode */
/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
@@ -3023,7 +3023,7 @@ if (next >= 0) switch(op_code)
case OP_NOTI:
if ((c = *previous) == next) return TRUE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf)
{
unsigned int othercase;
@@ -3036,7 +3036,7 @@ if (next >= 0) switch(op_code)
return (unsigned int)c == othercase;
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
return (c == cd->fcc[next]); /* Non-UTF-8 mode */
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
@@ -3128,7 +3128,7 @@ switch(op_code)
{
case OP_CHAR:
case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
@@ -3358,7 +3358,7 @@ pcre_uint8 classbits[32];
must not do this for other options (e.g. PCRE_EXTENDED) because they may change
dynamically as we process the pattern. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0;
pcre_uchar utf_chars[6];
@@ -4150,7 +4150,7 @@ for (;; ptr++)
goto LONE_SINGLE_CHARACTER;
}
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf)
{ /* Braces are required because the */
GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
@@ -4200,7 +4200,9 @@ for (;; ptr++)
matching for characters > 127 is available only if UCP support is
available. */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+ if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
+#elif defined SUPPORT_UTF
if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
#elif !(defined COMPILE_PCRE8)
if (d > 255)
@@ -4214,7 +4216,11 @@ for (;; ptr++)
they fit with the basic range. */
#ifdef SUPPORT_UCP
+#ifndef COMPILE_PCRE8
+ if (utf && (options & PCRE_CASELESS) != 0)
+#else
if ((options & PCRE_CASELESS) != 0)
+#endif
{
unsigned int occ, ocd;
unsigned int cc = c;
@@ -4257,12 +4263,25 @@ for (;; ptr++)
*class_uchardata++ = XCL_RANGE;
#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ if (utf)
+ {
+ class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(d, class_uchardata);
+ }
+ else
+ {
+ *class_uchardata++ = c;
+ *class_uchardata++ = d;
+ }
+#else
class_uchardata += PRIV(ord2utf)(c, class_uchardata);
class_uchardata += PRIV(ord2utf)(d, class_uchardata);
-#else
+#endif
+#else /* SUPPORT_UTF */
*class_uchardata++ = c;
*class_uchardata++ = d;
-#endif
+#endif /* SUPPORT_UTF */
/* With UCP support, we are done. Without UCP support, there is no
caseless matching for UTF characters > 127; we can use the bit map
@@ -4270,9 +4289,26 @@ for (;; ptr++)
can still use */
#ifdef SUPPORT_UCP
- continue; /* With next character in the class */
-#else
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ if (utf)
+#endif
+ continue; /* With next character in the class */
+#endif /* SUPPORT_UCP */
+
+#if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
+ if (utf)
+ {
+ if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
+ /* Adjust upper limit and fall through to set up the map */
+ d = 127;
+ }
+ else
+ {
+ if (c > 255) continue;
+ /* Adjust upper limit and fall through to set up the map */
+ d = 255;
+ }
+#elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
/* Adjust upper limit and fall through to set up the map */
d = 127;
@@ -4280,10 +4316,9 @@ for (;; ptr++)
if (c > 255) continue;
/* Adjust upper limit and fall through to set up the map */
d = 255;
-#endif /* SUPPORT_UTF */
-#endif /* SUPPORT_UCP */
+#endif /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
}
-#endif /* SUPPORT_UTF8 || COMPILE_PCRE16 */
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* We use the bit map for 8 bit mode, or when the characters fall
partially or entirely to [0-255] ([0-127] for UCP) ranges. */
@@ -4314,7 +4349,9 @@ for (;; ptr++)
/* Handle a character that cannot go in the bit map */
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+ if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
+#elif defined SUPPORT_UTF
if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
#elif !(defined COMPILE_PCRE8)
if (c > 255)
@@ -4324,13 +4361,26 @@ for (;; ptr++)
xclass = TRUE;
*class_uchardata++ = XCL_SINGLE;
#ifdef SUPPORT_UTF
- class_uchardata += PRIV(ord2utf)(c, class_uchardata);
-#else
- *class_uchardata++ = c;
+#ifndef COMPILE_PCRE8
+ /* In non 8 bit mode, we can get here even
+ if we are not in UTF mode. */
+ if (!utf)
+ *class_uchardata++ = c;
+ else
#endif
+ class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+#else /* SUPPORT_UTF */
+ *class_uchardata++ = c;
+#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
+#ifdef COMPILE_PCRE8
if ((options & PCRE_CASELESS) != 0)
+#else
+ /* In non 8 bit mode, we can get here even
+ if we are not in UTF mode. */
+ if (utf && (options & PCRE_CASELESS) != 0)
+#endif
{
unsigned int othercase;
if ((othercase = UCD_OTHERCASE(c)) != c)
@@ -4415,7 +4465,7 @@ for (;; ptr++)
/* For a single, positive character, get the value into mcbuffer, and
then we can handle this with the normal one-character code. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && class_lastchar > 127)
mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
else
@@ -4843,7 +4893,7 @@ for (;; ptr++)
else if (*previous == OP_CLASS ||
*previous == OP_NCLASS ||
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
*previous == OP_XCLASS ||
#endif
*previous == OP_REF ||
@@ -6635,7 +6685,7 @@ for (;; ptr++)
a value > 127. We set its representation in the length/buffer, and then
handle it as a data character. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && c > 127)
mclength = PRIV(ord2utf)(c, mcbuffer);
else
@@ -7471,12 +7521,12 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
-/* Can't support UTF8 unless PCRE has been compiled to include the code. The
+/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
not used here. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
{
@@ -7673,7 +7723,7 @@ code = (pcre_uchar *)codestart;
&firstchar, &reqchar, NULL, cd, NULL);
re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
-re->flags = cd->external_flags;
+re->flags = cd->external_flags | PCRE_MODE;
if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index a5bc745..58197ce 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -413,7 +413,7 @@ const pcre_uchar *start_subject = md->start_subject;
const pcre_uchar *end_subject = md->end_subject;
const pcre_uchar *start_code = md->start_code;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
BOOL utf = (md->poptions & PCRE_UTF8) != 0;
#else
BOOL utf = FALSE;
@@ -471,7 +471,7 @@ if (*first_op == OP_REVERSE)
/* If we can't go back the amount required for the longest lookbehind
pattern, go back as far as we can; some alternatives may still be viable. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* In character mode we have to step back character by character */
if (utf)
@@ -603,9 +603,9 @@ for (;;)
if (ptr < end_subject)
{
clen = 1; /* Number of bytes in the character */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(c, ptr, clen); } else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
c = *ptr;
}
else
@@ -692,9 +692,9 @@ for (;;)
if (coptable[codevalue] > 0)
{
dlen = 1;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
d = code[coptable[codevalue]];
if (codevalue >= OP_TYPESTAR)
{
@@ -957,8 +957,8 @@ for (;;)
{
const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF8
- if (utf) BACKCHAR(temp);
+#ifdef SUPPORT_UTF
+ if (utf) { BACKCHAR(temp); }
#endif
GETCHARTEST(d, temp);
#ifdef SUPPORT_UCP
@@ -1983,28 +1983,28 @@ for (;;)
case OP_CHARI:
if (clen == 0) break;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf)
{
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
{
unsigned int othercase;
- if (c < 128) othercase = fcc[c]; else
-
- /* If we have Unicode property support, we can use it to test the
- other case of the character. */
-
+ if (c < 128)
+ othercase = fcc[c];
+ else
+ /* If we have Unicode property support, we can use it to test the
+ other case of the character. */
#ifdef SUPPORT_UCP
- othercase = UCD_OTHERCASE(c);
+ othercase = UCD_OTHERCASE(c);
#else
- othercase = NOTACHAR;
+ othercase = NOTACHAR;
#endif
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
}
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
/* Not UTF mode */
{
if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
@@ -2207,7 +2207,7 @@ for (;;)
unsigned int otherd = NOTACHAR;
if (caseless)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
@@ -2215,7 +2215,7 @@ for (;;)
#endif /* SUPPORT_UCP */
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
otherd = fcc[d];
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2254,7 +2254,7 @@ for (;;)
unsigned int otherd = NOTACHAR;
if (caseless)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
@@ -2262,7 +2262,7 @@ for (;;)
#endif /* SUPPORT_UCP */
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
otherd = fcc[d];
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2299,7 +2299,7 @@ for (;;)
unsigned int otherd = NOTACHAR;
if (caseless)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
@@ -2307,7 +2307,7 @@ for (;;)
#endif /* SUPPORT_UCP */
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
otherd = fcc[d];
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2336,7 +2336,7 @@ for (;;)
unsigned int otherd = NOTACHAR;
if (caseless)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
@@ -2344,7 +2344,7 @@ for (;;)
#endif /* SUPPORT_UCP */
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
otherd = fcc[d];
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2380,7 +2380,7 @@ for (;;)
unsigned int otherd = NOTACHAR;
if (caseless)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
@@ -2388,7 +2388,7 @@ for (;;)
#endif /* SUPPORT_UCP */
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
otherd = fcc[d];
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2438,7 +2438,7 @@ for (;;)
else
{
ecode = code + GET(code, 1);
- if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE);
+ if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
}
/* At this point, isinclass is set for all kinds of class, and ecode
@@ -2994,10 +2994,17 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount, int *workspace, int wscount)
+#endif
{
real_pcre *re = (real_pcre *)argument_re;
dfa_match_data match_block;
@@ -3062,14 +3069,15 @@ if (re->magic_number != MAGIC_NUMBER)
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* Set some local values */
-current_subject = (const unsigned char *)subject + start_offset;
-end_subject = (const unsigned char *)subject + length;
+current_subject = (const pcre_uchar *)subject + start_offset;
+end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
utf = (re->options & PCRE_UTF8) != 0;
#else
@@ -3083,7 +3091,7 @@ anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
md->start_code = (const pcre_uchar *)argument_re +
re->name_table_offset + re->name_count * re->name_entry_size;
-md->start_subject = (const unsigned char *)subject;
+md->start_subject = (const pcre_uchar *)subject;
md->end_subject = end_subject;
md->start_offset = start_offset;
md->moptions = options;
diff --git a/pcre_exec.c b/pcre_exec.c
index c5932f7..9aa07a7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -2968,7 +2968,7 @@ for (;;)
MRRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
- if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+ if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
}
/* If max == min we can continue with the main loop without the
@@ -2992,7 +2992,7 @@ for (;;)
MRRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
- if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+ if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3015,7 +3015,7 @@ for (;;)
#else
c = *eptr;
#endif
- if (!PRIV(xclass)(c, data)) break;
+ if (!PRIV(xclass)(c, data, utf)) break;
eptr += len;
}
for(;;)
@@ -6113,6 +6113,7 @@ if (re->magic_number != MAGIC_NUMBER)
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* Set up other data */
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 2bdf24b..078f5fd 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -91,6 +91,7 @@ if (re->magic_number != MAGIC_NUMBER)
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
switch (what)
{
diff --git a/pcre_get.c b/pcre_get.c
index 330035f..10a54a7 100644
--- a/pcre_get.c
+++ b/pcre_get.c
@@ -65,8 +65,13 @@ Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringnumber(const pcre *code, PCRE_SPTR16 stringname)
+#endif
{
int rc;
int entrysize;
@@ -87,7 +92,8 @@ while (top > bot)
{
int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
- int c = strcmp(stringname, (char *)(entry + 2));
+ int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+ (pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0) return (entry[0] << 8) + entry[1];
if (c > 0) bot = mid + 1; else top = mid;
}
@@ -114,9 +120,15 @@ Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringtable_entries(const pcre *code, PCRE_SPTR16 stringname,
+ PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
+#endif
{
int rc;
int entrysize;
@@ -138,23 +150,31 @@ while (top > bot)
{
int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
- int c = strcmp(stringname, (char *)(entry + 2));
+ int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+ (pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0)
{
pcre_uchar *first = entry;
pcre_uchar *last = entry;
while (first > nametable)
{
- if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
+ if (STRCMP_UC_UC((pcre_uchar *)stringname,
+ (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
- if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
+ if (STRCMP_UC_UC((pcre_uchar *)stringname,
+ (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
+#ifdef COMPILE_PCRE8
*firstptr = (char *)first;
*lastptr = (char *)last;
+#else
+ *firstptr = (PCRE_SCHAR16 *)first;
+ *lastptr = (PCRE_SCHAR16 *)last;
+#endif
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
@@ -182,16 +202,29 @@ Returns: the number of the first that is set,
or a negative number on error
*/
+#ifdef COMPILE_PCRE8
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
+#else
+static int
+get_first_set(const pcre *code, PCRE_SPTR16 stringname, int *ovector)
+#endif
{
const real_pcre *re = (const real_pcre *)code;
int entrysize;
-char *first, *last;
+pcre_uchar *first, *last;
pcre_uchar *entry;
+#ifdef COMPILE_PCRE8
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
-entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+entrysize = pcre_get_stringtable_entries(code, stringname,
+ (char **)&first, (char **)&last);
+#else
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+ return pcre16_get_stringnumber(code, stringname);
+entrysize = pcre16_get_stringtable_entries(code, stringname,
+ (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
+#endif
if (entrysize <= 0) return entrysize;
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
{
@@ -231,9 +264,15 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+ int stringnumber, PCRE_SCHAR16 *buffer, int size)
+#endif
{
int yield;
if (stringnumber < 0 || stringnumber >= stringcount)
@@ -241,7 +280,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
-memcpy(buffer, subject + ovector[stringnumber], yield);
+memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
buffer[yield] = 0;
return yield;
}
@@ -276,13 +315,23 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+ int stringcount, PCRE_SPTR16 stringname, PCRE_SCHAR16 *buffer, int size)
+#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#else
+return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#endif
}
@@ -308,29 +357,39 @@ Returns: if successful: 0
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
+ PCRE_SPTR16 **listptr)
+#endif
{
int i;
-int size = sizeof(char *);
+int size = sizeof(pcre_uchar *);
int double_count = stringcount * 2;
-char **stringlist;
-char *p;
+pcre_uchar **stringlist;
+pcre_uchar *p;
for (i = 0; i < double_count; i += 2)
- size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
+ size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
-stringlist = (char **)(pcre_malloc)(size);
+stringlist = (pcre_uchar **)(pcre_malloc)(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
+#ifdef COMPILE_PCRE8
*listptr = (const char **)stringlist;
-p = (char *)(stringlist + stringcount + 1);
+#else
+*listptr = (PCRE_SPTR16 *)stringlist;
+#endif
+p = (pcre_uchar *)(stringlist + stringcount + 1);
for (i = 0; i < double_count; i += 2)
{
int len = ovector[i+1] - ovector[i];
- memcpy(p, subject + ovector[i], len);
+ memcpy(p, subject + ovector[i], IN_UCHARS(len));
*stringlist++ = p;
p += len;
*p++ = 0;
@@ -353,8 +412,13 @@ Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring_list(PCRE_SPTR16 *pointer)
+#endif
{
(pcre_free)((void *)pointer);
}
@@ -386,21 +450,31 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+ int stringnumber, PCRE_SPTR16 *stringptr)
+#endif
{
int yield;
-char *substring;
+pcre_uchar *substring;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
-substring = (char *)(pcre_malloc)(yield + 1);
+substring = (pcre_uchar *)(pcre_malloc)(IN_UCHARS(yield + 1));
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
-memcpy(substring, subject + ovector[stringnumber], yield);
+memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
substring[yield] = 0;
-*stringptr = substring;
+#ifdef COMPILE_PCRE8
+*stringptr = (const char *)substring;
+#else
+*stringptr = (PCRE_SPTR16)substring;
+#endif
return yield;
}
@@ -433,13 +507,23 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+ int stringcount, PCRE_SPTR16 stringname, PCRE_SPTR16 *stringptr)
+#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+#else
+return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
+#endif
}
@@ -456,8 +540,13 @@ Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring(PCRE_SPTR16 pointer)
+#endif
{
(pcre_free)((void *)pointer);
}
diff --git a/pcre_info.c b/pcre_info.c
index e7b3730..52d593a 100644
--- a/pcre_info.c
+++ b/pcre_info.c
@@ -88,6 +88,7 @@ if (re->magic_number != MAGIC_NUMBER)
re = PRIV(try_flipped)(re, &internal_re, NULL, NULL);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
}
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
if (first_char != NULL)
*first_char = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
diff --git a/pcre_internal.h b/pcre_internal.h
index e748809..9a20e73 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -832,15 +832,21 @@ are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
the restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */
-#define PCRE_FIRSTSET 0x0002 /* first_char is set */
-#define PCRE_REQCHSET 0x0004 /* req_byte is set */
-#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
-#define PCRE_JCHANGED 0x0010 /* j option used in regex */
-#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */
-#define PCRE_FCH_CASELESS 0x0080 /* caseless first char */
-#define PCRE_RCH_CASELESS 0x0100 /* caseless requested char */
+#ifdef COMPILE_PCRE8
+#define PCRE_MODE 0x0001 /* compiled in 8 bit mode */
+#endif
+#ifdef COMPILE_PCRE16
+#define PCRE_MODE 0x0002 /* compiled in 16 bit mode */
+#endif
+#define PCRE_FIRSTSET 0x0010 /* first_char is set */
+#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
+#define PCRE_REQCHSET 0x0040 /* req_byte is set */
+#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
+#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
+#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
+#define PCRE_JCHANGED 0x0400 /* j option used in regex */
+#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
/* Flags for the "extra" block produced by pcre_study(). */
@@ -917,7 +923,7 @@ for) in a minority area (EBCDIC platforms), this is not sensible. Any
application that did need both could compile two versions of the library, using
macros to give the functions distinct names. */
-#ifndef SUPPORT_UTF8
+#ifndef SUPPORT_UTF
/* UTF-8 support is not enabled; use the platform-dependent character literals
so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
@@ -1186,7 +1192,7 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
-#else /* SUPPORT_UTF8 */
+#else /* SUPPORT_UTF */
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@@ -1446,7 +1452,7 @@ only. */
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
/* Escape items that are just an encoding of a particular data value. */
@@ -2249,7 +2255,7 @@ extern real_pcre *PRIV(try_flipped)(const real_pcre *, real_pcre *,
extern int PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
int *, BOOL);
-extern BOOL PRIV(xclass)(int, const pcre_uchar *);
+extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL);
#ifdef SUPPORT_JIT
extern void PRIV(jit_compile)(const real_pcre *, pcre_extra *);
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 8c6b206..50376b9 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -1311,7 +1311,7 @@ static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar*
{
/* Detects if the character and its othercase has only 1 bit difference. */
unsigned int c, oc, bit;
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index d82af25..5b66bac 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -56,6 +56,8 @@ POSSIBILITY OF SUCH DAMAGE.
Non-letter characters:
\xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
\xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+ \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
+ \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
Newlines:
\xc2\x85 = 0x85 = 133 (NExt Line = NEL)
\xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
@@ -99,13 +101,19 @@ int main(void)
#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
#endif
-#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
-#define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+
+#define OFFSET_MASK 0xffff
+#define F_DIFF 0x010000
+#define F_FORCECONV 0x020000
+#define F_NO8 0x100000
+#define F_NO16 0x200000
struct regression_test_case {
int flags;
@@ -521,7 +529,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
{ MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
- { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
+ { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
{ MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
{ MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
@@ -535,11 +543,11 @@ static struct regression_test_case regression_test_cases[] = {
{ MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
{ MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
{ MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
- { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
+ { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
- { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
+ { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
{ MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
@@ -601,6 +609,20 @@ static struct regression_test_case regression_test_cases[] = {
{ MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
{ MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
+ /* 16 bit specific tests. */
+ { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
+ { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+ { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
+ { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+ { CMA, 0 | F_FORCECONV | F_NO8, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
+ { CMA, 0 | F_FORCECONV | F_NO8, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
+ { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
+ { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
+ { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
+ { CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
+ { CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
+ { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
+
/* Deep recursion. */
{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
{ MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
@@ -721,7 +743,7 @@ static int regression_tests(void)
int ovector8_2[32];
int return_value8_1, return_value8_2;
int utf8 = 0, ucp8 = 0;
- int disabled_flags8 = PCRE_BUG;
+ int disabled_flags8 = 0;
#endif
#ifdef SUPPORT_PCRE16
pcre *re16;
@@ -730,7 +752,7 @@ static int regression_tests(void)
int ovector16_2[32];
int return_value16_1, return_value16_2;
int utf16 = 0, ucp16 = 0;
- int disabled_flags16 = PCRE_BUG;
+ int disabled_flags16 = 0;
int length16;
#endif
@@ -765,9 +787,11 @@ static int regression_tests(void)
error = NULL;
#ifdef SUPPORT_PCRE8
- re8 = pcre_compile(current->pattern,
- current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
- &error, &err_offs, NULL);
+ re8 = NULL;
+ if (!(current->start_offset & F_NO8))
+ re8 = pcre_compile(current->pattern,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+ &error, &err_offs, NULL);
extra8 = NULL;
if (re8) {
@@ -784,17 +808,20 @@ static int regression_tests(void)
pcre_free(re8);
re8 = NULL;
}
- } else if (utf8 && ucp8)
+ } else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
#endif
#ifdef SUPPORT_PCRE16
- if (current->flags & PCRE_UTF8)
+ if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
else
copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
- re16 = pcre16_compile(regtest_buf,
- current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
- &error, &err_offs, NULL);
+
+ re16 = NULL;
+ if (!(current->start_offset & F_NO16))
+ re16 = pcre16_compile(regtest_buf,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+ &error, &err_offs, NULL);
extra16 = NULL;
if (re16) {
@@ -811,7 +838,7 @@ static int regression_tests(void)
pcre_free(re16);
re16 = NULL;
}
- } else if (utf16 && ucp16)
+ } else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
#endif
@@ -822,16 +849,15 @@ static int regression_tests(void)
#ifdef SUPPORT_PCRE8
return_value8_1 = -1000;
return_value8_2 = -1000;
+ for (i = 0; i < 32; ++i)
+ ovector8_1[i] = -2;
+ for (i = 0; i < 32; ++i)
+ ovector8_2[i] = -2;
if (re8) {
setstack(extra8, 0);
- for (i = 0; i < 32; ++i)
- ovector8_1[i] = -2;
- return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+ return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
-
- for (i = 0; i < 32; ++i)
- ovector8_2[i] = -2;
- return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+ return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
}
#endif
@@ -839,32 +865,30 @@ static int regression_tests(void)
#ifdef SUPPORT_PCRE16
return_value16_1 = -1000;
return_value16_2 = -1000;
+ for (i = 0; i < 32; ++i)
+ ovector16_1[i] = -2;
+ for (i = 0; i < 32; ++i)
+ ovector16_2[i] = -2;
if (re16) {
setstack(extra16, 0);
- if (current->flags & PCRE_UTF8)
+ if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
else
length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
-
- for (i = 0; i < 32; ++i)
- ovector16_1[i] = -2;
- return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+ return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
-
- for (i = 0; i < 32; ++i)
- ovector16_2[i] = -2;
- return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+ return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
}
#endif
- /* If PCRE_BUG is set, just run the test, but do not compare the results.
+ /* If F_DIFF is set, just run the test, but do not compare the results.
Segfaults can still be captured. */
is_succesful = 1;
- if (!(current->flags & PCRE_BUG)) {
+ if (!(current->start_offset & F_DIFF)) {
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- if (utf8 == utf16) {
+ if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
/* All results must be the same. */
if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
@@ -947,7 +971,7 @@ static int regression_tests(void)
}
#endif
- /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
+ /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
printf(".");
fflush(stdout);
current++;
@@ -962,5 +986,4 @@ static int regression_tests(void)
}
}
-
/* End of pcre_jit_test.c */
diff --git a/pcre_newline.c b/pcre_newline.c
index d618b80..2343f73 100644
--- a/pcre_newline.c
+++ b/pcre_newline.c
@@ -84,7 +84,7 @@ if (utf)
GETCHAR(c, ptr);
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
@@ -150,7 +150,7 @@ if (utf)
GETCHAR(c, ptr);
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
diff --git a/pcre_ord2utf8.c b/pcre_ord2utf8.c
index b374987..67cf529 100644
--- a/pcre_ord2utf8.c
+++ b/pcre_ord2utf8.c
@@ -65,7 +65,7 @@ Returns: number of characters placed in the buffer
int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
register int i, j;
@@ -88,7 +88,7 @@ return i + 1;
#else
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
-(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif
diff --git a/pcre_study.c b/pcre_study.c
index 3f25c3a..493108e 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -323,7 +323,7 @@ for (;;)
/* Check a class for variable quantification */
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
/* Fall through */
@@ -824,7 +824,7 @@ do
case OP_SOM:
case OP_THEN:
case OP_THEN_ARG:
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
#endif
return SSB_FAIL;
@@ -1325,6 +1325,16 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER)
return NULL;
}
+if ((re->flags & PCRE_MODE) == 0)
+ {
+#ifdef COMPILE_PCRE8
+ *errorptr = "argument is compiled in 16 bit mode";
+#else
+ *errorptr = "argument is compiled in 8 bit mode";
+#endif
+ return NULL;
+ }
+
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
{
*errorptr = "unknown or incorrect option bit(s) set";
@@ -1346,9 +1356,16 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* Set the character tables in the block that is passed around */
tables = re->tables;
+
+#ifdef COMPILE_PCRE8
if (tables == NULL)
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
+#else
+ if (tables == NULL)
+ (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+ (void *)(&tables));
+#endif
compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index bbab87f..05d82f9 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -105,7 +105,7 @@ Returns: = 0 if the string is a valid UTF-8 string
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
if (length < 0)
@@ -288,7 +288,7 @@ for (p = string; length-- > 0; p++)
}
}
-#else /* SUPPORT_UTF8 */
+#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
#endif
diff --git a/pcre_xclass.c b/pcre_xclass.c
index cdb9d07..1c2b65a 100644
--- a/pcre_xclass.c
+++ b/pcre_xclass.c
@@ -64,11 +64,17 @@ Returns: TRUE if character matches, else FALSE
*/
BOOL
-PRIV(xclass)(int c, const pcre_uchar *data)
+PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
{
int t;
BOOL negated = (*data & XCL_NOT) != 0;
+(void)utf;
+#ifdef COMPILE_PCRE8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
additional data. */
@@ -91,13 +97,30 @@ while ((t = *data++) != XCL_END)
int x, y;
if (t == XCL_SINGLE)
{
- GETCHARINC(x, data);
+#ifdef SUPPORT_UTF
+ if (utf)
+ {
+ GETCHARINC(x, data); /* macro generates multiple statements */
+ }
+ else
+#endif
+ x = *data++;
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
- GETCHARINC(x, data);
- GETCHARINC(y, data);
+#ifdef SUPPORT_UTF
+ if (utf)
+ {
+ GETCHARINC(x, data); /* macro generates multiple statements */
+ GETCHARINC(y, data); /* macro generates multiple statements */
+ }
+ else
+#endif
+ {
+ x = *data++;
+ y = *data++;
+ }
if (c >= x && c <= y) return !negated;
}
diff --git a/pcreposix.c b/pcreposix.c
index 2dc1561..0426e2e 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -401,6 +401,7 @@ switch(rc)
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
case PCRE_ERROR_BADUTF8: return REG_INVARG;
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+ case PCRE_ERROR_BADMODE: return REG_INVARG;
default: return REG_ASSERT;
}
}