summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-22 20:11:31 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-22 20:11:31 +0000
commit1e69e6b9e3629c2e915018d288aa08671c3648f7 (patch)
treece160a805ecd7cf3f3d96327cbd5906f9ee83e96
parentde5eb59c57fd7f6e4e0689664c31811f429ae0f8 (diff)
downloadpcre-1e69e6b9e3629c2e915018d288aa08671c3648f7.tar.gz
More 16-bit fixes, and removal of pcre_info().
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@818 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog3
-rw-r--r--Makefile.am6
-rw-r--r--pcre16_info.c45
-rw-r--r--pcre_compile.c5
-rw-r--r--pcre_info.c103
-rw-r--r--pcre_internal.h2
-rw-r--r--pcreposix.c3
-rw-r--r--pcretest.c157
-rw-r--r--testdata/testinput14200
-rw-r--r--testdata/testinput17198
-rw-r--r--testdata/testinput2200
-rw-r--r--testdata/testoutput14212
-rw-r--r--testdata/testoutput17211
-rw-r--r--testdata/testoutput2212
14 files changed, 912 insertions, 645 deletions
diff --git a/ChangeLog b/ChangeLog
index b410d0f..66b2b90 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,9 @@ Version 8.22
2. Fixed a bug in the code for calculating the fixed length of lookbehind
assertions.
+
+3. Removed the function pcre_info(), which has been obsolete and deprecated
+ since it was replaced by pcre_fullinfo() in February 2000.
Version 8.21 12-Dec-2011
diff --git a/Makefile.am b/Makefile.am
index 95c74dd..9137b69 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -35,7 +35,6 @@ dist_html_DATA = \
doc/html/pcre_get_stringtable_entries.html \
doc/html/pcre_get_substring.html \
doc/html/pcre_get_substring_list.html \
- doc/html/pcre_info.html \
doc/html/pcre_jit_stack_alloc.html \
doc/html/pcre_jit_stack_free.html \
doc/html/pcre_maketables.html \
@@ -184,7 +183,6 @@ libpcre_la_SOURCES = \
pcre_fullinfo.c \
pcre_get.c \
pcre_globals.c \
- pcre_info.c \
pcre_internal.h \
pcre_jit_compile.c \
pcre_maketables.c \
@@ -219,7 +217,6 @@ libpcre16_la_SOURCES = \
pcre16_fullinfo.c \
pcre16_get.c \
pcre16_globals.c \
- pcre16_info.c \
pcre16_jit_compile.c \
pcre16_maketables.c \
pcre16_newline.c \
@@ -434,7 +431,7 @@ test: check ;
# nice DLL for Windows use". (It is used by the pcre.dll target.)
DLL_OBJS= pcre_byte_order.o pcre_compile.o pcre_config.o \
pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
- pcre_globals.o pcre_info.o pcre_jit_compile.o pcre_maketables.o \
+ pcre_globals.o pcre_jit_compile.o pcre_maketables.o \
pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
pcre_study.o pcre_tables.o pcre_ucd.o \
pcre_valid_utf8.o pcre_version.o pcre_chartables.o \
@@ -477,7 +474,6 @@ dist_man_MANS = \
doc/pcre_get_stringtable_entries.3 \
doc/pcre_get_substring.3 \
doc/pcre_get_substring_list.3 \
- doc/pcre_info.3 \
doc/pcre_jit_stack_alloc.3 \
doc/pcre_jit_stack_free.3 \
doc/pcre_maketables.3 \
diff --git a/pcre16_info.c b/pcre16_info.c
deleted file mode 100644
index b4b221a..0000000
--- a/pcre16_info.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
- Copyright (c) 1997-2011 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* Generate code with 16 bit character support. */
-#define COMPILE_PCRE16
-
-#include "pcre_info.c"
-
-/* End of pcre16_info.c */
diff --git a/pcre_compile.c b/pcre_compile.c
index e9427ea..74ae41d 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -685,10 +685,9 @@ if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||
newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)
return ERR72;
-newspace = (PUBL(malloc))(newsize);
+newspace = (PUBL(malloc))(newsize * sizeof(pcre_uchar));
if (newspace == NULL) return ERR21;
-
-memcpy(newspace, cd->start_workspace, cd->workspace_size);
+memcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar));
cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace);
if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
diff --git a/pcre_info.c b/pcre_info.c
deleted file mode 100644
index 498c442..0000000
--- a/pcre_info.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_info(), which gives some
-information about a compiled pattern. However, use of this function is now
-deprecated, as it has been superseded by pcre_fullinfo(). */
-
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* (Obsolete) Return info about compiled pattern *
-*************************************************/
-
-/* This is the original "info" function. It picks potentially useful data out
-of the private structure, but its interface was too rigid. It remains for
-backwards compatibility. The public options are passed back in an int - though
-the re->options field has been expanded to a long int, all the public options
-at the low end of it, and so even on 16-bit systems this will still be OK.
-Therefore, I haven't changed the API for pcre_info().
-
-Arguments:
- argument_re points to compiled code
- optptr where to pass back the options
- first_byte where to pass back the first character,
- or -1 if multiline and all branches start ^,
- or -2 otherwise
-
-Returns: number of capturing subpatterns
- or negative values on error
-*/
-
-#ifdef COMPILE_PCRE8
-PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_info(const pcre *argument_re, int *optptr, int *first_char)
-#else
-PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre16_info(const pcre *argument_re, int *optptr, int *first_char)
-#endif
-{
-const real_pcre *re = (const real_pcre *)argument_re;
-if (re == NULL) return PCRE_ERROR_NULL;
-
-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
- return re->magic_number == REVERSED_MAGIC_NUMBER?
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
-if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
-if (first_char != NULL)
- *first_char = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
- ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
-return re->top_bracket;
-}
-
-/* End of pcre_info.c */
diff --git a/pcre_internal.h b/pcre_internal.h
index b437df5..3cb7b4f 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -465,7 +465,7 @@ is automated on Unix systems via the "configure" command. */
#define PUT(a,n,d) \
(a[n] = (d) >> 16), \
- (a[(n)+1] = (d) & 65536)
+ (a[(n)+1] = (d) & 65535)
#define GET(a,n) \
(((a)[n] << 16) | (a)[(n)+1])
diff --git a/pcreposix.c b/pcreposix.c
index d931c63..f21bd54 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -277,7 +277,8 @@ if (preg->re_pcre == NULL)
eint[errorcode] : REG_BADPAT;
}
-preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
+(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
+ &(preg->re_nsub));
return 0;
}
diff --git a/pcretest.c b/pcretest.c
index d0778bb..c85ef35 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -162,8 +162,7 @@ Makefile. */
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
-without the interface to the DFA matcher (NODFA), and without the doublecheck
-of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
+without the interface to the DFA matcher (NODFA). In fact, we automatically cut
out the UTF8 support if PCRE is built without it. */
#ifndef SUPPORT_UTF8
@@ -188,8 +187,12 @@ use these in the definitions of generic macros. */
#define PCHARSV8(p, len, f) \
(void)pchars((pcre_uint8 *)p, len, f)
+#define SET_PCRE_CALLOUT8(callout) \
+ pcre_callout = callout
+
#define STRLEN8(p) ((int)strlen((char *)p))
+
#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
re = pcre_compile((char *)pat, options, error, erroffset, tables)
@@ -237,12 +240,12 @@ use these in the definitions of generic macros. */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
pcre_pattern_to_host_byte_order(re, extra, tables)
+#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
+ pcre_printint(re, outfile, debug_lengths)
+
#define PCRE_STUDY8(extra, re, options, error) \
extra = pcre_study(re, options, error)
-#define SET_PCRE_CALLOUT8(callout) \
- pcre_callout = callout
-
#endif /* SUPPORT_PCRE8 */
/* -----------------------------------------------------------*/
@@ -257,6 +260,10 @@ use these in the definitions of generic macros. */
#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
+#define SET_PCRE_CALLOUT16(callout) \
+ pcre16_callout = callout
+
+
#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
@@ -307,16 +314,18 @@ use these in the definitions of generic macros. */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
pcre16_pattern_to_host_byte_order(re, extra, tables)
+#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
+ pcre16_printint(re, outfile, debug_lengths)
+
#define PCRE_STUDY16(extra, re, options, error) \
extra = pcre16_study(re, options, error)
-#define SET_PCRE_CALLOUT16(callout) \
- pcre16_callout = callout
-
#endif /* SUPPORT_PCRE16 */
-/* ----- Both modes are supported; a runtime test is needed ----- */
+/* ----- Both modes are supported; a runtime test is needed, except for
+pcre_config(), and the JIT stack functions, when it doesn't matter which
+version is called. ----- */
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
@@ -334,13 +343,23 @@ use these in the definitions of generic macros. */
else \
PCHARSV8(p, len, f)
+#define SET_PCRE_CALLOUT(callout) \
+ if (use_pcre16) \
+ SET_PCRE_CALLOUT16(callout); \
+ else \
+ SET_PCRE_CALLOUT8(callout)
+
#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
+#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
+
#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
if (use_pcre16) \
PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
else \
PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
+
+#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
namesptr, cbuffer, size) \
@@ -420,23 +439,29 @@ use these in the definitions of generic macros. */
else \
PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
+#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
+#define PCRE_JIT_STACK_FREE pcre_jit_stack_free
+
+#define PCRE_MAKETABLES \
+ (use_pcre16? pcre16_maketables() : pcre_maketables())
+
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
if (use_pcre16) \
PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
else \
PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
-#define PCRE_STUDY(extra, re, options, error) \
+#define PCRE_PRINTINT(re, outfile, debug_lengths) \
if (use_pcre16) \
- PCRE_STUDY16(extra, re, options, error); \
+ PCRE_PRINTINT16(re, outfile, debug_lengths); \
else \
- PCRE_STUDY8(extra, re, options, error)
+ PCRE_PRINTINT8(re, outfile, debug_lengths)
-#define SET_PCRE_CALLOUT(callout) \
+#define PCRE_STUDY(extra, re, options, error) \
if (use_pcre16) \
- SET_PCRE_CALLOUT16(callout); \
+ PCRE_STUDY16(extra, re, options, error); \
else \
- SET_PCRE_CALLOUT8(callout)
+ PCRE_STUDY8(extra, re, options, error)
/* ----- Only 8-bit mode is supported ----- */
@@ -444,8 +469,11 @@ use these in the definitions of generic macros. */
#define CHAR_SIZE 1
#define PCHARS PCHARS8
#define PCHARSV PCHARSV8
+#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
#define STRLEN STRLEN8
+#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
#define PCRE_COMPILE PCRE_COMPILE8
+#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC PCRE_DFA_EXEC8
@@ -457,9 +485,12 @@ use these in the definitions of generic macros. */
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
+#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
+#define PCRE_JIT_STACK_FREE pcre_jit_stack_free
+#define PCRE_MAKETABLES pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
+#define PCRE_PRINTINT PCRE_PRINTINT8
#define PCRE_STUDY PCRE_STUDY8
-#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
/* ----- Only 16-bit mode is supported ----- */
@@ -467,8 +498,11 @@ use these in the definitions of generic macros. */
#define CHAR_SIZE 1
#define PCHARS PCHARS16
#define PCHARSV PCHARSV16
+#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
#define STRLEN STRLEN16
+#define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
#define PCRE_COMPILE PCRE_COMPILE16
+#define PCRE_CONFIG pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC PCRE_DFA_EXEC16
@@ -480,9 +514,12 @@ use these in the definitions of generic macros. */
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
+#define PCRE_JIT_STACK_ALLOC pcre16_jit_stack_alloc
+#define PCRE_JIT_STACK_FREE pcre16_jit_stack_free
+#define PCRE_MAKETABLES pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
+#define PCRE_PRINTINT PCRE_PRINTINT16
#define PCRE_STUDY PCRE_STUDY16
-#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
#endif
/* ----- End of mode-specific function call macros ----- */
@@ -2029,10 +2066,15 @@ while (argc > 1 && argv[op][0] == '-')
force_study = 1;
force_study_options = PCRE_STUDY_JIT_COMPILE;
}
+ else if (strcmp(argv[op], "-16") == 0)
+ {
#ifdef SUPPORT_PCRE16
- else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
+ use_pcre16 = 1;
+#else
+ printf("** This version of PCRE was built without 16-bit support\n");
+ exit(1);
#endif
-
+ }
else if (strcmp(argv[op], "-q") == 0) quiet = 1;
else if (strcmp(argv[op], "-b") == 0) debug = 1;
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
@@ -2114,32 +2156,32 @@ are set, either both UTFs are supported or both are not supported. */
printf(" %sUTF-16 support\n", rc? "" : "No ");
#endif
- (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
printf(" %sUnicode properties support\n", rc? "" : "No ");
- (void)pcre_config(PCRE_CONFIG_JIT, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
if (rc)
printf(" Just-in-time compiler support\n");
else
printf(" No just-in-time compiler support\n");
- (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
/* Note that these values are always the ASCII values, even
in EBCDIC environments. CR is 13 and NL is 10. */
printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
(rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
(rc == -2)? "ANYCRLF" :
(rc == -1)? "ANY" : "???");
- (void)pcre_config(PCRE_CONFIG_BSR, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
"all Unicode newlines");
- (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
printf(" Internal link size = %d\n", rc);
- (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
printf(" POSIX malloc threshold = %d\n", rc);
- (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
printf(" Default match limit = %ld\n", lrc);
- (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
printf(" Default recursion depth limit = %ld\n", lrc);
- (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
printf(" Match recursion uses %s\n", rc? "stack" : "heap");
goto EXIT;
}
@@ -2497,7 +2539,7 @@ while (!done)
goto SKIP_DATA;
}
locale_set = 1;
- tables = pcre_maketables();
+ tables = PCRE_MAKETABLES;
pp = ppp;
break;
@@ -2719,16 +2761,7 @@ while (!done)
if (do_debug)
{
fprintf(outfile, "------------------------------------------------------------------\n");
-#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
- if (use_pcre16)
- pcre16_printint(re, outfile, debug_lengths);
- else
- pcre_printint(re, outfile, debug_lengths);
-#elif defined SUPPORT_PCRE8
- pcre_printint(re, outfile, debug_lengths);
-#else
- pcre16_printint(re, outfile, debug_lengths);
-#endif
+ PCRE_PRINTINT(re, outfile, debug_lengths);
}
/* We already have the options in get_options (see above) */
@@ -2736,9 +2769,6 @@ while (!done)
if (do_showinfo)
{
unsigned long int all_options;
-#if !defined NOINFOCHECK
- int old_first_char, old_options, old_count;
-#endif
int count, backrefmax, first_char, need_char, okpartial, jchanged,
hascrorlf;
int nameentrysize, namecount;
@@ -2756,32 +2786,6 @@ while (!done)
new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
- /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
- that it gives the same results as the new function. */
-
-#if !defined NOINFOCHECK
- if (!use_pcre16)
- {
- old_count = pcre_info(re, &old_options, &old_first_char);
- if (count < 0) fprintf(outfile,
- "Error %d from pcre_info()\n", count);
- else
- {
- if (old_count != count) fprintf(outfile,
- "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
- old_count);
-
- if (old_first_char != first_char) fprintf(outfile,
- "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
- first_char, old_first_char);
-
- if (old_options != (int)get_options) fprintf(outfile,
- "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
- get_options, old_options);
- }
- }
-#endif
-
if (size != regex_gotten_store) fprintf(outfile,
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
(int)size, (int)regex_gotten_store);
@@ -3073,7 +3077,9 @@ while (!done)
options = 0;
*copynames = 0;
+ copynames[1] = 0;
*getnames = 0;
+ getnames[1] = 0;
copynamesptr = copynames;
getnamesptr = getnames;
@@ -3370,9 +3376,9 @@ while (!done)
&& (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
&& extra->executable_jit != NULL)
{
- if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
- jit_stack = pcre_jit_stack_alloc(1, n * 1024);
- pcre_assign_jit_stack(extra, jit_callback, jit_stack);
+ if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
+ jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
+ PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
}
continue;
@@ -3750,7 +3756,7 @@ while (!done)
{
if ((copystrings & (1 << i)) != 0)
{
- int rc;
+ int rc;
char copybuffer[256];
PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
copybuffer, sizeof(copybuffer));
@@ -3766,6 +3772,7 @@ while (!done)
}
for (copynamesptr = copynames;
+
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
use_pcre16?
(*(PCRE_SCHAR16*)copynamesptr) != 0 : *copynamesptr != 0;
@@ -3891,7 +3898,7 @@ while (!done)
terminated by CRLF, an advance of one character just passes the \r,
whereas we should prefer the longer newline sequence, as does the code in
pcre_exec(). Fudge the offset value to achieve this. We check for a
- newline setting in the pattern; if none was set, use pcre_config() to
+ newline setting in the pattern; if none was set, use PCRE_CONFIG() to
find the default.
Otherwise, in the case of UTF-8 matching, the advance must be one
@@ -3907,7 +3914,7 @@ while (!done)
if ((obits & PCRE_NEWLINE_BITS) == 0)
{
int d;
- (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
+ (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
/* Note that these values are always the ASCII ones, even in
EBCDIC environments. CR = 13, NL = 10. */
obits = (d == 13)? PCRE_NEWLINE_CR :
@@ -4022,7 +4029,7 @@ while (!done)
}
if (jit_stack != NULL)
{
- pcre_jit_stack_free(jit_stack);
+ PCRE_JIT_STACK_FREE(jit_stack);
jit_stack = NULL;
}
}
diff --git a/testdata/testinput14 b/testdata/testinput14
index 6d39d60..2decd01 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -73,6 +73,9 @@
/\w+A/PU
CDAAAAB
+/\Biss\B/I+P
+ Mississippi
+
/-- End of POSIX tests --/
/a\Cb/
@@ -81,4 +84,201 @@
** Failers (too big char)
A\x{123}B
+/\x{100}/I
+
+/ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional leading comment
+(?: (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # one word, optionally followed by....
+(?:
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
+\(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) | # comments, or...
+
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+# quoted strings
+)*
+< (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # leading <
+(?: @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* , (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+)* # further okay, if led by comma
+: # closing colon
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* )? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address spec
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* > # trailing >
+# name and address
+) (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional trailing comment
+/xSI
+
/-- End of testinput14 --/
diff --git a/testdata/testinput17 b/testdata/testinput17
index 3d1e631..6d4a63d 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -14,4 +14,202 @@
/[^\x{c4}]/DZ
+
+/\x{100}/I
+
+/ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional leading comment
+(?: (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # one word, optionally followed by....
+(?:
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
+\(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) | # comments, or...
+
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+# quoted strings
+)*
+< (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # leading <
+(?: @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* , (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+)* # further okay, if led by comma
+: # closing colon
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* )? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address spec
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* > # trailing >
+# name and address
+) (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional trailing comment
+/xSI
+
/-- End of testinput17 --/
diff --git a/testdata/testinput2 b/testdata/testinput2
index 4cad699..f812afb 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -456,9 +456,6 @@
/\Biss\B/I+
Mississippi
-/\Biss\B/I+P
- Mississippi
-
/iss/IG+
Mississippi
@@ -1497,8 +1494,6 @@
(this)
((this))
-/\x{100}/I
-
/\x{0000ff}/I
/^((?P<A>a1)|(?P<A>a2)b)/I
@@ -2889,201 +2884,6 @@ a random value. /Ix
/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/SI
-/ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # optional leading comment
-(?: (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # initial word
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) )* # further okay, if led by a period
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-# address
-| # or
-(?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # one word, optionally followed by....
-(?:
-[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
-\(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) | # comments, or...
-
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-# quoted strings
-)*
-< (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # leading <
-(?: @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* , (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-)* # further okay, if led by comma
-: # closing colon
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* )? # optional route
-(?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # initial word
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) )* # further okay, if led by a period
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-# address spec
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* > # trailing >
-# name and address
-) (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # optional trailing comment
-/xSI
-
/<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
"(?>.*/)foo"SI
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index f34ac66..b2cda8e 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -126,6 +126,11 @@ No match: POSIX code 17: match failed
CDAAAAB
0: CDA
+/\Biss\B/I+P
+ Mississippi
+ 0: iss
+ 0+ issippi
+
/-- End of POSIX tests --/
/a\Cb/
@@ -140,4 +145,211 @@ No match
** Truncation will probably give the wrong result.
No match
+/\x{100}/I
+Failed: character value in \x{...} sequence is too large at offset 6
+
+/ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional leading comment
+(?: (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # one word, optionally followed by....
+(?:
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
+\(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) | # comments, or...
+
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+# quoted strings
+)*
+< (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # leading <
+(?: @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* , (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+)* # further okay, if led by comma
+: # closing colon
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* )? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address spec
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* > # trailing >
+# name and address
+) (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional trailing comment
+/xSI
+Capturing subpattern count = 0
+Contains explicit CR or LF match
+Options: extended
+No first char
+No need char
+Subject length lower bound = 3
+Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
+ 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
+ f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
+
/-- End of testinput14 --/
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index 99b35c0..e277ce1 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -32,4 +32,215 @@ No options
No first char
No need char
+
+/\x{100}/I
+Capturing subpattern count = 0
+No options
+First char = \x{100}
+No need char
+
+/ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional leading comment
+(?: (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # one word, optionally followed by....
+(?:
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
+\(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) | # comments, or...
+
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+# quoted strings
+)*
+< (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # leading <
+(?: @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* , (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+)* # further okay, if led by comma
+: # closing colon
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* )? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address spec
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* > # trailing >
+# name and address
+) (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional trailing comment
+/xSI
+Capturing subpattern count = 0
+Contains explicit CR or LF match
+Options: extended
+No first char
+No need char
+Subject length lower bound = 3
+Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
+ 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
+ f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
+
/-- End of testinput17 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2f8ba24..43e3ac9 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -1212,11 +1212,6 @@ Need char = 's'
0: iss
0+ issippi
-/\Biss\B/I+P
- Mississippi
- 0: iss
- 0+ issippi
-
/iss/IG+
Capturing subpattern count = 0
No options
@@ -6060,9 +6055,6 @@ No need char
((this))
0: ((this))
-/\x{100}/I
-Failed: character value in \x{...} sequence is too large at offset 6
-
/\x{0000ff}/I
Capturing subpattern count = 0
No options
@@ -10152,210 +10144,6 @@ Need char = ':'
Subject length lower bound = 22
No set of starting bytes
-/ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # optional leading comment
-(?: (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # initial word
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) )* # further okay, if led by a period
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-# address
-| # or
-(?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # one word, optionally followed by....
-(?:
-[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
-\(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) | # comments, or...
-
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-# quoted strings
-)*
-< (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # leading <
-(?: @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* , (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-)* # further okay, if led by comma
-: # closing colon
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* )? # optional route
-(?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) # initial word
-(?: (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-|
-" (?: # opening quote...
-[^\\\x80-\xff\n\015"] # Anything except backslash and quote
-| # or
-\\ [^\x80-\xff] # Escaped something (something != CR)
-)* " # closing quote
-) )* # further okay, if led by a period
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* @ (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # initial subdomain
-(?: #
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* \. # if led by a period...
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* (?:
-[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
-(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
-| \[ # [
-(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
-\] # ]
-) # ...further okay
-)*
-# address spec
-(?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* > # trailing >
-# name and address
-) (?: [\040\t] | \(
-(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
-\) )* # optional trailing comment
-/xSI
-Capturing subpattern count = 0
-Contains explicit CR or LF match
-Options: extended
-No first char
-No need char
-Subject length lower bound = 3
-Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
- 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
-
/<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
Capturing subpattern count = 11
Options: caseless dotall