1 files changed, 102 insertions, 10375 deletions
diff --git a/regcomp.c b/regcomp.c
index de3335ae25..d88109c78b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -31,29 +31,25 @@
  * with the POSIX routines of the same names.
 */
 
-#ifdef PERL_EXT_RE_BUILD
-#include "re_top.h"
-#endif
-
 /*
  * pregcomp and pregexec -- regsub and regerror are not used in perl
  *
- *      Copyright (c) 1986 by University of Toronto.
- *      Written by Henry Spencer.  Not derived from licensed software.
+ *	Copyright (c) 1986 by University of Toronto.
+ *	Written by Henry Spencer.  Not derived from licensed software.
  *
- *      Permission is granted to anyone to use this software for any
- *      purpose on any computer system, and to redistribute it freely,
- *      subject to the following restrictions:
+ *	Permission is granted to anyone to use this software for any
+ *	purpose on any computer system, and to redistribute it freely,
+ *	subject to the following restrictions:
  *
- *      1. The author is not responsible for the consequences of use of
- *              this software, no matter how awful, even if they arise
- *              from defects in it.
+ *	1. The author is not responsible for the consequences of use of
+ *		this software, no matter how awful, even if they arise
+ *		from defects in it.
  *
- *      2. The origin of this software must not be misrepresented, either
- *              by explicit claim or by omission.
+ *	2. The origin of this software must not be misrepresented, either
+ *		by explicit claim or by omission.
  *
- *      3. Altered versions must be plainly marked as such, and must not
- *              be misrepresented as being the original software.
+ *	3. Altered versions must be plainly marked as such, and must not
+ *		be misrepresented as being the original software.
  *
  *
  ****    Alterations to Henry's code are...
@@ -127,11 +123,16 @@
  * access data that we don't want to duplicate.
  */
 
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
 #include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
 #define PERL_IN_REGCOMP_C
 #include "perl.h"
 
-#define REG_COMP_C
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
 EXTERN_C const struct regexp_engine my_reg_engine;
@@ -142,1349 +143,7 @@ EXTERN_C const struct regexp_engine wild_reg_engine;
 
 #include "invlist_inline.h"
 #include "unicode_constants.h"
-
-#ifndef STATIC
-#define STATIC  static
-#endif
-
-/* this is a chain of data about sub patterns we are processing that
-   need to be handled separately/specially in study_chunk. Its so
-   we can simulate recursion without losing state.  */
-struct scan_frame;
-typedef struct scan_frame {
-    regnode *last_regnode;      /* last node to process in this frame */
-    regnode *next_regnode;      /* next node to process when last is reached */
-    U32 prev_recursed_depth;
-    I32 stopparen;              /* what stopparen do we use */
-    bool in_gosub;              /* this or an outer frame is for GOSUB */
-
-    struct scan_frame *this_prev_frame; /* this previous frame */
-    struct scan_frame *prev_frame;      /* previous frame */
-    struct scan_frame *next_frame;      /* next frame */
-} scan_frame;
-
-/* Certain characters are output as a sequence with the first being a
- * backslash. */
-#define isBACKSLASHED_PUNCT(c)  memCHRs("-[]\\^", c)
-
-
-struct RExC_state_t {
-    U32         flags;                  /* RXf_* are we folding, multilining? */
-    U32         pm_flags;               /* PMf_* stuff from the calling PMOP */
-    char        *precomp;               /* uncompiled string. */
-    char        *precomp_end;           /* pointer to end of uncompiled string. */
-    REGEXP      *rx_sv;                 /* The SV that is the regexp. */
-    regexp      *rx;                    /* perl core regexp structure */
-    regexp_internal     *rxi;           /* internal data for regexp object
-                                           pprivate field */
-    char        *start;                 /* Start of input for compile */
-    char        *end;                   /* End of input for compile */
-    char        *parse;                 /* Input-scan pointer. */
-    char        *copy_start;            /* start of copy of input within
-                                           constructed parse string */
-    char        *save_copy_start;       /* Provides one level of saving
-                                           and restoring 'copy_start' */
-    char        *copy_start_in_input;   /* Position in input string
-                                           corresponding to copy_start */
-    SSize_t     whilem_seen;            /* number of WHILEM in this expr */
-    regnode     *emit_start;            /* Start of emitted-code area */
-    regnode_offset emit;                /* Code-emit pointer */
-    I32         naughty;                /* How bad is this pattern? */
-    I32         sawback;                /* Did we see \1, ...? */
-    SSize_t     size;                   /* Number of regnode equivalents in
-                                           pattern */
-    Size_t      sets_depth;              /* Counts recursion depth of already-
-                                           compiled regex set patterns */
-    U32         seen;
-
-    I32      parens_buf_size;           /* #slots malloced open/close_parens */
-    regnode_offset *open_parens;        /* offsets to open parens */
-    regnode_offset *close_parens;       /* offsets to close parens */
-    HV          *paren_names;           /* Paren names */
-
-    /* position beyond 'precomp' of the warning message furthest away from
-     * 'precomp'.  During the parse, no warnings are raised for any problems
-     * earlier in the parse than this position.  This works if warnings are
-     * raised the first time a given spot is parsed, and if only one
-     * independent warning is raised for any given spot */
-    Size_t      latest_warn_offset;
-
-    I32         npar;                   /* Capture buffer count so far in the
-                                           parse, (OPEN) plus one. ("par" 0 is
-                                           the whole pattern)*/
-    I32         total_par;              /* During initial parse, is either 0,
-                                           or -1; the latter indicating a
-                                           reparse is needed.  After that pass,
-                                           it is what 'npar' became after the
-                                           pass.  Hence, it being > 0 indicates
-                                           we are in a reparse situation */
-    I32         nestroot;               /* root parens we are in - used by
-                                           accept */
-    I32         seen_zerolen;
-    regnode     *end_op;                /* END node in program */
-    I32         utf8;           /* whether the pattern is utf8 or not */
-    I32         orig_utf8;      /* whether the pattern was originally in utf8 */
-                                /* XXX use this for future optimisation of case
-                                 * where pattern must be upgraded to utf8. */
-    I32         uni_semantics;  /* If a d charset modifier should use unicode
-                                   rules, even if the pattern is not in
-                                   utf8 */
-
-    I32         recurse_count;          /* Number of recurse regops we have generated */
-    regnode     **recurse;              /* Recurse regops */
-    U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
-                                           through */
-    U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
-    I32         in_lookaround;
-    I32         contains_locale;
-    I32         override_recoding;
-    I32         recode_x_to_native;
-    I32         in_multi_char_class;
-    int         code_index;             /* next code_blocks[] slot */
-    struct reg_code_blocks *code_blocks;/* positions of literal (?{})
-                                            within pattern */
-    SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
-    scan_frame *frame_head;
-    scan_frame *frame_last;
-    U32         frame_count;
-    AV         *warn_text;
-    HV         *unlexed_names;
-    SV          *runtime_code_qr;       /* qr with the runtime code blocks */
-#ifdef DEBUGGING
-    const char  *lastparse;
-    I32         lastnum;
-    U32         study_chunk_recursed_count;
-    AV          *paren_name_list;       /* idx -> name */
-    SV          *mysv1;
-    SV          *mysv2;
-
-#define RExC_lastparse  (pRExC_state->lastparse)
-#define RExC_lastnum    (pRExC_state->lastnum)
-#define RExC_paren_name_list    (pRExC_state->paren_name_list)
-#define RExC_study_chunk_recursed_count    (pRExC_state->study_chunk_recursed_count)
-#define RExC_mysv       (pRExC_state->mysv1)
-#define RExC_mysv1      (pRExC_state->mysv1)
-#define RExC_mysv2      (pRExC_state->mysv2)
-
-#endif
-    bool        seen_d_op;
-    bool        strict;
-    bool        study_started;
-    bool        in_script_run;
-    bool        use_BRANCHJ;
-    bool        sWARN_EXPERIMENTAL__VLB;
-    bool        sWARN_EXPERIMENTAL__REGEX_SETS;
-};
-
-#define RExC_flags      (pRExC_state->flags)
-#define RExC_pm_flags   (pRExC_state->pm_flags)
-#define RExC_precomp    (pRExC_state->precomp)
-#define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
-#define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
-#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
-#define RExC_precomp_end (pRExC_state->precomp_end)
-#define RExC_rx_sv      (pRExC_state->rx_sv)
-#define RExC_rx         (pRExC_state->rx)
-#define RExC_rxi        (pRExC_state->rxi)
-#define RExC_start      (pRExC_state->start)
-#define RExC_end        (pRExC_state->end)
-#define RExC_parse      (pRExC_state->parse)
-#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
-#define RExC_whilem_seen        (pRExC_state->whilem_seen)
-#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
-                                                   under /d from /u ? */
-
-#define RExC_emit       (pRExC_state->emit)
-#define RExC_emit_start (pRExC_state->emit_start)
-#define RExC_sawback    (pRExC_state->sawback)
-#define RExC_seen       (pRExC_state->seen)
-#define RExC_size       (pRExC_state->size)
-#define RExC_maxlen        (pRExC_state->maxlen)
-#define RExC_npar       (pRExC_state->npar)
-#define RExC_total_parens       (pRExC_state->total_par)
-#define RExC_parens_buf_size    (pRExC_state->parens_buf_size)
-#define RExC_nestroot   (pRExC_state->nestroot)
-#define RExC_seen_zerolen       (pRExC_state->seen_zerolen)
-#define RExC_utf8       (pRExC_state->utf8)
-#define RExC_uni_semantics      (pRExC_state->uni_semantics)
-#define RExC_orig_utf8  (pRExC_state->orig_utf8)
-#define RExC_open_parens        (pRExC_state->open_parens)
-#define RExC_close_parens       (pRExC_state->close_parens)
-#define RExC_end_op     (pRExC_state->end_op)
-#define RExC_paren_names        (pRExC_state->paren_names)
-#define RExC_recurse    (pRExC_state->recurse)
-#define RExC_recurse_count      (pRExC_state->recurse_count)
-#define RExC_sets_depth         (pRExC_state->sets_depth)
-#define RExC_study_chunk_recursed        (pRExC_state->study_chunk_recursed)
-#define RExC_study_chunk_recursed_bytes  \
-                                   (pRExC_state->study_chunk_recursed_bytes)
-#define RExC_in_lookaround      (pRExC_state->in_lookaround)
-#define RExC_contains_locale    (pRExC_state->contains_locale)
-#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
-
-#ifdef EBCDIC
-#  define SET_recode_x_to_native(x)                                         \
-                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
-#else
-#  define SET_recode_x_to_native(x) NOOP
-#endif
-
-#define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
-#define RExC_frame_head (pRExC_state->frame_head)
-#define RExC_frame_last (pRExC_state->frame_last)
-#define RExC_frame_count (pRExC_state->frame_count)
-#define RExC_strict (pRExC_state->strict)
-#define RExC_study_started      (pRExC_state->study_started)
-#define RExC_warn_text (pRExC_state->warn_text)
-#define RExC_in_script_run      (pRExC_state->in_script_run)
-#define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
-#define RExC_warned_WARN_EXPERIMENTAL__VLB (pRExC_state->sWARN_EXPERIMENTAL__VLB)
-#define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
-#define RExC_unlexed_names (pRExC_state->unlexed_names)
-
-
-/***********************************************************************/
-/* UTILITY MACROS FOR ADVANCING OR SETTING THE PARSE "CURSOR" RExC_parse
- *
- * All of these macros depend on the above RExC_ accessor macros, which
- * in turns depend on a variable pRExC_state being in scope where they
- * are used. This is the standard regexp parser context variable which is
- * passed into every non-trivial parse function in this file.
- *
- * Note that the UTF macro is itself a wrapper around RExC_utf8, so all
- * of the macros which do not take an argument will operate on the
- * pRExC_state structure *only*.
- *
- * Please do NOT modify RExC_parse without using these macros. In the
- * future these macros will be extended for enhanced debugging and trace
- * output during the parse process.
- */
-
-/* RExC_parse_incf(flag)
- *
- * Increment RExC_parse to point at the next codepoint, while doing
- * the right thing depending on whether we are parsing UTF-8 strings
- * or not. The 'flag' argument determines if content is UTF-8 or not,
- * intended for cases where this is NOT governed by the UTF macro.
- *
- * Use RExC_parse_inc() if UTF-8ness is controlled by the UTF macro.
- *
- * WARNING: Does NOT take into account RExC_end; it is the callers
- * responsibility to make sure there are enough octets left in
- * RExC_parse to ensure that when processing UTF-8 we would not read
- * past the end of the string.
- */
-#define RExC_parse_incf(flag) STMT_START {              \
-    RExC_parse += (flag) ? UTF8SKIP(RExC_parse) : 1;    \
-} STMT_END
-
-/* RExC_parse_inc_safef(flag)
- *
- * Safely increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not and NOT reading past the end of the buffer.
- * The 'flag' argument determines if content is UTF-8 or not,
- * intended for cases where this is NOT governed by the UTF macro.
- *
- * Use RExC_parse_safe() if UTF-8ness is controlled by the UTF macro.
- *
- * NOTE: Will NOT read past RExC_end when content is UTF-8.
- */
-#define RExC_parse_inc_safef(flag) STMT_START {                     \
-    RExC_parse += (flag) ? UTF8_SAFE_SKIP(RExC_parse,RExC_end) : 1; \
-} STMT_END
-
-/* RExC_parse_inc()
- *
- * Increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not.
- *
- * WARNING: Does NOT take into account RExC_end, it is the callers
- * responsibility to make sure there are enough octets left in
- * RExC_parse to ensure that when processing UTF-8 we would not read
- * past the end of the string.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc() RExC_parse_incf(UTF)
-
-/* RExC_parse_inc_safe()
- *
- * Safely increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not and NOT reading past the end of the buffer.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc_safe() RExC_parse_inc_safef(UTF)
-
-/* RExC_parse_inc_utf8()
- *
- * Increment RExC_parse to point at the next utf8 codepoint,
- * assumes content is UTF-8.
- *
- * WARNING: Does NOT take into account RExC_end; it is the callers
- * responsibility to make sure there are enough octets left in RExC_parse
- * to ensure that when processing UTF-8 we would not read past the end
- * of the string.
- */
-#define RExC_parse_inc_utf8() STMT_START {  \
-    RExC_parse += UTF8SKIP(RExC_parse);     \
-} STMT_END
-
-/* RExC_parse_inc_if_char()
- *
- * Increment RExC_parse to point at the next codepoint, if and only
- * if the current parse point is NOT a NULL, while doing the right thing
- * depending on whether we are parsing UTF-8 strings or not.
- *
- * WARNING: Does NOT take into account RExC_end, it is the callers
- * responsibility to make sure there are enough octets left in RExC_parse
- * to ensure that when processing UTF-8 we would not read past the end
- * of the string.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc_if_char() STMT_START {         \
-    RExC_parse += SKIP_IF_CHAR(RExC_parse,RExC_end);  \
-} STMT_END
-
-/* RExC_parse_inc_by(n_octets)
- *
- * Increment the parse cursor by the number of octets specified by
- * the 'n_octets' argument.
- *
- * NOTE: Does NOT check ANY constraints. It is the callers responsibility
- * that this will not move past the end of the string, or leave the
- * pointer in the middle of a UTF-8 sequence.
- *
- * Typically used to advanced past previously analyzed content.
- */
-#define RExC_parse_inc_by(n_octets) STMT_START {  \
-    RExC_parse += (n_octets);                     \
-} STMT_END
-
-/* RExC_parse_set(to_ptr)
- *
- * Sets the RExC_parse pointer to the pointer specified by the 'to'
- * argument. No validation whatsoever is performed on the to pointer.
- */
-#define RExC_parse_set(to_ptr) STMT_START { \
-    RExC_parse = (to_ptr);                  \
-} STMT_END
-
-/**********************************************************************/
-
-/* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
- * a flag to disable back-off on the fixed/floating substrings - if it's
- * a high complexity pattern we assume the benefit of avoiding a full match
- * is worth the cost of checking for the substrings even if they rarely help.
- */
-#define RExC_naughty    (pRExC_state->naughty)
-#define TOO_NAUGHTY (10)
-#define MARK_NAUGHTY(add) \
-    if (RExC_naughty < TOO_NAUGHTY) \
-        RExC_naughty += (add)
-#define MARK_NAUGHTY_EXP(exp, add) \
-    if (RExC_naughty < TOO_NAUGHTY) \
-        RExC_naughty += RExC_naughty / (exp) + (add)
-
-#define isNON_BRACE_QUANTIFIER(c)   ((c) == '*' || (c) == '+' || (c) == '?')
-#define isQUANTIFIER(s,e)  (   isNON_BRACE_QUANTIFIER(*s)                      \
-                            || ((*s) == '{' && regcurly(s, e, NULL)))
-
-/*
- * Flags to be passed up.
- */
-#define HASWIDTH        0x01    /* Known to not match null strings, could match
-                                   non-null ones. */
-#define SIMPLE          0x02    /* Exactly one character wide */
-                                /* (or LNBREAK as a special case) */
-#define POSTPONED       0x08    /* (?1),(?&name), (??{...}) or similar */
-#define TRYAGAIN        0x10    /* Weeded out a declaration. */
-#define RESTART_PARSE   0x20    /* Need to redo the parse */
-#define NEED_UTF8       0x40    /* In conjunction with RESTART_PARSE, need to
-                                   calcuate sizes as UTF-8 */
-
-#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
-
-/* whether trie related optimizations are enabled */
-#if PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION
-#define TRIE_STUDY_OPT
-#define FULL_TRIE_STUDY
-#define TRIE_STCLASS
-#endif
-
-/* About the term "restudy" and the var "restudied" and the defines
- * "SCF_TRIE_RESTUDY" and "SCF_TRIE_DOING_RESTUDY": All of these relate to
- * doing multiple study_chunk() calls over the same set of opcodes for* the
- * purpose of enhanced TRIE optimizations.
- *
- * Specifically, when TRIE_STUDY_OPT is defined, and it is defined in normal
- * builds, (see above), during compilation SCF_TRIE_RESTUDY may be enabled
- * which then causes the Perl_re_op_compile() to then call the optimizer
- * S_study_chunk() a second time to perform additional optimizations,
- * including the aho_corasick startclass optimization.
- * This additional pass will only happen once, which is managed by the
- * 'restudied' variable in Perl_re_op_compile().
- *
- * When this second pass is under way the flags passed into study_chunk() will
- * include SCF_TRIE_DOING_RESTUDY and this flag is and must be cascaded down
- * to any recursive calls to S_study_chunk().
- *
- * IMPORTANT: Any logic in study_chunk() that emits warnings should check that
- * the SCF_TRIE_DOING_RESTUDY flag is NOT set in 'flags', or the warning may
- * be produced twice.
- *
- * See commit 07be1b83a6b2d24b492356181ddf70e1c7917ae3 and
- * 688e03912e3bff2d2419c457d8b0e1bab3eb7112 for more details.
- */
-
-
-#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
-#define PBITVAL(paren) (1 << ((paren) & 7))
-#define PAREN_OFFSET(depth) \
-    (RExC_study_chunk_recursed + (depth) * RExC_study_chunk_recursed_bytes)
-#define PAREN_TEST(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) & PBITVAL(paren))
-#define PAREN_SET(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) |= PBITVAL(paren))
-#define PAREN_UNSET(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) &= ~PBITVAL(paren))
-
-#define REQUIRE_UTF8(flagp) STMT_START {                                   \
-                                     if (!UTF) {                           \
-                                         *flagp = RESTART_PARSE|NEED_UTF8; \
-                                         return 0;                         \
-                                     }                                     \
-                             } STMT_END
-
-/* /u is to be chosen if we are supposed to use Unicode rules, or if the
- * pattern is in UTF-8.  This latter condition is in case the outermost rules
- * are locale.  See GH #17278 */
-#define toUSE_UNI_CHARSET_NOT_DEPENDS (RExC_uni_semantics || UTF)
-
-/* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
- * a flag that indicates we need to override /d with /u as a result of
- * something in the pattern.  It should only be used in regards to calling
- * set_regex_charset() or get_regex_charset() */
-#define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
-    STMT_START {                                                            \
-            if (DEPENDS_SEMANTICS) {                                        \
-                set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
-                RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
-                    /* No need to restart the parse if we haven't seen      \
-                     * anything that differs between /u and /d, and no need \
-                     * to restart immediately if we're going to reparse     \
-                     * anyway to count parens */                            \
-                    *flagp |= RESTART_PARSE;                                \
-                    return restart_retval;                                  \
-                }                                                           \
-            }                                                               \
-    } STMT_END
-
-#define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
-    STMT_START {                                                            \
-                RExC_use_BRANCHJ = 1;                                       \
-                *flagp |= RESTART_PARSE;                                    \
-                return restart_retval;                                      \
-    } STMT_END
-
-/* Until we have completed the parse, we leave RExC_total_parens at 0 or
- * less.  After that, it must always be positive, because the whole re is
- * considered to be surrounded by virtual parens.  Setting it to negative
- * indicates there is some construct that needs to know the actual number of
- * parens to be properly handled.  And that means an extra pass will be
- * required after we've counted them all */
-#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
-#define REQUIRE_PARENS_PASS                                                 \
-    STMT_START {  /* No-op if have completed a pass */                      \
-                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
-    } STMT_END
-#define IN_PARENS_PASS (RExC_total_parens < 0)
-
-
-/* This is used to return failure (zero) early from the calling function if
- * various flags in 'flags' are set.  Two flags always cause a return:
- * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
- * additional flags that should cause a return; 0 if none.  If the return will
- * be done, '*flagp' is first set to be all of the flags that caused the
- * return. */
-#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
-    STMT_START {                                                            \
-            if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
-                *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
-                return 0;                                                   \
-            }                                                               \
-    } STMT_END
-
-#define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
-
-#define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
-                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
-#define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
-                                    if (MUST_RESTART(*(flagp))) return 0
-
-/* This converts the named class defined in regcomp.h to its equivalent class
- * number defined in handy.h. */
-#define namedclass_to_classnum(class)  ((int) ((class) / 2))
-#define classnum_to_namedclass(classnum)  ((classnum) * 2)
-
-#define _invlist_union_complement_2nd(a, b, output) \
-                        _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
-#define _invlist_intersection_complement_2nd(a, b, output) \
-                 _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
-
-/* We add a marker if we are deferring expansion of a property that is both
- * 1) potentiallly user-defined; and
- * 2) could also be an official Unicode property.
- *
- * Without this marker, any deferred expansion can only be for a user-defined
- * one.  This marker shouldn't conflict with any that could be in a legal name,
- * and is appended to its name to indicate this.  There is a string and
- * character form */
-#define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
-#define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
-
-/* What is infinity for optimization purposes */
-#define OPTIMIZE_INFTY  SSize_t_MAX
-
-/* About scan_data_t.
-
-  During optimisation we recurse through the regexp program performing
-  various inplace (keyhole style) optimisations. In addition study_chunk
-  and scan_commit populate this data structure with information about
-  what strings MUST appear in the pattern. We look for the longest
-  string that must appear at a fixed location, and we look for the
-  longest string that may appear at a floating location. So for instance
-  in the pattern:
-
-    /FOO[xX]A.*B[xX]BAR/
-
-  Both 'FOO' and 'A' are fixed strings. Both 'B' and 'BAR' are floating
-  strings (because they follow a .* construct). study_chunk will identify
-  both FOO and BAR as being the longest fixed and floating strings respectively.
-
-  The strings can be composites, for instance
-
-     /(f)(o)(o)/
-
-  will result in a composite fixed substring 'foo'.
-
-  For each string some basic information is maintained:
-
-  - min_offset
-    This is the position the string must appear at, or not before.
-    It also implicitly (when combined with minlenp) tells us how many
-    characters must match before the string we are searching for.
-    Likewise when combined with minlenp and the length of the string it
-    tells us how many characters must appear after the string we have
-    found.
-
-  - max_offset
-    Only used for floating strings. This is the rightmost point that
-    the string can appear at. If set to OPTIMIZE_INFTY it indicates that the
-    string can occur infinitely far to the right.
-    For fixed strings, it is equal to min_offset.
-
-  - minlenp
-    A pointer to the minimum number of characters of the pattern that the
-    string was found inside. This is important as in the case of positive
-    lookahead or positive lookbehind we can have multiple patterns
-    involved. Consider
-
-    /(?=FOO).*F/
-
-    The minimum length of the pattern overall is 3, the minimum length
-    of the lookahead part is 3, but the minimum length of the part that
-    will actually match is 1. So 'FOO's minimum length is 3, but the
-    minimum length for the F is 1. This is important as the minimum length
-    is used to determine offsets in front of and behind the string being
-    looked for.  Since strings can be composites this is the length of the
-    pattern at the time it was committed with a scan_commit. Note that
-    the length is calculated by study_chunk, so that the minimum lengths
-    are not known until the full pattern has been compiled, thus the
-    pointer to the value.
-
-  - lookbehind
-
-    In the case of lookbehind the string being searched for can be
-    offset past the start point of the final matching string.
-    If this value was just blithely removed from the min_offset it would
-    invalidate some of the calculations for how many chars must match
-    before or after (as they are derived from min_offset and minlen and
-    the length of the string being searched for).
-    When the final pattern is compiled and the data is moved from the
-    scan_data_t structure into the regexp structure the information
-    about lookbehind is factored in, with the information that would
-    have been lost precalculated in the end_shift field for the
-    associated string.
-
-  The fields pos_min and pos_delta are used to store the minimum offset
-  and the delta to the maximum offset at the current point in the pattern.
-
-*/
-
-struct scan_data_substrs {
-    SV      *str;       /* longest substring found in pattern */
-    SSize_t min_offset; /* earliest point in string it can appear */
-    SSize_t max_offset; /* latest point in string it can appear */
-    SSize_t *minlenp;   /* pointer to the minlen relevant to the string */
-    SSize_t lookbehind; /* is the pos of the string modified by LB */
-    I32 flags;          /* per substring SF_* and SCF_* flags */
-};
-
-typedef struct scan_data_t {
-    /*I32 len_min;      unused */
-    /*I32 len_delta;    unused */
-    SSize_t pos_min;
-    SSize_t pos_delta;
-    SV *last_found;
-    SSize_t last_end;       /* min value, <0 unless valid. */
-    SSize_t last_start_min;
-    SSize_t last_start_max;
-    U8      cur_is_floating; /* whether the last_* values should be set as
-                              * the next fixed (0) or floating (1)
-                              * substring */
-
-    /* [0] is longest fixed substring so far, [1] is longest float so far */
-    struct scan_data_substrs  substrs[2];
-
-    I32 flags;             /* common SF_* and SCF_* flags */
-    I32 whilem_c;
-    SSize_t *last_closep;
-    regnode **last_close_opp; /* pointer to pointer to last CLOSE regop
-                                 seen. DO NOT DEREFERENCE the regnode
-                                 pointer - the op may have been optimized
-                                 away */
-    regnode_ssc *start_class;
-} scan_data_t;
-
-/*
- * Forward declarations for pregcomp()'s friends.
- */
-
-static const scan_data_t zero_scan_data = {
-    0, 0, NULL, 0, 0, 0, 0,
-    {
-        { NULL, 0, 0, 0, 0, 0 },
-        { NULL, 0, 0, 0, 0, 0 },
-    },
-    0, 0, NULL, NULL, NULL
-};
-
-/* study flags */
-
-#define SF_BEFORE_SEOL          0x0001
-#define SF_BEFORE_MEOL          0x0002
-#define SF_BEFORE_EOL           (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
-
-#define SF_IS_INF               0x0040
-#define SF_HAS_PAR              0x0080
-#define SF_IN_PAR               0x0100
-#define SF_HAS_EVAL             0x0200
-
-
-/* SCF_DO_SUBSTR is the flag that tells the regexp analyzer to track the
- * longest substring in the pattern. When it is not set the optimiser keeps
- * track of position, but does not keep track of the actual strings seen,
- *
- * So for instance /foo/ will be parsed with SCF_DO_SUBSTR being true, but
- * /foo/i will not.
- *
- * Similarly, /foo.*(blah|erm|huh).*fnorble/ will have "foo" and "fnorble"
- * parsed with SCF_DO_SUBSTR on, but while processing the (...) it will be
- * turned off because of the alternation (BRANCH). */
-#define SCF_DO_SUBSTR           0x0400
-
-#define SCF_DO_STCLASS_AND      0x0800
-#define SCF_DO_STCLASS_OR       0x1000
-#define SCF_DO_STCLASS          (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
-#define SCF_WHILEM_VISITED_POS  0x2000
-
-#define SCF_TRIE_RESTUDY        0x4000 /* Need to do restudy in study_chunk()?
-                                          Search for "restudy" in this file
-                                          to find a detailed explanation.*/
-#define SCF_SEEN_ACCEPT         0x8000
-#define SCF_TRIE_DOING_RESTUDY 0x10000 /* Are we in restudy right now?
-                                          Search for "restudy" in this file
-                                          to find a detailed explanation. */
-#define SCF_IN_DEFINE          0x20000
-
-
-
-#define UTF cBOOL(RExC_utf8)
-
-/* The enums for all these are ordered so things work out correctly */
-#define LOC (get_regex_charset(RExC_flags) == REGEX_LOCALE_CHARSET)
-#define DEPENDS_SEMANTICS (get_regex_charset(RExC_flags)                    \
-                                                     == REGEX_DEPENDS_CHARSET)
-#define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
-#define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags)                \
-                                                     >= REGEX_UNICODE_CHARSET)
-#define ASCII_RESTRICTED (get_regex_charset(RExC_flags)                      \
-                                            == REGEX_ASCII_RESTRICTED_CHARSET)
-#define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags)             \
-                                            >= REGEX_ASCII_RESTRICTED_CHARSET)
-#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags)                 \
-                                        == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
-
-#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
-
-/* For programs that want to be strictly Unicode compatible by dying if any
- * attempt is made to match a non-Unicode code point against a Unicode
- * property.  */
-#define ALWAYS_WARN_SUPER  ckDEAD(packWARN(WARN_NON_UNICODE))
-
-#define OOB_NAMEDCLASS          -1
-
-/* There is no code point that is out-of-bounds, so this is problematic.  But
- * its only current use is to initialize a variable that is always set before
- * looked at. */
-#define OOB_UNICODE             0xDEADBEEF
-
-#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-
-
-/* length of regex to show in messages that don't mark a position within */
-#define RegexLengthToShowInErrorMessages 127
-
-/*
- * If MARKER[12] are adjusted, be sure to adjust the constants at the top
- * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
- * op/pragma/warn/regcomp.
- */
-#define MARKER1 "<-- HERE"    /* marker as it appears in the description */
-#define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
-
-#define REPORT_LOCATION " in regex; marked by " MARKER1    \
-                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
-
-/* The code in this file in places uses one level of recursion with parsing
- * rebased to an alternate string constructed by us in memory.  This can take
- * the form of something that is completely different from the input, or
- * something that uses the input as part of the alternate.  In the first case,
- * there should be no possibility of an error, as we are in complete control of
- * the alternate string.  But in the second case we don't completely control
- * the input portion, so there may be errors in that.  Here's an example:
- *      /[abc\x{DF}def]/ui
- * is handled specially because \x{df} folds to a sequence of more than one
- * character: 'ss'.  What is done is to create and parse an alternate string,
- * which looks like this:
- *      /(?:\x{DF}|[abc\x{DF}def])/ui
- * where it uses the input unchanged in the middle of something it constructs,
- * which is a branch for the DF outside the character class, and clustering
- * parens around the whole thing. (It knows enough to skip the DF inside the
- * class while in this substitute parse.) 'abc' and 'def' may have errors that
- * need to be reported.  The general situation looks like this:
- *
- *                                       |<------- identical ------>|
- *              sI                       tI               xI       eI
- * Input:       ---------------------------------------------------------------
- * Constructed:         ---------------------------------------------------
- *                      sC               tC               xC       eC     EC
- *                                       |<------- identical ------>|
- *
- * sI..eI   is the portion of the input pattern we are concerned with here.
- * sC..EC   is the constructed substitute parse string.
- *  sC..tC  is constructed by us
- *  tC..eC  is an exact duplicate of the portion of the input pattern tI..eI.
- *          In the diagram, these are vertically aligned.
- *  eC..EC  is also constructed by us.
- * xC       is the position in the substitute parse string where we found a
- *          problem.
- * xI       is the position in the original pattern corresponding to xC.
- *
- * We want to display a message showing the real input string.  Thus we need to
- * translate from xC to xI.  We know that xC >= tC, since the portion of the
- * string sC..tC has been constructed by us, and so shouldn't have errors.  We
- * get:
- *      xI = tI + (xC - tC)
- *
- * When the substitute parse is constructed, the code needs to set:
- *      RExC_start (sC)
- *      RExC_end (eC)
- *      RExC_copy_start_in_input  (tI)
- *      RExC_copy_start_in_constructed (tC)
- * and restore them when done.
- *
- * During normal processing of the input pattern, both
- * 'RExC_copy_start_in_input' and 'RExC_copy_start_in_constructed' are set to
- * sI, so that xC equals xI.
- */
-
-#define sI              RExC_precomp
-#define eI              RExC_precomp_end
-#define sC              RExC_start
-#define eC              RExC_end
-#define tI              RExC_copy_start_in_input
-#define tC              RExC_copy_start_in_constructed
-#define xI(xC)          (tI + (xC - tC))
-#define xI_offset(xC)   (xI(xC) - sI)
-
-#define REPORT_LOCATION_ARGS(xC)                                            \
-    UTF8fARG(UTF,                                                           \
-             (xI(xC) > eI) /* Don't run off end */                          \
-              ? eI - sI   /* Length before the <--HERE */                   \
-              : ((xI_offset(xC) >= 0)                                       \
-                 ? xI_offset(xC)                                            \
-                 : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
-                                    IVdf " trying to output message for "   \
-                                    " pattern %.*s",                        \
-                                    __FILE__, __LINE__, (IV) xI_offset(xC), \
-                                    ((int) (eC - sC)), sC), 0)),            \
-             sI),         /* The input pattern printed up to the <--HERE */ \
-    UTF8fARG(UTF,                                                           \
-             (xI(xC) > eI) ? 0 : eI - xI(xC), /* Length after <--HERE */    \
-             (xI(xC) > eI) ? eI : xI(xC))     /* pattern after <--HERE */
-
-/* Used to point after bad bytes for an error message, but avoid skipping
- * past a nul byte. */
-#define SKIP_IF_CHAR(s, e) (!*(s) ? 0 : UTF ? UTF8_SAFE_SKIP(s, e) : 1)
-
-/* Set up to clean up after our imminent demise */
-#define PREPARE_TO_DIE                                                      \
-    STMT_START {                                                            \
-        if (RExC_rx_sv)                                                     \
-            SAVEFREESV(RExC_rx_sv);                                         \
-        if (RExC_open_parens)                                               \
-            SAVEFREEPV(RExC_open_parens);                                   \
-        if (RExC_close_parens)                                              \
-            SAVEFREEPV(RExC_close_parens);                                  \
-    } STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
- * arg. Show regex, up to a maximum length. If it's too long, chop and add
- * "...".
- */
-#define _FAIL(code) STMT_START {                                        \
-    const char *ellipses = "";                                          \
-    IV len = RExC_precomp_end - RExC_precomp;                           \
-                                                                        \
-    PREPARE_TO_DIE;                                                     \
-    if (len > RegexLengthToShowInErrorMessages) {                       \
-        /* chop 10 shorter than the max, to ensure meaning of "..." */  \
-        len = RegexLengthToShowInErrorMessages - 10;                    \
-        ellipses = "...";                                               \
-    }                                                                   \
-    code;                                                               \
-} STMT_END
-
-#define FAIL(msg) _FAIL(                            \
-    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",         \
-            msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-#define FAIL2(msg,arg) _FAIL(                       \
-    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
-            arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-#define FAIL3(msg,arg1,arg2) _FAIL(                         \
-    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
-     arg1, arg2, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-/*
- * Simple_vFAIL -- like FAIL, but marks the current location in the scan
- */
-#define Simple_vFAIL(m) STMT_START {                                    \
-    Perl_croak(aTHX_ "%s" REPORT_LOCATION,                              \
-            m, REPORT_LOCATION_ARGS(RExC_parse));                       \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
- */
-#define vFAIL(m) STMT_START {                           \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL(m);                                    \
-} STMT_END
-
-/*
- * Like Simple_vFAIL(), but accepts two arguments.
- */
-#define Simple_vFAIL2(m,a1) STMT_START {                        \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,                \
-                      REPORT_LOCATION_ARGS(RExC_parse));        \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
- */
-#define vFAIL2(m,a1) STMT_START {                       \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL2(m, a1);                               \
-} STMT_END
-
-
-/*
- * Like Simple_vFAIL(), but accepts three arguments.
- */
-#define Simple_vFAIL3(m, a1, a2) STMT_START {                   \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,            \
-            REPORT_LOCATION_ARGS(RExC_parse));                  \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
- */
-#define vFAIL3(m,a1,a2) STMT_START {                    \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL3(m, a1, a2);                           \
-} STMT_END
-
-/*
- * Like Simple_vFAIL(), but accepts four arguments.
- */
-#define Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3,        \
-            REPORT_LOCATION_ARGS(RExC_parse));                  \
-} STMT_END
-
-#define vFAIL4(m,a1,a2,a3) STMT_START {                 \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL4(m, a1, a2, a3);                       \
-} STMT_END
-
-/* A specialized version of vFAIL2 that works with UTF8f */
-#define vFAIL2utf8f(m, a1) STMT_START {             \
-    PREPARE_TO_DIE;                                 \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
-            REPORT_LOCATION_ARGS(RExC_parse));      \
-} STMT_END
-
-#define vFAIL3utf8f(m, a1, a2) STMT_START {             \
-    PREPARE_TO_DIE;                                     \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
-            REPORT_LOCATION_ARGS(RExC_parse));          \
-} STMT_END
-
-/* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
-    STMT_START {                                                            \
-      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
-      RExC_copy_start_in_constructed = NULL;                                \
-    } STMT_END
-#define RESTORE_WARNINGS                                                    \
-    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
-
-/* Since a warning can be generated multiple times as the input is reparsed, we
- * output it the first time we come to that point in the parse, but suppress it
- * otherwise.  'RExC_copy_start_in_constructed' being NULL is a flag to not
- * generate any warnings */
-#define TO_OUTPUT_WARNINGS(loc)                                         \
-  (   RExC_copy_start_in_constructed                                    \
-   && ((xI(loc)) - RExC_precomp) > (Ptrdiff_t) RExC_latest_warn_offset)
-
-/* After we've emitted a warning, we save the position in the input so we don't
- * output it again */
-#define UPDATE_WARNINGS_LOC(loc)                                        \
-    STMT_START {                                                        \
-        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            RExC_latest_warn_offset = MAX(sI, MIN(eI, xI(loc)))         \
-                                                       - RExC_precomp;  \
-        }                                                               \
-    } STMT_END
-
-/* 'warns' is the output of the packWARNx macro used in 'code' */
-#define _WARN_HELPER(loc, warns, code)                                  \
-    STMT_START {                                                        \
-        if (! RExC_copy_start_in_constructed) {                         \
-            Perl_croak( aTHX_ "panic! %s: %d: Tried to warn when none"  \
-                              " expected at '%s'",                      \
-                              __FILE__, __LINE__, loc);                 \
-        }                                                               \
-        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            if (ckDEAD(warns))                                          \
-                PREPARE_TO_DIE;                                         \
-            code;                                                       \
-            UPDATE_WARNINGS_LOC(loc);                                   \
-        }                                                               \
-    } STMT_END
-
-/* m is not necessarily a "literal string", in this macro */
-#define warn_non_literal_string(loc, packed_warn, m)                    \
-    _WARN_HELPER(loc, packed_warn,                                      \
-                      Perl_warner(aTHX_ packed_warn,                    \
-                                       "%s" REPORT_LOCATION,            \
-                                  m, REPORT_LOCATION_ARGS(loc)))
-#define reg_warn_non_literal_string(loc, m)                             \
-                warn_non_literal_string(loc, packWARN(WARN_REGEXP), m)
-
-#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
-    STMT_START {                                                            \
-                char * format;                                              \
-                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
-                Newx(format, format_size, char);                            \
-                my_strlcpy(format, m, format_size);                         \
-                my_strlcat(format, REPORT_LOCATION, format_size);           \
-                SAVEFREEPV(format);                                         \
-                _WARN_HELPER(loc, packwarn,                                 \
-                      Perl_ck_warner(aTHX_ packwarn,                        \
-                                        format,                             \
-                                        a1, REPORT_LOCATION_ARGS(loc)));    \
-    } STMT_END
-
-#define ckWARNreg(loc,m)                                                \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
-                                          m REPORT_LOCATION,            \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN(loc, m)                                                   \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       REPORT_LOCATION_ARGS(loc)))      \
-
-#define vWARN_dep(loc, m)                                               \
-    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
-                      Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),      \
-                                       m REPORT_LOCATION,               \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNdep(loc,m)                                                \
-    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
-                      Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
-                                            m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNregdep(loc,m)                                                 \
-    _WARN_HELPER(loc, packWARN2(WARN_DEPRECATED, WARN_REGEXP),              \
-                      Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED,     \
-                                                      WARN_REGEXP),         \
-                                             m REPORT_LOCATION,             \
-                                             REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN2reg_d(loc,m, a1)                                             \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),         \
-                                            m REPORT_LOCATION,              \
-                                            a1, REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN2reg(loc, m, a1)                                              \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
-                                          m REPORT_LOCATION,                \
-                                          a1, REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN3(loc, m, a1, a2)                                              \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),              \
-                                       m REPORT_LOCATION,                   \
-                                       a1, a2, REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN3reg(loc, m, a1, a2)                                          \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
-                                          m REPORT_LOCATION,                \
-                                          a1, a2,                           \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN4(loc, m, a1, a2, a3)                                      \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       a1, a2, a3,                      \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN4reg(loc, m, a1, a2, a3)                                  \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
-                                          m REPORT_LOCATION,            \
-                                          a1, a2, a3,                   \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN5(loc, m, a1, a2, a3, a4)                                  \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       a1, a2, a3, a4,                  \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNexperimental(loc, class, m)                               \
-    STMT_START {                                                        \
-        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
-            RExC_warned_ ## class = 1;                                  \
-            _WARN_HELPER(loc, packWARN(class),                          \
-                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
-                                            m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)));\
-        }                                                               \
-    } STMT_END
-
-#define ckWARNexperimental_with_arg(loc, class, m, arg)                 \
-    STMT_START {                                                        \
-        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
-            RExC_warned_ ## class = 1;                                  \
-            _WARN_HELPER(loc, packWARN(class),                          \
-                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
-                                       m REPORT_LOCATION,               \
-                                       arg, REPORT_LOCATION_ARGS(loc)));\
-        }                                                               \
-    } STMT_END
-
-/* Convert between a pointer to a node and its offset from the beginning of the
- * program */
-#define REGNODE_p(offset)    (RExC_emit_start + (offset))
-#define REGNODE_OFFSET(node) (__ASSERT_((node) >= RExC_emit_start)      \
-                              (SSize_t) ((node) - RExC_emit_start))
-
-#define ProgLen(ri) ri->proglen
-#define SetProgLen(ri,x) ri->proglen = x
-
-#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
-#define EXPERIMENTAL_INPLACESCAN
-#endif /*PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS*/
-
-STATIC void
-S_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len)
-{
-    PERL_ARGS_ASSERT_POPULATE_BITMAP_FROM_INVLIST;
-
-    /* As the name says.  The zeroth bit corresponds to the code point given by
-     * 'offset' */
-
-    UV start, end;
-
-    Zero(bitmap, len, U8);
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        assert(start >= offset);
-
-        for (UV i = start; i <= end; i++) {
-            UV adjusted = i - offset;
-
-            BITMAP_BYTE(bitmap, adjusted) |= BITMAP_BIT(adjusted);
-        }
-    }
-    invlist_iterfinish(invlist);
-}
-
-STATIC void
-S_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_len, SV ** invlist, const UV offset)
-{
-    PERL_ARGS_ASSERT_POPULATE_INVLIST_FROM_BITMAP;
-
-    /* As the name says.  The zeroth bit corresponds to the code point given by
-     * 'offset' */
-
-    Size_t i;
-
-    for (i = 0; i < bitmap_len; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            int start = i++;
-
-            /* Save a little work by adding a range all at once instead of bit
-             * by bit */
-            while (i < bitmap_len && BITMAP_TEST(bitmap, i)) {
-                i++;
-            }
-
-            *invlist = _add_range_to_invlist(*invlist,
-                                             start + offset,
-                                             i + offset - 1);
-        }
-    }
-}
-
-#ifdef DEBUGGING
-int
-Perl_re_printf(pTHX_ const char *fmt, ...)
-{
-    va_list ap;
-    int result;
-    PerlIO *f= Perl_debug_log;
-    PERL_ARGS_ASSERT_RE_PRINTF;
-    va_start(ap, fmt);
-    result = PerlIO_vprintf(f, fmt, ap);
-    va_end(ap);
-    return result;
-}
-
-int
-Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
-{
-    va_list ap;
-    int result;
-    PerlIO *f= Perl_debug_log;
-    PERL_ARGS_ASSERT_RE_INDENTF;
-    va_start(ap, depth);
-    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
-    result = PerlIO_vprintf(f, fmt, ap);
-    va_end(ap);
-    return result;
-}
-#endif /* DEBUGGING */
-
-#define DEBUG_RExC_seen()                                                   \
-        DEBUG_OPTIMISE_MORE_r({                                             \
-            Perl_re_printf( aTHX_ "RExC_seen: ");                           \
-                                                                            \
-            if (RExC_seen & REG_ZERO_LEN_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                \
-                                                                            \
-            if (RExC_seen & REG_LOOKBEHIND_SEEN)                            \
-                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");              \
-                                                                            \
-            if (RExC_seen & REG_GPOS_SEEN)                                  \
-                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                    \
-                                                                            \
-            if (RExC_seen & REG_RECURSE_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                 \
-                                                                            \
-            if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                    \
-                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");      \
-                                                                            \
-            if (RExC_seen & REG_VERBARG_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                 \
-                                                                            \
-            if (RExC_seen & REG_CUTGROUP_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                \
-                                                                            \
-            if (RExC_seen & REG_RUN_ON_COMMENT_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");          \
-                                                                            \
-            if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");          \
-                                                                            \
-            if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                  \
-                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");    \
-                                                                            \
-            Perl_re_printf( aTHX_ "\n");                                    \
-        });
-
-#define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
-  if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
-
-
-#ifdef DEBUGGING
-static void
-S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
-                                    const char *close_str)
-{
-    if (!flags)
-        return;
-
-    Perl_re_printf( aTHX_  "%s", open_str);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
-    Perl_re_printf( aTHX_  "%s", close_str);
-}
-
-
-static void
-S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
-                    U32 depth, int is_inf,
-                    SSize_t min, SSize_t stopmin, SSize_t delta)
-{
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    DEBUG_OPTIMISE_MORE_r({
-        if (!data)
-            return;
-        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
-            depth,
-            where,
-            min, stopmin, delta,
-            (IV)data->pos_min,
-            (IV)data->pos_delta,
-            (UV)data->flags
-        );
-
-        S_debug_show_study_flags(aTHX_ data->flags," [","]");
-
-        Perl_re_printf( aTHX_
-            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
-            (IV)data->whilem_c,
-            (IV)(data->last_closep ? *((data)->last_closep) : -1),
-            is_inf ? "INF " : ""
-        );
-
-        if (data->last_found) {
-            int i;
-            Perl_re_printf(aTHX_
-                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
-                    SvPVX_const(data->last_found),
-                    (IV)data->last_end,
-                    (IV)data->last_start_min,
-                    (IV)data->last_start_max
-            );
-
-            for (i = 0; i < 2; i++) {
-                Perl_re_printf(aTHX_
-                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
-                    data->cur_is_floating == i ? "*" : "",
-                    i ? "Float" : "Fixed",
-                    SvPVX_const(data->substrs[i].str),
-                    (IV)data->substrs[i].min_offset,
-                    (IV)data->substrs[i].max_offset
-                );
-                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
-            }
-        }
-
-        Perl_re_printf( aTHX_ "\n");
-    });
-}
-
-
-static void
-S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
-                regnode *scan, U32 depth, U32 flags)
-{
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    DEBUG_OPTIMISE_r({
-        regnode *Next;
-
-        if (!scan)
-            return;
-        Next = regnext(scan);
-        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
-        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
-            depth,
-            str,
-            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
-            Next ? (REG_NODE_NUM(Next)) : 0 );
-        S_debug_show_study_flags(aTHX_ flags," [ ","]");
-        Perl_re_printf( aTHX_  "\n");
-   });
-}
-
-
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
-                    S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
-
-#  define DEBUG_PEEP(str, scan, depth, flags)   \
-                    S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
-
-#else
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
-#  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
-#endif
-
+#include "regcomp_internal.h"
 
 /* =========================================================
  * BEGIN edit_distance stuff.
@@ -1631,5313 +290,16 @@ S_edit_distance(const UV* src,
 /* END of edit_distance() stuff
  * ========================================================= */
 
-/* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring or the longest found
-   floating substrings if needed. */
-
-STATIC void
-S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
-                    SSize_t *minlenp, int is_inf)
-{
-    const STRLEN l = CHR_SVLEN(data->last_found);
-    SV * const longest_sv = data->substrs[data->cur_is_floating].str;
-    const STRLEN old_l = CHR_SVLEN(longest_sv);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_SCAN_COMMIT;
-
-    if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
-        const U8 i = data->cur_is_floating;
-        SvSetMagicSV(longest_sv, data->last_found);
-        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
-
-        if (!i) /* fixed */
-            data->substrs[0].max_offset = data->substrs[0].min_offset;
-        else { /* float */
-            data->substrs[1].max_offset =
-                      (is_inf)
-                       ? OPTIMIZE_INFTY
-                       : (l
-                          ? data->last_start_max
-                          : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
-                                         ? OPTIMIZE_INFTY
-                                         : data->pos_min + data->pos_delta));
-        }
-
-        data->substrs[i].flags &= ~SF_BEFORE_EOL;
-        data->substrs[i].flags |= data->flags & SF_BEFORE_EOL;
-        data->substrs[i].minlenp = minlenp;
-        data->substrs[i].lookbehind = 0;
-    }
-
-    SvCUR_set(data->last_found, 0);
-    {
-        SV * const sv = data->last_found;
-        if (SvUTF8(sv) && SvMAGICAL(sv)) {
-            MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
-            if (mg)
-                mg->mg_len = 0;
-        }
-    }
-    data->last_end = -1;
-    data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
-}
-
-/* An SSC is just a regnode_charclass_posix with an extra field: the inversion
- * list that describes which code points it matches */
-
-STATIC void
-S_ssc_anything(pTHX_ regnode_ssc *ssc)
-{
-    /* Set the SSC 'ssc' to match an empty string or any code point */
-
-    PERL_ARGS_ASSERT_SSC_ANYTHING;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* mortalize so won't leak */
-    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
-    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
-}
-
-STATIC int
-S_ssc_is_anything(const regnode_ssc *ssc)
-{
-    /* Returns TRUE if the SSC 'ssc' can match the empty string and any code
-     * point; FALSE otherwise.  Thus, this is used to see if using 'ssc' buys
-     * us anything: if the function returns TRUE, 'ssc' hasn't been restricted
-     * in any way, so there's no point in using it */
-
-    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
-    bool ret;
-
-    PERL_ARGS_ASSERT_SSC_IS_ANYTHING;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING)) {
-        return FALSE;
-    }
-
-    /* See if the list consists solely of the range 0 - Infinity */
-    invlist_iterinit(ssc->invlist);
-    ret = invlist_iternext(ssc->invlist, &start, &end)
-          && start == 0
-          && end == UV_MAX;
-
-    invlist_iterfinish(ssc->invlist);
-
-    if (ret) {
-        return TRUE;
-    }
-
-    /* If e.g., both \w and \W are set, matches everything */
-    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        int i;
-        for (i = 0; i < ANYOF_POSIXL_MAX; i += 2) {
-            if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i+1)) {
-                return TRUE;
-            }
-        }
-    }
-
-    return FALSE;
-}
-
-STATIC void
-S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
-{
-    /* Initializes the SSC 'ssc'.  This includes setting it to match an empty
-     * string, any code point, or any posix class under locale */
-
-    PERL_ARGS_ASSERT_SSC_INIT;
-
-    Zero(ssc, 1, regnode_ssc);
-    set_ANYOF_SYNTHETIC(ssc);
-    ARG_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
-    ssc_anything(ssc);
-
-    /* If any portion of the regex is to operate under locale rules that aren't
-     * fully known at compile time, initialization includes it.  The reason
-     * this isn't done for all regexes is that the optimizer was written under
-     * the assumption that locale was all-or-nothing.  Given the complexity and
-     * lack of documentation in the optimizer, and that there are inadequate
-     * test cases for locale, many parts of it may not work properly, it is
-     * safest to avoid locale unless necessary. */
-    if (RExC_contains_locale) {
-        ANYOF_POSIXL_SETALL(ssc);
-    }
-    else {
-        ANYOF_POSIXL_ZERO(ssc);
-    }
-}
-
-STATIC int
-S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
-                        const regnode_ssc *ssc)
-{
-    /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
-     * to the list of code points matched, and locale posix classes; hence does
-     * not check its flags) */
-
-    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
-    bool ret;
-
-    PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    invlist_iterinit(ssc->invlist);
-    ret = invlist_iternext(ssc->invlist, &start, &end)
-          && start == 0
-          && end == UV_MAX;
-
-    invlist_iterfinish(ssc->invlist);
-
-    if (! ret) {
-        return FALSE;
-    }
-
-    if (RExC_contains_locale && ! ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc)) {
-        return FALSE;
-    }
-
-    return TRUE;
-}
-
-#define INVLIST_INDEX 0
-#define ONLY_LOCALE_MATCHES_INDEX 1
-#define DEFERRED_USER_DEFINED_INDEX 2
-
-STATIC SV*
-S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
-                               const regnode_charclass* const node)
-{
-    /* Returns a mortal inversion list defining which code points are matched
-     * by 'node', which is of ANYOF-ish type .  Handles complementing the
-     * result if appropriate.  If some code points aren't knowable at this
-     * time, the returned list must, and will, contain every code point that is
-     * a possibility. */
-
-    SV* invlist = NULL;
-    SV* only_utf8_locale_invlist = NULL;
-    bool new_node_has_latin1 = FALSE;
-    const U8 flags = (REGNODE_TYPE(OP(node)) == ANYOF)
-                      ? ANYOF_FLAGS(node)
-                      : 0;
-
-    PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
-
-    /* Look at the data structure created by S_set_ANYOF_arg() */
-    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
-        invlist = sv_2mortal(_new_invlist(1));
-        invlist = _add_range_to_invlist(invlist, NUM_ANYOF_CODE_POINTS, UV_MAX);
-    }
-    else if (ANYOF_HAS_AUX(node)) {
-        const U32 n = ARG(node);
-        SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
-        AV * const av = MUTABLE_AV(SvRV(rv));
-        SV **const ary = AvARRAY(av);
-
-        if (av_tindex_skip_len_mg(av) >= DEFERRED_USER_DEFINED_INDEX) {
-
-            /* Here there are things that won't be known until runtime -- we
-             * have to assume it could be anything */
-            invlist = sv_2mortal(_new_invlist(1));
-            return _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else if (ary[INVLIST_INDEX]) {
-
-            /* Use the node's inversion list */
-            invlist = sv_2mortal(invlist_clone(ary[INVLIST_INDEX], NULL));
-        }
-
-        /* Get the code points valid only under UTF-8 locales */
-        if (   (flags & ANYOFL_FOLD)
-            &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
-        {
-            only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
-        }
-    }
-
-    if (! invlist) {
-        invlist = sv_2mortal(_new_invlist(0));
-    }
-
-    /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
-     * code points, and an inversion list for the others, but if there are code
-     * points that should match only conditionally on the target string being
-     * UTF-8, those are placed in the inversion list, and not the bitmap.
-     * Since there are circumstances under which they could match, they are
-     * included in the SSC.  But if the ANYOF node is to be inverted, we have
-     * to exclude them here, so that when we invert below, the end result
-     * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
-     * have to do this here before we add the unconditionally matched code
-     * points */
-    if (flags & ANYOF_INVERT) {
-        _invlist_intersection_complement_2nd(invlist,
-                                             PL_UpperLatin1,
-                                             &invlist);
-    }
-
-    /* Add in the points from the bit map */
-    if (REGNODE_TYPE(OP(node)) == ANYOF){
-        for (unsigned i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-            if (ANYOF_BITMAP_TEST(node, i)) {
-                unsigned int start = i++;
-
-                for (;    i < NUM_ANYOF_CODE_POINTS
-                       && ANYOF_BITMAP_TEST(node, i); ++i)
-                {
-                    /* empty */
-                }
-                invlist = _add_range_to_invlist(invlist, start, i-1);
-                new_node_has_latin1 = TRUE;
-            }
-        }
-    }
-
-    /* If this can match all upper Latin1 code points, have to add them
-     * as well.  But don't add them if inverting, as when that gets done below,
-     * it would exclude all these characters, including the ones it shouldn't
-     * that were added just above */
-    if ( ! (flags & ANYOF_INVERT)
-        &&  OP(node) == ANYOFD
-        && (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared))
-    {
-        _invlist_union(invlist, PL_UpperLatin1, &invlist);
-    }
-
-    /* Similarly for these */
-    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
-        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
-    }
-
-    if (flags & ANYOF_INVERT) {
-        _invlist_invert(invlist);
-    }
-    else if (flags & ANYOFL_FOLD) {
-        if (new_node_has_latin1) {
-
-            /* These folds are potential in Turkic locales */
-            if (_invlist_contains_cp(invlist, 'i')) {
-                invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-            }
-            if (_invlist_contains_cp(invlist, 'I')) {
-                invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-            }
-
-            /* Under /li, any 0-255 could fold to any other 0-255, depending on
-             * the locale.  We can skip this if there are no 0-255 at all. */
-            _invlist_union(invlist, PL_Latin1, &invlist);
-        }
-        else {
-            if (_invlist_contains_cp(invlist, LATIN_SMALL_LETTER_DOTLESS_I)) {
-                invlist = add_cp_to_invlist(invlist, 'I');
-            }
-            if (_invlist_contains_cp(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
-            {
-                invlist = add_cp_to_invlist(invlist, 'i');
-            }
-        }
-    }
-
-    /* Similarly add the UTF-8 locale possible matches.  These have to be
-     * deferred until after the non-UTF-8 locale ones are taken care of just
-     * above, or it leads to wrong results under ANYOF_INVERT */
-    if (only_utf8_locale_invlist) {
-        _invlist_union_maybe_complement_2nd(invlist,
-                                            only_utf8_locale_invlist,
-                                            flags & ANYOF_INVERT,
-                                            &invlist);
-    }
-
-    return invlist;
-}
-
-/* These two functions currently do the exact same thing */
-#define ssc_init_zero           ssc_init
-
-#define ssc_add_cp(ssc, cp)   ssc_add_range((ssc), (cp), (cp))
-#define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
-
-/* 'AND' a given class with another one.  Can create false positives.  'ssc'
- * should not be inverted. */
-
-STATIC void
-S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
-                const regnode_charclass *and_with)
-{
-    /* Accumulate into SSC 'ssc' its 'AND' with 'and_with', which is either
-     * another SSC or a regular ANYOF class.  Can create false positives. */
-
-    SV* anded_cp_list;
-    U8  and_with_flags = (REGNODE_TYPE(OP(and_with)) == ANYOF)
-                          ? ANYOF_FLAGS(and_with)
-                          : 0;
-    U8  anded_flags;
-
-    PERL_ARGS_ASSERT_SSC_AND;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* 'and_with' is used as-is if it too is an SSC; otherwise have to extract
-     * the code point inversion list and just the relevant flags */
-    if (is_ANYOF_SYNTHETIC(and_with)) {
-        anded_cp_list = ((regnode_ssc *)and_with)->invlist;
-        anded_flags = and_with_flags;
-
-        /* XXX This is a kludge around what appears to be deficiencies in the
-         * optimizer.  If we make S_ssc_anything() add in the WARN_SUPER flag,
-         * there are paths through the optimizer where it doesn't get weeded
-         * out when it should.  And if we don't make some extra provision for
-         * it like the code just below, it doesn't get added when it should.
-         * This solution is to add it only when AND'ing, which is here, and
-         * only when what is being AND'ed is the pristine, original node
-         * matching anything.  Thus it is like adding it to ssc_anything() but
-         * only when the result is to be AND'ed.  Probably the same solution
-         * could be adopted for the same problem we have with /l matching,
-         * which is solved differently in S_ssc_init(), and that would lead to
-         * fewer false positives than that solution has.  But if this solution
-         * creates bugs, the consequences are only that a warning isn't raised
-         * that should be; while the consequences for having /l bugs is
-         * incorrect matches */
-        if (ssc_is_anything((regnode_ssc *)and_with)) {
-            anded_flags |= ANYOF_WARN_SUPER__shared;
-        }
-    }
-    else {
-        anded_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
-        if (OP(and_with) == ANYOFD) {
-            anded_flags = and_with_flags & ANYOF_COMMON_FLAGS;
-        }
-        else {
-            anded_flags = and_with_flags
-                            & ( ANYOF_COMMON_FLAGS
-                               |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-                               |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
-            if (and_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
-                anded_flags &= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
-            }
-        }
-    }
-
-    ANYOF_FLAGS(ssc) &= anded_flags;
-
-    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
-     * C2 is the list of code points in 'and-with'; P2, its posix classes.
-     * 'and_with' may be inverted.  When not inverted, we have the situation of
-     * computing:
-     *  (C1 | P1) & (C2 | P2)
-     *                     =  (C1 & (C2 | P2)) | (P1 & (C2 | P2))
-     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
-     *                    <=  ((C1 & C2) |       P2)) | ( P1       | (P1 & P2))
-     *                    <=  ((C1 & C2) | P1 | P2)
-     * Alternatively, the last few steps could be:
-     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
-     *                    <=  ((C1 & C2) |  C1      ) | (      C2  | (P1 & P2))
-     *                    <=  (C1 | C2 | (P1 & P2))
-     * We favor the second approach if either P1 or P2 is non-empty.  This is
-     * because these components are a barrier to doing optimizations, as what
-     * they match cannot be known until the moment of matching as they are
-     * dependent on the current locale, 'AND"ing them likely will reduce or
-     * eliminate them.
-     * But we can do better if we know that C1,P1 are in their initial state (a
-     * frequent occurrence), each matching everything:
-     *  (<everything>) & (C2 | P2) =  C2 | P2
-     * Similarly, if C2,P2 are in their initial state (again a frequent
-     * occurrence), the result is a no-op
-     *  (C1 | P1) & (<everything>) =  C1 | P1
-     *
-     * Inverted, we have
-     *  (C1 | P1) & ~(C2 | P2)  =  (C1 | P1) & (~C2 & ~P2)
-     *                          =  (C1 & (~C2 & ~P2)) | (P1 & (~C2 & ~P2))
-     *                         <=  (C1 & ~C2) | (P1 & ~P2)
-     * */
-
-    if ((and_with_flags & ANYOF_INVERT)
-        && ! is_ANYOF_SYNTHETIC(and_with))
-    {
-        unsigned int i;
-
-        ssc_intersection(ssc,
-                         anded_cp_list,
-                         FALSE /* Has already been inverted */
-                         );
-
-        /* If either P1 or P2 is empty, the intersection will be also; can skip
-         * the loop */
-        if (! (and_with_flags & ANYOF_MATCHES_POSIXL)) {
-            ANYOF_POSIXL_ZERO(ssc);
-        }
-        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-
-            /* Note that the Posix class component P from 'and_with' actually
-             * looks like:
-             *      P = Pa | Pb | ... | Pn
-             * where each component is one posix class, such as in [\w\s].
-             * Thus
-             *      ~P = ~(Pa | Pb | ... | Pn)
-             *         = ~Pa & ~Pb & ... & ~Pn
-             *        <= ~Pa | ~Pb | ... | ~Pn
-             * The last is something we can easily calculate, but unfortunately
-             * is likely to have many false positives.  We could do better
-             * in some (but certainly not all) instances if two classes in
-             * P have known relationships.  For example
-             *      :lower: <= :alpha: <= :alnum: <= \w <= :graph: <= :print:
-             * So
-             *      :lower: & :print: = :lower:
-             * And similarly for classes that must be disjoint.  For example,
-             * since \s and \w can have no elements in common based on rules in
-             * the POSIX standard,
-             *      \w & ^\S = nothing
-             * Unfortunately, some vendor locales do not meet the Posix
-             * standard, in particular almost everything by Microsoft.
-             * The loop below just changes e.g., \w into \W and vice versa */
-
-            regnode_charclass_posixl temp;
-            int add = 1;    /* To calculate the index of the complement */
-
-            Zero(&temp, 1, regnode_charclass_posixl);
-            ANYOF_POSIXL_ZERO(&temp);
-            for (i = 0; i < ANYOF_MAX; i++) {
-                assert(i % 2 != 0
-                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)
-                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i + 1));
-
-                if (ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)) {
-                    ANYOF_POSIXL_SET(&temp, i + add);
-                }
-                add = 0 - add; /* 1 goes to -1; -1 goes to 1 */
-            }
-            ANYOF_POSIXL_AND(&temp, ssc);
-
-        } /* else ssc already has no posixes */
-    } /* else: Not inverted.  This routine is a no-op if 'and_with' is an SSC
-         in its initial state */
-    else if (! is_ANYOF_SYNTHETIC(and_with)
-             || ! ssc_is_cp_posixl_init(pRExC_state, (regnode_ssc *)and_with))
-    {
-        /* But if 'ssc' is in its initial state, the result is just 'and_with';
-         * copy it over 'ssc' */
-        if (ssc_is_cp_posixl_init(pRExC_state, ssc)) {
-            if (is_ANYOF_SYNTHETIC(and_with)) {
-                StructCopy(and_with, ssc, regnode_ssc);
-            }
-            else {
-                ssc->invlist = anded_cp_list;
-                ANYOF_POSIXL_ZERO(ssc);
-                if (and_with_flags & ANYOF_MATCHES_POSIXL) {
-                    ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
-                }
-            }
-        }
-        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
-                 || (and_with_flags & ANYOF_MATCHES_POSIXL))
-        {
-            /* One or the other of P1, P2 is non-empty. */
-            if (and_with_flags & ANYOF_MATCHES_POSIXL) {
-                ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
-            }
-            ssc_union(ssc, anded_cp_list, FALSE);
-        }
-        else { /* P1 = P2 = empty */
-            ssc_intersection(ssc, anded_cp_list, FALSE);
-        }
-    }
-}
-
-STATIC void
-S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
-               const regnode_charclass *or_with)
-{
-    /* Accumulate into SSC 'ssc' its 'OR' with 'or_with', which is either
-     * another SSC or a regular ANYOF class.  Can create false positives if
-     * 'or_with' is to be inverted. */
-
-    SV* ored_cp_list;
-    U8 ored_flags;
-    U8  or_with_flags = (REGNODE_TYPE(OP(or_with)) == ANYOF)
-                         ? ANYOF_FLAGS(or_with)
-                         : 0;
-
-    PERL_ARGS_ASSERT_SSC_OR;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* 'or_with' is used as-is if it too is an SSC; otherwise have to extract
-     * the code point inversion list and just the relevant flags */
-    if (is_ANYOF_SYNTHETIC(or_with)) {
-        ored_cp_list = ((regnode_ssc*) or_with)->invlist;
-        ored_flags = or_with_flags;
-    }
-    else {
-        ored_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
-        ored_flags = or_with_flags & ANYOF_COMMON_FLAGS;
-        if (OP(or_with) != ANYOFD) {
-            ored_flags |=
-                or_with_flags & ( ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-                                 |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
-            if (or_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
-                ored_flags |= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
-            }
-        }
-    }
-
-    ANYOF_FLAGS(ssc) |= ored_flags;
-
-    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
-     * C2 is the list of code points in 'or-with'; P2, its posix classes.
-     * 'or_with' may be inverted.  When not inverted, we have the simple
-     * situation of computing:
-     *  (C1 | P1) | (C2 | P2)  =  (C1 | C2) | (P1 | P2)
-     * If P1|P2 yields a situation with both a class and its complement are
-     * set, like having both \w and \W, this matches all code points, and we
-     * can delete these from the P component of the ssc going forward.  XXX We
-     * might be able to delete all the P components, but I (khw) am not certain
-     * about this, and it is better to be safe.
-     *
-     * Inverted, we have
-     *  (C1 | P1) | ~(C2 | P2)  =  (C1 | P1) | (~C2 & ~P2)
-     *                         <=  (C1 | P1) | ~C2
-     *                         <=  (C1 | ~C2) | P1
-     * (which results in actually simpler code than the non-inverted case)
-     * */
-
-    if ((or_with_flags & ANYOF_INVERT)
-        && ! is_ANYOF_SYNTHETIC(or_with))
-    {
-        /* We ignore P2, leaving P1 going forward */
-    }   /* else  Not inverted */
-    else if (or_with_flags & ANYOF_MATCHES_POSIXL) {
-        ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
-        if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-            unsigned int i;
-            for (i = 0; i < ANYOF_MAX; i += 2) {
-                if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i + 1))
-                {
-                    ssc_match_all_cp(ssc);
-                    ANYOF_POSIXL_CLEAR(ssc, i);
-                    ANYOF_POSIXL_CLEAR(ssc, i+1);
-                }
-            }
-        }
-    }
-
-    ssc_union(ssc,
-              ored_cp_list,
-              FALSE /* Already has been inverted */
-              );
-}
-
-STATIC void
-S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
-{
-    PERL_ARGS_ASSERT_SSC_UNION;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    _invlist_union_maybe_complement_2nd(ssc->invlist,
-                                        invlist,
-                                        invert2nd,
-                                        &ssc->invlist);
-}
-
-STATIC void
-S_ssc_intersection(pTHX_ regnode_ssc *ssc,
-                         SV* const invlist,
-                         const bool invert2nd)
-{
-    PERL_ARGS_ASSERT_SSC_INTERSECTION;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    _invlist_intersection_maybe_complement_2nd(ssc->invlist,
-                                               invlist,
-                                               invert2nd,
-                                               &ssc->invlist);
-}
-
-STATIC void
-S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
-{
-    PERL_ARGS_ASSERT_SSC_ADD_RANGE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end);
-}
-
-STATIC void
-S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
-{
-    /* AND just the single code point 'cp' into the SSC 'ssc' */
-
-    SV* cp_list = _new_invlist(2);
-
-    PERL_ARGS_ASSERT_SSC_CP_AND;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    cp_list = add_cp_to_invlist(cp_list, cp);
-    ssc_intersection(ssc, cp_list,
-                     FALSE /* Not inverted */
-                     );
-    SvREFCNT_dec_NN(cp_list);
-}
-
-STATIC void
-S_ssc_clear_locale(regnode_ssc *ssc)
-{
-    /* Set the SSC 'ssc' to not match any locale things */
-    PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    ANYOF_POSIXL_ZERO(ssc);
-    ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
-}
-
-STATIC bool
-S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
-{
-    /* The synthetic start class is used to hopefully quickly winnow down
-     * places where a pattern could start a match in the target string.  If it
-     * doesn't really narrow things down that much, there isn't much point to
-     * having the overhead of using it.  This function uses some very crude
-     * heuristics to decide if to use the ssc or not.
-     *
-     * It returns TRUE if 'ssc' rules out more than half what it considers to
-     * be the "likely" possible matches, but of course it doesn't know what the
-     * actual things being matched are going to be; these are only guesses
-     *
-     * For /l matches, it assumes that the only likely matches are going to be
-     *      in the 0-255 range, uniformly distributed, so half of that is 127
-     * For /a and /d matches, it assumes that the likely matches will be just
-     *      the ASCII range, so half of that is 63
-     * For /u and there isn't anything matching above the Latin1 range, it
-     *      assumes that that is the only range likely to be matched, and uses
-     *      half that as the cut-off: 127.  If anything matches above Latin1,
-     *      it assumes that all of Unicode could match (uniformly), except for
-     *      non-Unicode code points and things in the General Category "Other"
-     *      (unassigned, private use, surrogates, controls and formats).  This
-     *      is a much large number. */
-
-    U32 count = 0;      /* Running total of number of code points matched by
-                           'ssc' */
-    UV start, end;      /* Start and end points of current range in inversion
-                           XXX outdated.  UTF-8 locales are common, what about invert? list */
-    const U32 max_code_points = (LOC)
-                                ?  256
-                                : ((  ! UNI_SEMANTICS
-                                    ||  invlist_highest(ssc->invlist) < 256)
-                                  ? 128
-                                  : NON_OTHER_COUNT);
-    const U32 max_match = max_code_points / 2;
-
-    PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
-
-    invlist_iterinit(ssc->invlist);
-    while (invlist_iternext(ssc->invlist, &start, &end)) {
-        if (start >= max_code_points) {
-            break;
-        }
-        end = MIN(end, max_code_points - 1);
-        count += end - start + 1;
-        if (count >= max_match) {
-            invlist_iterfinish(ssc->invlist);
-            return FALSE;
-        }
-    }
-
-    return TRUE;
-}
-
-
-STATIC void
-S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
-{
-    /* The inversion list in the SSC is marked mortal; now we need a more
-     * permanent copy, which is stored the same way that is done in a regular
-     * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
-     * map */
-
-    SV* invlist = invlist_clone(ssc->invlist, NULL);
-
-    PERL_ARGS_ASSERT_SSC_FINALIZE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* The code in this file assumes that all but these flags aren't relevant
-     * to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
-     * by the time we reach here */
-    assert(! (ANYOF_FLAGS(ssc)
-        & ~( ANYOF_COMMON_FLAGS
-            |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-            |ANYOF_HAS_EXTRA_RUNTIME_MATCHES)));
-
-    populate_anyof_bitmap_from_invlist( (regnode *) ssc, &invlist);
-
-    set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
-    SvREFCNT_dec(invlist);
-
-    /* Make sure is clone-safe */
-    ssc->invlist = NULL;
-
-    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
-        OP(ssc) = ANYOFPOSIXL;
-    }
-    else if (RExC_contains_locale) {
-        OP(ssc) = ANYOFL;
-    }
-
-    assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
-}
-
-#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
-#define TRIE_LIST_CUR(state)  ( TRIE_LIST_ITEM( state, 0 ).forid )
-#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
-#define TRIE_LIST_USED(idx)  ( trie->states[state].trans.list         \
-                               ? (TRIE_LIST_CUR( idx ) - 1)           \
-                               : 0 )
-
-
-#ifdef DEBUGGING
-/*
-   dump_trie(trie,widecharmap,revcharmap)
-   dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
-   dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
-
-   These routines dump out a trie in a somewhat readable format.
-   The _interim_ variants are used for debugging the interim
-   tables that are used to generate the final compressed
-   representation which is what dump_trie expects.
-
-   Part of the reason for their existence is to provide a form
-   of documentation as to how the different representations function.
-
-*/
-
-/*
-  Dumps the final compressed table form of the trie to Perl_debug_log.
-  Used for debugging make_trie().
-*/
-
-STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
-            AV *revcharmap, U32 depth)
-{
-    U32 state;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    U16 word;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE;
-
-    Perl_re_indentf( aTHX_  "Char : %-6s%-6s%-4s ",
-        depth+1, "Match","Base","Ofs" );
-
-    for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
-        SV ** const tmp = av_fetch_simple( revcharmap, state, 0);
-        if ( tmp ) {
-            Perl_re_printf( aTHX_  "%*s",
-                colwidth,
-                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
-                            PL_colors[0], PL_colors[1],
-                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                            PERL_PV_ESCAPE_FIRSTCHAR
-                )
-            );
-        }
-    }
-    Perl_re_printf( aTHX_  "\n");
-    Perl_re_indentf( aTHX_ "State|-----------------------", depth+1);
-
-    for( state = 0 ; state < trie->uniquecharcount ; state++ )
-        Perl_re_printf( aTHX_  "%.*s", colwidth, "--------");
-    Perl_re_printf( aTHX_  "\n");
-
-    for( state = 1 ; state < trie->statecount ; state++ ) {
-        const U32 base = trie->states[ state ].trans.base;
-
-        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
-
-        if ( trie->states[ state ].wordnum ) {
-            Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
-        } else {
-            Perl_re_printf( aTHX_  "%6s", "" );
-        }
-
-        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
-
-        if ( base ) {
-            U32 ofs = 0;
-
-            while( ( base + ofs  < trie->uniquecharcount ) ||
-                   ( base + ofs - trie->uniquecharcount < trie->lasttrans
-                     && trie->trans[ base + ofs - trie->uniquecharcount ].check
-                                                                    != state))
-                    ofs++;
-
-            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
-
-            for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
-                if ( ( base + ofs >= trie->uniquecharcount )
-                        && ( base + ofs - trie->uniquecharcount
-                                                        < trie->lasttrans )
-                        && trie->trans[ base + ofs
-                                    - trie->uniquecharcount ].check == state )
-                {
-                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
-                    (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
-                   );
-                } else {
-                    Perl_re_printf( aTHX_  "%*s", colwidth,"   ." );
-                }
-            }
-
-            Perl_re_printf( aTHX_  "]");
-
-        }
-        Perl_re_printf( aTHX_  "\n" );
-    }
-    Perl_re_indentf( aTHX_  "word_info N:(prev,len)=",
-                                depth);
-    for (word=1; word <= trie->wordcount; word++) {
-        Perl_re_printf( aTHX_  " %d:(%d,%d)",
-            (int)word, (int)(trie->wordinfo[word].prev),
-            (int)(trie->wordinfo[word].len));
-    }
-    Perl_re_printf( aTHX_  "\n" );
-}
-/*
-  Dumps a fully constructed but uncompressed trie in list form.
-  List tries normally only are used for construction when the number of
-  possible chars (trie->uniquecharcount) is very high.
-  Used for debugging make_trie().
-*/
-STATIC void
-S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
-                         HV *widecharmap, AV *revcharmap, U32 next_alloc,
-                         U32 depth)
-{
-    U32 state;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
-
-    /* print out the table precompression.  */
-    Perl_re_indentf( aTHX_  "State :Word | Transition Data\n",
-            depth+1 );
-    Perl_re_indentf( aTHX_  "%s",
-            depth+1, "------:-----+-----------------\n" );
-
-    for( state=1 ; state < next_alloc ; state ++ ) {
-        U16 charid;
-
-        Perl_re_indentf( aTHX_  " %4" UVXf " :",
-            depth+1, (UV)state  );
-        if ( ! trie->states[ state ].wordnum ) {
-            Perl_re_printf( aTHX_  "%5s| ","");
-        } else {
-            Perl_re_printf( aTHX_  "W%4x| ",
-                trie->states[ state ].wordnum
-            );
-        }
-        for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
-            SV ** const tmp = av_fetch_simple( revcharmap,
-                                        TRIE_LIST_ITEM(state, charid).forid, 0);
-            if ( tmp ) {
-                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
-                    colwidth,
-                    pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
-                              colwidth,
-                              PL_colors[0], PL_colors[1],
-                              (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
-                              | PERL_PV_ESCAPE_FIRSTCHAR
-                    ) ,
-                    TRIE_LIST_ITEM(state, charid).forid,
-                    (UV)TRIE_LIST_ITEM(state, charid).newstate
-                );
-                if (!(charid % 10))
-                    Perl_re_printf( aTHX_  "\n%*s| ",
-                        (int)((depth * 2) + 14), "");
-            }
-        }
-        Perl_re_printf( aTHX_  "\n");
-    }
-}
-
-/*
-  Dumps a fully constructed but uncompressed trie in table form.
-  This is the normal DFA style state transition table, with a few
-  twists to facilitate compression later.
-  Used for debugging make_trie().
-*/
-STATIC void
-S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
-                          HV *widecharmap, AV *revcharmap, U32 next_alloc,
-                          U32 depth)
-{
-    U32 state;
-    U16 charid;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
-
-    /*
-       print out the table precompression so that we can do a visual check
-       that they are identical.
-     */
-
-    Perl_re_indentf( aTHX_  "Char : ", depth+1 );
-
-    for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
-        SV ** const tmp = av_fetch_simple( revcharmap, charid, 0);
-        if ( tmp ) {
-            Perl_re_printf( aTHX_  "%*s",
-                colwidth,
-                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
-                            PL_colors[0], PL_colors[1],
-                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                            PERL_PV_ESCAPE_FIRSTCHAR
-                )
-            );
-        }
-    }
-
-    Perl_re_printf( aTHX_ "\n");
-    Perl_re_indentf( aTHX_  "State+-", depth+1 );
-
-    for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
-        Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
-    }
-
-    Perl_re_printf( aTHX_  "\n" );
-
-    for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
-
-        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
-            depth+1,
-            (UV)TRIE_NODENUM( state ) );
-
-        for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
-            UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
-            if (v)
-                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
-            else
-                Perl_re_printf( aTHX_  "%*s", colwidth, "." );
-        }
-        if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
-            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
-                                            (UV)trie->trans[ state ].check );
-        } else {
-            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
-                                            (UV)trie->trans[ state ].check,
-            trie->states[ TRIE_NODENUM( state ) ].wordnum );
-        }
-    }
-}
-
-#endif
-
-
-/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
-  startbranch: the first branch in the whole branch sequence
-  first      : start branch of sequence of branch-exact nodes.
-               May be the same as startbranch
-  last       : Thing following the last branch.
-               May be the same as tail.
-  tail       : item following the branch sequence
-  count      : words in the sequence
-  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/
-  depth      : indent depth
-
-Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
-
-A trie is an N'ary tree where the branches are determined by digital
-decomposition of the key. IE, at the root node you look up the 1st character and
-follow that branch repeat until you find the end of the branches. Nodes can be
-marked as "accepting" meaning they represent a complete word. Eg:
-
-  /he|she|his|hers/
-
-would convert into the following structure. Numbers represent states, letters
-following numbers represent valid transitions on the letter from that state, if
-the number is in square brackets it represents an accepting state, otherwise it
-will be in parenthesis.
-
-      +-h->+-e->[3]-+-r->(8)-+-s->[9]
-      |    |
-      |   (2)
-      |    |
-     (1)   +-i->(6)-+-s->[7]
-      |
-      +-s->(3)-+-h->(4)-+-e->[5]
-
-      Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
-
-This shows that when matching against the string 'hers' we will begin at state 1
-read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
-then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
-is also accepting. Thus we know that we can match both 'he' and 'hers' with a
-single traverse. We store a mapping from accepting to state to which word was
-matched, and then when we have multiple possibilities we try to complete the
-rest of the regex in the order in which they occurred in the alternation.
-
-The only prior NFA like behaviour that would be changed by the TRIE support is
-the silent ignoring of duplicate alternations which are of the form:
-
- / (DUPE|DUPE) X? (?{ ... }) Y /x
-
-Thus EVAL blocks following a trie may be called a different number of times with
-and without the optimisation. With the optimisations dupes will be silently
-ignored. This inconsistent behaviour of EVAL type nodes is well established as
-the following demonstrates:
-
- 'words'=~/(word|word|word)(?{ print $1 })[xyz]/
-
-which prints out 'word' three times, but
-
- 'words'=~/(word|word|word)(?{ print $1 })S/
-
-which doesnt print it out at all. This is due to other optimisations kicking in.
-
-Example of what happens on a structural level:
-
-The regexp /(ac|ad|ab)+/ will produce the following debug output:
-
-   1: CURLYM[1] {1,32767}(18)
-   5:   BRANCH(8)
-   6:     EXACT <ac>(16)
-   8:   BRANCH(11)
-   9:     EXACT <ad>(16)
-  11:   BRANCH(14)
-  12:     EXACT <ab>(16)
-  16:   SUCCEED(0)
-  17:   NOTHING(18)
-  18: END(0)
-
-This would be optimizable with startbranch=5, first=5, last=16, tail=16
-and should turn into:
-
-   1: CURLYM[1] {1,32767}(18)
-   5:   TRIE(16)
-        [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
-          <ac>
-          <ad>
-          <ab>
-  16:   SUCCEED(0)
-  17:   NOTHING(18)
-  18: END(0)
-
-Cases where tail != last would be like /(?foo|bar)baz/:
-
-   1: BRANCH(4)
-   2:   EXACT <foo>(8)
-   4: BRANCH(7)
-   5:   EXACT <bar>(8)
-   7: TAIL(8)
-   8: EXACT <baz>(10)
-  10: END(0)
-
-which would be optimizable with startbranch=1, first=1, last=7, tail=8
-and would end up looking like:
-
-    1: TRIE(8)
-      [Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
-        <foo>
-        <bar>
-   7: TAIL(8)
-   8: EXACT <baz>(10)
-  10: END(0)
-
-    d = uvchr_to_utf8_flags(d, uv, 0);
-
-is the recommended Unicode-aware way of saying
-
-    *(d++) = uv;
-*/
-
-#define TRIE_STORE_REVCHAR(val)                                            \
-    STMT_START {                                                           \
-        if (UTF) {                                                         \
-            SV *zlopp = newSV(UTF8_MAXBYTES);                              \
-            unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
-            *kapow = '\0';                                                 \
-            SvCUR_set(zlopp, kapow - flrbbbbb);                            \
-            SvPOK_on(zlopp);                                               \
-            SvUTF8_on(zlopp);                                              \
-            av_push_simple(revcharmap, zlopp);                                     \
-        } else {                                                           \
-            char ooooff = (char)val;                                           \
-            av_push_simple(revcharmap, newSVpvn(&ooooff, 1));                      \
-        }                                                                  \
-        } STMT_END
-
-/* This gets the next character from the input, folding it if not already
- * folded. */
-#define TRIE_READ_CHAR STMT_START {                                           \
-    wordlen++;                                                                \
-    if ( UTF ) {                                                              \
-        /* if it is UTF then it is either already folded, or does not need    \
-         * folding */                                                         \
-        uvc = valid_utf8_to_uvchr( (const U8*) uc, &len);                     \
-    }                                                                         \
-    else if (folder == PL_fold_latin1) {                                      \
-        /* This folder implies Unicode rules, which in the range expressible  \
-         *  by not UTF is the lower case, with the two exceptions, one of     \
-         *  which should have been taken care of before calling this */       \
-        assert(*uc != LATIN_SMALL_LETTER_SHARP_S);                            \
-        uvc = toLOWER_L1(*uc);                                                \
-        if (UNLIKELY(uvc == MICRO_SIGN)) uvc = GREEK_SMALL_LETTER_MU;         \
-        len = 1;                                                              \
-    } else {                                                                  \
-        /* raw data, will be folded later if needed */                        \
-        uvc = (U32)*uc;                                                       \
-        len = 1;                                                              \
-    }                                                                         \
-} STMT_END
-
-
-
-#define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
-    if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
-        U32 ging = TRIE_LIST_LEN( state ) * 2;                  \
-        Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
-        TRIE_LIST_LEN( state ) = ging;                          \
-    }                                                           \
-    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
-    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
-    TRIE_LIST_CUR( state )++;                                   \
-} STMT_END
-
-#define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newx( trie->states[ state ].trans.list,                     \
-        4, reg_trie_trans_le );                                 \
-     TRIE_LIST_CUR( state ) = 1;                                \
-     TRIE_LIST_LEN( state ) = 4;                                \
-} STMT_END
-
-#define TRIE_HANDLE_WORD(state) STMT_START {                    \
-    U16 dupe= trie->states[ state ].wordnum;                    \
-    regnode * const noper_next = regnext( noper );              \
-                                                                \
-    DEBUG_r({                                                   \
-        /* store the word for dumping */                        \
-        SV* tmp;                                                \
-        if (OP(noper) != NOTHING)                               \
-            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);    \
-        else                                                    \
-            tmp = newSVpvn_utf8( "", 0, UTF );                  \
-        av_push_simple( trie_words, tmp );                             \
-    });                                                         \
-                                                                \
-    curword++;                                                  \
-    trie->wordinfo[curword].prev   = 0;                         \
-    trie->wordinfo[curword].len    = wordlen;                   \
-    trie->wordinfo[curword].accept = state;                     \
-                                                                \
-    if ( noper_next < tail ) {                                  \
-        if (!trie->jump)                                        \
-            trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, \
-                                                 sizeof(U16) ); \
-        trie->jump[curword] = (U16)(noper_next - convert);      \
-        if (!jumper)                                            \
-            jumper = noper_next;                                \
-        if (!nextbranch)                                        \
-            nextbranch= regnext(cur);                           \
-    }                                                           \
-                                                                \
-    if ( dupe ) {                                               \
-        /* It's a dupe. Pre-insert into the wordinfo[].prev   */\
-        /* chain, so that when the bits of chain are later    */\
-        /* linked together, the dups appear in the chain      */\
-        trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
-        trie->wordinfo[dupe].prev = curword;                    \
-    } else {                                                    \
-        /* we haven't inserted this word yet.                */ \
-        trie->states[ state ].wordnum = curword;                \
-    }                                                           \
-} STMT_END
-
-
-#define TRIE_TRANS_STATE(state,base,ucharcount,charid,special)          \
-     ( ( base + charid >=  ucharcount                                   \
-         && base + charid < ubound                                      \
-         && state == trie->trans[ base - ucharcount + charid ].check    \
-         && trie->trans[ base - ucharcount + charid ].next )            \
-           ? trie->trans[ base - ucharcount + charid ].next             \
-           : ( state==1 ? special : 0 )                                 \
-      )
-
-#define TRIE_BITMAP_SET_FOLDED(trie, uvc, folder)           \
-STMT_START {                                                \
-    TRIE_BITMAP_SET(trie, uvc);                             \
-    /* store the folded codepoint */                        \
-    if ( folder )                                           \
-        TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);           \
-                                                            \
-    if ( !UTF ) {                                           \
-        /* store first byte of utf8 representation of */    \
-        /* variant codepoints */                            \
-        if (! UVCHR_IS_INVARIANT(uvc)) {                    \
-            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));   \
-        }                                                   \
-    }                                                       \
-} STMT_END
-#define MADE_TRIE       1
-#define MADE_JUMP_TRIE  2
-#define MADE_EXACT_TRIE 4
-
-STATIC I32
-S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
-                  regnode *first, regnode *last, regnode *tail,
-                  U32 word_count, U32 flags, U32 depth)
-{
-    /* first pass, loop through and scan words */
-    reg_trie_data *trie;
-    HV *widecharmap = NULL;
-    AV *revcharmap = newAV();
-    regnode *cur;
-    STRLEN len = 0;
-    UV uvc = 0;
-    U16 curword = 0;
-    U32 next_alloc = 0;
-    regnode *jumper = NULL;
-    regnode *nextbranch = NULL;
-    regnode *convert = NULL;
-    U32 *prev_states; /* temp array mapping each state to previous one */
-    /* we just use folder as a flag in utf8 */
-    const U8 * folder = NULL;
-
-    /* in the below add_data call we are storing either 'tu' or 'tuaa'
-     * which stands for one trie structure, one hash, optionally followed
-     * by two arrays */
-#ifdef DEBUGGING
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuaa"));
-    AV *trie_words = NULL;
-    /* along with revcharmap, this only used during construction but both are
-     * useful during debugging so we store them in the struct when debugging.
-     */
-#else
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tu"));
-    STRLEN trie_charcount=0;
-#endif
-    SV *re_trie_maxbuff;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_MAKE_TRIE;
-#ifndef DEBUGGING
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    switch (flags) {
-        case EXACT: case EXACT_REQ8: case EXACTL: break;
-        case EXACTFAA:
-        case EXACTFUP:
-        case EXACTFU:
-        case EXACTFLU8: folder = PL_fold_latin1; break;
-        case EXACTF:  folder = PL_fold; break;
-        default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, REGNODE_NAME(flags) );
-    }
-
-    trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
-    trie->refcount = 1;
-    trie->startstate = 1;
-    trie->wordcount = word_count;
-    RExC_rxi->data->data[ data_slot ] = (void*)trie;
-    trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
-    if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL)
-        trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
-    trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
-                       trie->wordcount+1, sizeof(reg_trie_wordinfo));
-
-    DEBUG_r({
-        trie_words = newAV();
-    });
-
-    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
-    assert(re_trie_maxbuff);
-    if (!SvIOK(re_trie_maxbuff)) {
-        sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
-    }
-    DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_
-          "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
-          depth+1,
-          REG_NODE_NUM(startbranch), REG_NODE_NUM(first),
-          REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth);
-    });
-
-   /* Find the node we are going to overwrite */
-    if ( first == startbranch && OP( last ) != BRANCH ) {
-        /* whole branch chain */
-        convert = first;
-    } else {
-        /* branch sub-chain */
-        convert = REGNODE_AFTER( first );
-    }
-
-    /*  -- First loop and Setup --
-
-       We first traverse the branches and scan each word to determine if it
-       contains widechars, and how many unique chars there are, this is
-       important as we have to build a table with at least as many columns as we
-       have unique chars.
-
-       We use an array of integers to represent the character codes 0..255
-       (trie->charmap) and we use a an HV* to store Unicode characters. We use
-       the native representation of the character value as the key and IV's for
-       the coded index.
-
-       *TODO* If we keep track of how many times each character is used we can
-       remap the columns so that the table compression later on is more
-       efficient in terms of memory by ensuring the most common value is in the
-       middle and the least common are on the outside.  IMO this would be better
-       than a most to least common mapping as theres a decent chance the most
-       common letter will share a node with the least common, meaning the node
-       will not be compressible. With a middle is most common approach the worst
-       case is when we have the least common nodes twice.
-
-     */
-
-    for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-        regnode *noper = REGNODE_AFTER( cur );
-        const U8 *uc;
-        const U8 *e;
-        int foldlen = 0;
-        U32 wordlen      = 0;         /* required init */
-        STRLEN minchars = 0;
-        STRLEN maxchars = 0;
-        bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the
-                                               bitmap?*/
-
-        if (OP(noper) == NOTHING) {
-            /* skip past a NOTHING at the start of an alternation
-             * eg, /(?:)a|(?:b)/ should be the same as /a|b/
-             *
-             * If the next node is not something we are supposed to process
-             * we will just ignore it due to the condition guarding the
-             * next block.
-             */
-
-            regnode *noper_next= regnext(noper);
-            if (noper_next < tail)
-                noper= noper_next;
-        }
-
-        if (    noper < tail
-            && (    OP(noper) == flags
-                || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                         || OP(noper) == EXACTFUP))))
-        {
-            uc= (U8*)STRING(noper);
-            e= uc + STR_LEN(noper);
-        } else {
-            trie->minlen= 0;
-            continue;
-        }
-
-
-        if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
-            TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
-                                          regardless of encoding */
-            if (OP( noper ) == EXACTFUP) {
-                /* false positives are ok, so just set this */
-                TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
-            }
-        }
-
-        for ( ; uc < e ; uc += len ) {  /* Look at each char in the current
-                                           branch */
-            TRIE_CHARCOUNT(trie)++;
-            TRIE_READ_CHAR;
-
-            /* TRIE_READ_CHAR returns the current character, or its fold if /i
-             * is in effect.  Under /i, this character can match itself, or
-             * anything that folds to it.  If not under /i, it can match just
-             * itself.  Most folds are 1-1, for example k, K, and KELVIN SIGN
-             * all fold to k, and all are single characters.   But some folds
-             * expand to more than one character, so for example LATIN SMALL
-             * LIGATURE FFI folds to the three character sequence 'ffi'.  If
-             * the string beginning at 'uc' is 'ffi', it could be matched by
-             * three characters, or just by the one ligature character. (It
-             * could also be matched by two characters: LATIN SMALL LIGATURE FF
-             * followed by 'i', or by 'f' followed by LATIN SMALL LIGATURE FI).
-             * (Of course 'I' and/or 'F' instead of 'i' and 'f' can also
-             * match.)  The trie needs to know the minimum and maximum number
-             * of characters that could match so that it can use size alone to
-             * quickly reject many match attempts.  The max is simple: it is
-             * the number of folded characters in this branch (since a fold is
-             * never shorter than what folds to it. */
-
-            maxchars++;
-
-            /* And the min is equal to the max if not under /i (indicated by
-             * 'folder' being NULL), or there are no multi-character folds.  If
-             * there is a multi-character fold, the min is incremented just
-             * once, for the character that folds to the sequence.  Each
-             * character in the sequence needs to be added to the list below of
-             * characters in the trie, but we count only the first towards the
-             * min number of characters needed.  This is done through the
-             * variable 'foldlen', which is returned by the macros that look
-             * for these sequences as the number of bytes the sequence
-             * occupies.  Each time through the loop, we decrement 'foldlen' by
-             * how many bytes the current char occupies.  Only when it reaches
-             * 0 do we increment 'minchars' or look for another multi-character
-             * sequence. */
-            if (folder == NULL) {
-                minchars++;
-            }
-            else if (foldlen > 0) {
-                foldlen -= (UTF) ? UTF8SKIP(uc) : 1;
-            }
-            else {
-                minchars++;
-
-                /* See if *uc is the beginning of a multi-character fold.  If
-                 * so, we decrement the length remaining to look at, to account
-                 * for the current character this iteration.  (We can use 'uc'
-                 * instead of the fold returned by TRIE_READ_CHAR because the
-                 * macro is smart enough to account for any unfolded
-                 * characters. */
-                if (UTF) {
-                    if ((foldlen = is_MULTI_CHAR_FOLD_utf8_safe(uc, e))) {
-                        foldlen -= UTF8SKIP(uc);
-                    }
-                }
-                else if ((foldlen = is_MULTI_CHAR_FOLD_latin1_safe(uc, e))) {
-                    foldlen--;
-                }
-            }
-
-            /* The current character (and any potential folds) should be added
-             * to the possible matching characters for this position in this
-             * branch */
-            if ( uvc < 256 ) {
-                if ( folder ) {
-                    U8 folded= folder[ (U8) uvc ];
-                    if ( !trie->charmap[ folded ] ) {
-                        trie->charmap[ folded ]=( ++trie->uniquecharcount );
-                        TRIE_STORE_REVCHAR( folded );
-                    }
-                }
-                if ( !trie->charmap[ uvc ] ) {
-                    trie->charmap[ uvc ]=( ++trie->uniquecharcount );
-                    TRIE_STORE_REVCHAR( uvc );
-                }
-                if ( set_bit ) {
-                    /* store the codepoint in the bitmap, and its folded
-                     * equivalent. */
-                    TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
-                    set_bit = 0; /* We've done our bit :-) */
-                }
-            } else {
-
-                /* XXX We could come up with the list of code points that fold
-                 * to this using PL_utf8_foldclosures, except not for
-                 * multi-char folds, as there may be multiple combinations
-                 * there that could work, which needs to wait until runtime to
-                 * resolve (The comment about LIGATURE FFI above is such an
-                 * example */
-
-                SV** svpp;
-                if ( !widecharmap )
-                    widecharmap = newHV();
-
-                svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
-
-                if ( !svpp )
-                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
-
-                if ( !SvTRUE( *svpp ) ) {
-                    sv_setiv( *svpp, ++trie->uniquecharcount );
-                    TRIE_STORE_REVCHAR(uvc);
-                }
-            }
-        } /* end loop through characters in this branch of the trie */
-
-        /* We take the min and max for this branch and combine to find the min
-         * and max for all branches processed so far */
-        if( cur == first ) {
-            trie->minlen = minchars;
-            trie->maxlen = maxchars;
-        } else if (minchars < trie->minlen) {
-            trie->minlen = minchars;
-        } else if (maxchars > trie->maxlen) {
-            trie->maxlen = maxchars;
-        }
-    } /* end first pass */
-    DEBUG_TRIE_COMPILE_r(
-        Perl_re_indentf( aTHX_
-                "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
-                depth+1,
-                ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
-                (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
-                (int)trie->minlen, (int)trie->maxlen )
-    );
-
-    /*
-        We now know what we are dealing with in terms of unique chars and
-        string sizes so we can calculate how much memory a naive
-        representation using a flat table  will take. If it's over a reasonable
-        limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
-        conservative but potentially much slower representation using an array
-        of lists.
-
-        At the end we convert both representations into the same compressed
-        form that will be used in regexec.c for matching with. The latter
-        is a form that cannot be used to construct with but has memory
-        properties similar to the list form and access properties similar
-        to the table form making it both suitable for fast searches and
-        small enough that its feasable to store for the duration of a program.
-
-        See the comment in the code where the compressed table is produced
-        inplace from the flat tabe representation for an explanation of how
-        the compression works.
-
-    */
-
-
-    Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32);
-    prev_states[1] = 0;
-
-    if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1)
-                                                    > SvIV(re_trie_maxbuff) )
-    {
-        /*
-            Second Pass -- Array Of Lists Representation
-
-            Each state will be represented by a list of charid:state records
-            (reg_trie_trans_le) the first such element holds the CUR and LEN
-            points of the allocated array. (See defines above).
-
-            We build the initial structure using the lists, and then convert
-            it into the compressed table form which allows faster lookups
-            (but cant be modified once converted).
-        */
-
-        STRLEN transcount = 1;
-
-        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using list compiler\n",
-            depth+1));
-
-        trie->states = (reg_trie_state *)
-            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
-                                  sizeof(reg_trie_state) );
-        TRIE_LIST_NEW(1);
-        next_alloc = 2;
-
-        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-
-            regnode *noper   = REGNODE_AFTER( cur );
-            U32 state        = 1;         /* required init */
-            U16 charid       = 0;         /* sanity init */
-            U32 wordlen      = 0;         /* required init */
-
-            if (OP(noper) == NOTHING) {
-                regnode *noper_next= regnext(noper);
-                if (noper_next < tail)
-                    noper= noper_next;
-                /* we will undo this assignment if noper does not
-                 * point at a trieable type in the else clause of
-                 * the following statement. */
-            }
-
-            if (    noper < tail
-                && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                             || OP(noper) == EXACTFUP))))
-            {
-                const U8 *uc= (U8*)STRING(noper);
-                const U8 *e= uc + STR_LEN(noper);
-
-                for ( ; uc < e ; uc += len ) {
-
-                    TRIE_READ_CHAR;
-
-                    if ( uvc < 256 ) {
-                        charid = trie->charmap[ uvc ];
-                    } else {
-                        SV** const svpp = hv_fetch( widecharmap,
-                                                    (char*)&uvc,
-                                                    sizeof( UV ),
-                                                    0);
-                        if ( !svpp ) {
-                            charid = 0;
-                        } else {
-                            charid=(U16)SvIV( *svpp );
-                        }
-                    }
-                    /* charid is now 0 if we dont know the char read, or
-                     * nonzero if we do */
-                    if ( charid ) {
-
-                        U16 check;
-                        U32 newstate = 0;
-
-                        charid--;
-                        if ( !trie->states[ state ].trans.list ) {
-                            TRIE_LIST_NEW( state );
-                        }
-                        for ( check = 1;
-                              check <= TRIE_LIST_USED( state );
-                              check++ )
-                        {
-                            if ( TRIE_LIST_ITEM( state, check ).forid
-                                                                    == charid )
-                            {
-                                newstate = TRIE_LIST_ITEM( state, check ).newstate;
-                                break;
-                            }
-                        }
-                        if ( ! newstate ) {
-                            newstate = next_alloc++;
-                            prev_states[newstate] = state;
-                            TRIE_LIST_PUSH( state, charid, newstate );
-                            transcount++;
-                        }
-                        state = newstate;
-                    } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
-                    }
-                }
-            } else {
-                /* If we end up here it is because we skipped past a NOTHING, but did not end up
-                 * on a trieable type. So we need to reset noper back to point at the first regop
-                 * in the branch before we call TRIE_HANDLE_WORD()
-                */
-                noper= REGNODE_AFTER(cur);
-            }
-            TRIE_HANDLE_WORD(state);
-
-        } /* end second pass */
-
-        /* next alloc is the NEXT state to be allocated */
-        trie->statecount = next_alloc;
-        trie->states = (reg_trie_state *)
-            PerlMemShared_realloc( trie->states,
-                                   next_alloc
-                                   * sizeof(reg_trie_state) );
-
-        /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
-                                                         revcharmap, next_alloc,
-                                                         depth+1)
-        );
-
-        trie->trans = (reg_trie_trans *)
-            PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
-        {
-            U32 state;
-            U32 tp = 0;
-            U32 zp = 0;
-
-
-            for( state=1 ; state < next_alloc ; state ++ ) {
-                U32 base=0;
-
-                /*
-                DEBUG_TRIE_COMPILE_MORE_r(
-                    Perl_re_printf( aTHX_  "tp: %d zp: %d ",tp,zp)
-                );
-                */
-
-                if (trie->states[state].trans.list) {
-                    U16 minid=TRIE_LIST_ITEM( state, 1).forid;
-                    U16 maxid=minid;
-                    U16 idx;
-
-                    for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
-                        const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
-                        if ( forid < minid ) {
-                            minid=forid;
-                        } else if ( forid > maxid ) {
-                            maxid=forid;
-                        }
-                    }
-                    if ( transcount < tp + maxid - minid + 1) {
-                        transcount *= 2;
-                        trie->trans = (reg_trie_trans *)
-                            PerlMemShared_realloc( trie->trans,
-                                                     transcount
-                                                     * sizeof(reg_trie_trans) );
-                        Zero( trie->trans + (transcount / 2),
-                              transcount / 2,
-                              reg_trie_trans );
-                    }
-                    base = trie->uniquecharcount + tp - minid;
-                    if ( maxid == minid ) {
-                        U32 set = 0;
-                        for ( ; zp < tp ; zp++ ) {
-                            if ( ! trie->trans[ zp ].next ) {
-                                base = trie->uniquecharcount + zp - minid;
-                                trie->trans[ zp ].next = TRIE_LIST_ITEM( state,
-                                                                   1).newstate;
-                                trie->trans[ zp ].check = state;
-                                set = 1;
-                                break;
-                            }
-                        }
-                        if ( !set ) {
-                            trie->trans[ tp ].next = TRIE_LIST_ITEM( state,
-                                                                   1).newstate;
-                            trie->trans[ tp ].check = state;
-                            tp++;
-                            zp = tp;
-                        }
-                    } else {
-                        for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
-                            const U32 tid = base
-                                           - trie->uniquecharcount
-                                           + TRIE_LIST_ITEM( state, idx ).forid;
-                            trie->trans[ tid ].next = TRIE_LIST_ITEM( state,
-                                                                idx ).newstate;
-                            trie->trans[ tid ].check = state;
-                        }
-                        tp += ( maxid - minid + 1 );
-                    }
-                    Safefree(trie->states[ state ].trans.list);
-                }
-                /*
-                DEBUG_TRIE_COMPILE_MORE_r(
-                    Perl_re_printf( aTHX_  " base: %d\n",base);
-                );
-                */
-                trie->states[ state ].trans.base=base;
-            }
-            trie->lasttrans = tp + 1;
-        }
-    } else {
-        /*
-           Second Pass -- Flat Table Representation.
-
-           we dont use the 0 slot of either trans[] or states[] so we add 1 to
-           each.  We know that we will need Charcount+1 trans at most to store
-           the data (one row per char at worst case) So we preallocate both
-           structures assuming worst case.
-
-           We then construct the trie using only the .next slots of the entry
-           structs.
-
-           We use the .check field of the first entry of the node temporarily
-           to make compression both faster and easier by keeping track of how
-           many non zero fields are in the node.
-
-           Since trans are numbered from 1 any 0 pointer in the table is a FAIL
-           transition.
-
-           There are two terms at use here: state as a TRIE_NODEIDX() which is
-           a number representing the first entry of the node, and state as a
-           TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1)
-           and TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3)
-           if there are 2 entrys per node. eg:
-
-             A B       A B
-          1. 2 4    1. 3 7
-          2. 0 3    3. 0 5
-          3. 0 0    5. 0 0
-          4. 0 0    7. 0 0
-
-           The table is internally in the right hand, idx form. However as we
-           also have to deal with the states array which is indexed by nodenum
-           we have to use TRIE_NODENUM() to convert.
-
-        */
-        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using table compiler\n",
-            depth+1));
-
-        trie->trans = (reg_trie_trans *)
-            PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
-                                  * trie->uniquecharcount + 1,
-                                  sizeof(reg_trie_trans) );
-        trie->states = (reg_trie_state *)
-            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
-                                  sizeof(reg_trie_state) );
-        next_alloc = trie->uniquecharcount + 1;
-
-
-        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-
-            regnode *noper   = REGNODE_AFTER( cur );
-
-            U32 state        = 1;         /* required init */
-
-            U16 charid       = 0;         /* sanity init */
-            U32 accept_state = 0;         /* sanity init */
-
-            U32 wordlen      = 0;         /* required init */
-
-            if (OP(noper) == NOTHING) {
-                regnode *noper_next= regnext(noper);
-                if (noper_next < tail)
-                    noper= noper_next;
-                /* we will undo this assignment if noper does not
-                 * point at a trieable type in the else clause of
-                 * the following statement. */
-            }
-
-            if (    noper < tail
-                && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                             || OP(noper) == EXACTFUP))))
-            {
-                const U8 *uc= (U8*)STRING(noper);
-                const U8 *e= uc + STR_LEN(noper);
-
-                for ( ; uc < e ; uc += len ) {
-
-                    TRIE_READ_CHAR;
-
-                    if ( uvc < 256 ) {
-                        charid = trie->charmap[ uvc ];
-                    } else {
-                        SV* const * const svpp = hv_fetch( widecharmap,
-                                                           (char*)&uvc,
-                                                           sizeof( UV ),
-                                                           0);
-                        charid = svpp ? (U16)SvIV(*svpp) : 0;
-                    }
-                    if ( charid ) {
-                        charid--;
-                        if ( !trie->trans[ state + charid ].next ) {
-                            trie->trans[ state + charid ].next = next_alloc;
-                            trie->trans[ state ].check++;
-                            prev_states[TRIE_NODENUM(next_alloc)]
-                                    = TRIE_NODENUM(state);
-                            next_alloc += trie->uniquecharcount;
-                        }
-                        state = trie->trans[ state + charid ].next;
-                    } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
-                    }
-                    /* charid is now 0 if we dont know the char read, or
-                     * nonzero if we do */
-                }
-            } else {
-                /* If we end up here it is because we skipped past a NOTHING, but did not end up
-                 * on a trieable type. So we need to reset noper back to point at the first regop
-                 * in the branch before we call TRIE_HANDLE_WORD().
-                */
-                noper= REGNODE_AFTER(cur);
-            }
-            accept_state = TRIE_NODENUM( state );
-            TRIE_HANDLE_WORD(accept_state);
-
-        } /* end second pass */
-
-        /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
-                                                          revcharmap,
-                                                          next_alloc, depth+1));
-
-        {
-        /*
-           * Inplace compress the table.*
-
-           For sparse data sets the table constructed by the trie algorithm will
-           be mostly 0/FAIL transitions or to put it another way mostly empty.
-           (Note that leaf nodes will not contain any transitions.)
-
-           This algorithm compresses the tables by eliminating most such
-           transitions, at the cost of a modest bit of extra work during lookup:
-
-           - Each states[] entry contains a .base field which indicates the
-           index in the state[] array wheres its transition data is stored.
-
-           - If .base is 0 there are no valid transitions from that node.
-
-           - If .base is nonzero then charid is added to it to find an entry in
-           the trans array.
-
-           -If trans[states[state].base+charid].check!=state then the
-           transition is taken to be a 0/Fail transition. Thus if there are fail
-           transitions at the front of the node then the .base offset will point
-           somewhere inside the previous nodes data (or maybe even into a node
-           even earlier), but the .check field determines if the transition is
-           valid.
-
-           XXX - wrong maybe?
-           The following process inplace converts the table to the compressed
-           table: We first do not compress the root node 1,and mark all its
-           .check pointers as 1 and set its .base pointer as 1 as well. This
-           allows us to do a DFA construction from the compressed table later,
-           and ensures that any .base pointers we calculate later are greater
-           than 0.
-
-           - We set 'pos' to indicate the first entry of the second node.
-
-           - We then iterate over the columns of the node, finding the first and
-           last used entry at l and m. We then copy l..m into pos..(pos+m-l),
-           and set the .check pointers accordingly, and advance pos
-           appropriately and repreat for the next node. Note that when we copy
-           the next pointers we have to convert them from the original
-           NODEIDX form to NODENUM form as the former is not valid post
-           compression.
-
-           - If a node has no transitions used we mark its base as 0 and do not
-           advance the pos pointer.
-
-           - If a node only has one transition we use a second pointer into the
-           structure to fill in allocated fail transitions from other states.
-           This pointer is independent of the main pointer and scans forward
-           looking for null transitions that are allocated to a state. When it
-           finds one it writes the single transition into the "hole".  If the
-           pointer doesnt find one the single transition is appended as normal.
-
-           - Once compressed we can Renew/realloc the structures to release the
-           excess space.
-
-           See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
-           specifically Fig 3.47 and the associated pseudocode.
-
-           demq
-        */
-        const U32 laststate = TRIE_NODENUM( next_alloc );
-        U32 state, charid;
-        U32 pos = 0, zp=0;
-        trie->statecount = laststate;
-
-        for ( state = 1 ; state < laststate ; state++ ) {
-            U8 flag = 0;
-            const U32 stateidx = TRIE_NODEIDX( state );
-            const U32 o_used = trie->trans[ stateidx ].check;
-            U32 used = trie->trans[ stateidx ].check;
-            trie->trans[ stateidx ].check = 0;
-
-            for ( charid = 0;
-                  used && charid < trie->uniquecharcount;
-                  charid++ )
-            {
-                if ( flag || trie->trans[ stateidx + charid ].next ) {
-                    if ( trie->trans[ stateidx + charid ].next ) {
-                        if (o_used == 1) {
-                            for ( ; zp < pos ; zp++ ) {
-                                if ( ! trie->trans[ zp ].next ) {
-                                    break;
-                                }
-                            }
-                            trie->states[ state ].trans.base
-                                                    = zp
-                                                      + trie->uniquecharcount
-                                                      - charid ;
-                            trie->trans[ zp ].next
-                                = SAFE_TRIE_NODENUM( trie->trans[ stateidx
-                                                             + charid ].next );
-                            trie->trans[ zp ].check = state;
-                            if ( ++zp > pos ) pos = zp;
-                            break;
-                        }
-                        used--;
-                    }
-                    if ( !flag ) {
-                        flag = 1;
-                        trie->states[ state ].trans.base
-                                       = pos + trie->uniquecharcount - charid ;
-                    }
-                    trie->trans[ pos ].next
-                        = SAFE_TRIE_NODENUM(
-                                       trie->trans[ stateidx + charid ].next );
-                    trie->trans[ pos ].check = state;
-                    pos++;
-                }
-            }
-        }
-        trie->lasttrans = pos + 1;
-        trie->states = (reg_trie_state *)
-            PerlMemShared_realloc( trie->states, laststate
-                                   * sizeof(reg_trie_state) );
-        DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
-                depth+1,
-                (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
-                       + 1 ),
-                (IV)next_alloc,
-                (IV)pos,
-                ( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
-            );
-
-        } /* end table compress */
-    }
-    DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
-                depth+1,
-                (UV)trie->statecount,
-                (UV)trie->lasttrans)
-    );
-    /* resize the trans array to remove unused space */
-    trie->trans = (reg_trie_trans *)
-        PerlMemShared_realloc( trie->trans, trie->lasttrans
-                               * sizeof(reg_trie_trans) );
-
-    {   /* Modify the program and insert the new TRIE node */
-        U8 nodetype =(U8) flags;
-        char *str=NULL;
-
-#ifdef DEBUGGING
-        regnode *optimize = NULL;
-#endif /* DEBUGGING */
-        /*
-           This means we convert either the first branch or the first Exact,
-           depending on whether the thing following (in 'last') is a branch
-           or not and whther first is the startbranch (ie is it a sub part of
-           the alternation or is it the whole thing.)
-           Assuming its a sub part we convert the EXACT otherwise we convert
-           the whole branch sequence, including the first.
-         */
-        /* Find the node we are going to overwrite */
-        if ( first != startbranch || OP( last ) == BRANCH ) {
-            /* branch sub-chain */
-            NEXT_OFF( first ) = (U16)(last - first);
-            /* whole branch chain */
-        }
-        /* But first we check to see if there is a common prefix we can
-           split out as an EXACT and put in front of the TRIE node.  */
-        trie->startstate= 1;
-        if ( trie->bitmap && !widecharmap && !trie->jump  ) {
-            /* we want to find the first state that has more than
-             * one transition, if that state is not the first state
-             * then we have a common prefix which we can remove.
-             */
-            U32 state;
-            for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
-                U32 ofs = 0;
-                I32 first_ofs = -1; /* keeps track of the ofs of the first
-                                       transition, -1 means none */
-                U32 count = 0;
-                const U32 base = trie->states[ state ].trans.base;
-
-                /* does this state terminate an alternation? */
-                if ( trie->states[state].wordnum )
-                        count = 1;
-
-                for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
-                    if ( ( base + ofs >= trie->uniquecharcount ) &&
-                         ( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
-                         trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
-                    {
-                        if ( ++count > 1 ) {
-                            /* we have more than one transition */
-                            SV **tmp;
-                            U8 *ch;
-                            /* if this is the first state there is no common prefix
-                             * to extract, so we can exit */
-                            if ( state == 1 ) break;
-                            tmp = av_fetch_simple( revcharmap, ofs, 0);
-                            ch = (U8*)SvPV_nolen_const( *tmp );
-
-                            /* if we are on count 2 then we need to initialize the
-                             * bitmap, and store the previous char if there was one
-                             * in it*/
-                            if ( count == 2 ) {
-                                /* clear the bitmap */
-                                Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
-                                DEBUG_OPTIMISE_r(
-                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
-                                        depth+1,
-                                        (UV)state));
-                                if (first_ofs >= 0) {
-                                    SV ** const tmp = av_fetch_simple( revcharmap, first_ofs, 0);
-                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
-
-                                    TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
-                                    DEBUG_OPTIMISE_r(
-                                        Perl_re_printf( aTHX_  "%s", (char*)ch)
-                                    );
-                                }
-                            }
-                            /* store the current firstchar in the bitmap */
-                            TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
-                            DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
-                        }
-                        first_ofs = ofs;
-                    }
-                }
-                if ( count == 1 ) {
-                    /* This state has only one transition, its transition is part
-                     * of a common prefix - we need to concatenate the char it
-                     * represents to what we have so far. */
-                    SV **tmp = av_fetch_simple( revcharmap, first_ofs, 0);
-                    STRLEN len;
-                    char *ch = SvPV( *tmp, len );
-                    DEBUG_OPTIMISE_r({
-                        SV *sv=sv_newmortal();
-                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
-                            depth+1,
-                            (UV)state, (UV)first_ofs,
-                            pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
-                                PL_colors[0], PL_colors[1],
-                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                                PERL_PV_ESCAPE_FIRSTCHAR
-                            )
-                        );
-                    });
-                    if ( state==1 ) {
-                        OP( convert ) = nodetype;
-                        str=STRING(convert);
-                        setSTR_LEN(convert, 0);
-                    }
-                    assert( ( STR_LEN(convert) + len ) < 256 );
-                    setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
-                    while (len--)
-                        *str++ = *ch++;
-                } else {
-#ifdef DEBUGGING
-                    if (state>1)
-                        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n"));
-#endif
-                    break;
-                }
-            }
-            trie->prefixlen = (state-1);
-            if (str) {
-                regnode *n = REGNODE_AFTER(convert);
-                assert( n - convert <= U16_MAX );
-                NEXT_OFF(convert) = n - convert;
-                trie->startstate = state;
-                trie->minlen -= (state - 1);
-                trie->maxlen -= (state - 1);
-#ifdef DEBUGGING
-               /* At least the UNICOS C compiler choked on this
-                * being argument to DEBUG_r(), so let's just have
-                * it right here. */
-               if (
-#ifdef PERL_EXT_RE_BUILD
-                   1
-#else
-                   DEBUG_r_TEST
-#endif
-                   ) {
-                   U32 word = trie->wordcount;
-                   while (word--) {
-                       SV ** const tmp = av_fetch_simple( trie_words, word, 0 );
-                       if (tmp) {
-                           if ( STR_LEN(convert) <= SvCUR(*tmp) )
-                               sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
-                           else
-                               sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
-                       }
-                   }
-               }
-#endif
-                if (trie->maxlen) {
-                    convert = n;
-                } else {
-                    NEXT_OFF(convert) = (U16)(tail - convert);
-                    DEBUG_r(optimize= n);
-                }
-            }
-        }
-        if (!jumper)
-            jumper = last;
-        if ( trie->maxlen ) {
-            NEXT_OFF( convert ) = (U16)(tail - convert);
-            ARG_SET( convert, data_slot );
-            /* Store the offset to the first unabsorbed branch in
-               jump[0], which is otherwise unused by the jump logic.
-               We use this when dumping a trie and during optimisation. */
-            if (trie->jump)
-                trie->jump[0] = (U16)(nextbranch - convert);
-
-            /* If the start state is not accepting (meaning there is no empty string/NOTHING)
-             *   and there is a bitmap
-             *   and the first "jump target" node we found leaves enough room
-             * then convert the TRIE node into a TRIEC node, with the bitmap
-             * embedded inline in the opcode - this is hypothetically faster.
-             */
-            if ( !trie->states[trie->startstate].wordnum
-                 && trie->bitmap
-                 && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
-            {
-                OP( convert ) = TRIEC;
-                Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
-                PerlMemShared_free(trie->bitmap);
-                trie->bitmap= NULL;
-            } else
-                OP( convert ) = TRIE;
-
-            /* store the type in the flags */
-            convert->flags = nodetype;
-            DEBUG_r({
-            optimize = convert
-                      + NODE_STEP_REGNODE
-                      + REGNODE_ARG_LEN( OP( convert ) );
-            });
-            /* XXX We really should free up the resource in trie now,
-                   as we won't use them - (which resources?) dmq */
-        }
-        /* needed for dumping*/
-        DEBUG_r(if (optimize) {
-            /*
-                Try to clean up some of the debris left after the
-                optimisation.
-             */
-            while( optimize < jumper ) {
-                OP( optimize ) = OPTIMIZED;
-                optimize++;
-            }
-        });
-    } /* end node insert */
-
-    /*  Finish populating the prev field of the wordinfo array.  Walk back
-     *  from each accept state until we find another accept state, and if
-     *  so, point the first word's .prev field at the second word. If the
-     *  second already has a .prev field set, stop now. This will be the
-     *  case either if we've already processed that word's accept state,
-     *  or that state had multiple words, and the overspill words were
-     *  already linked up earlier.
-     */
-    {
-        U16 word;
-        U32 state;
-        U16 prev;
-
-        for (word=1; word <= trie->wordcount; word++) {
-            prev = 0;
-            if (trie->wordinfo[word].prev)
-                continue;
-            state = trie->wordinfo[word].accept;
-            while (state) {
-                state = prev_states[state];
-                if (!state)
-                    break;
-                prev = trie->states[state].wordnum;
-                if (prev)
-                    break;
-            }
-            trie->wordinfo[word].prev = prev;
-        }
-        Safefree(prev_states);
-    }
-
-
-    /* and now dump out the compressed format */
-    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
-
-    RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
-#ifdef DEBUGGING
-    RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
-    RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
-#else
-    SvREFCNT_dec_NN(revcharmap);
-#endif
-    return trie->jump
-           ? MADE_JUMP_TRIE
-           : trie->startstate>1
-             ? MADE_EXACT_TRIE
-             : MADE_TRIE;
-}
-
-STATIC regnode *
-S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
-{
-/* The Trie is constructed and compressed now so we can build a fail array if
- * it's needed
-
-   This is basically the Aho-Corasick algorithm. Its from exercise 3.31 and
-   3.32 in the
-   "Red Dragon" -- Compilers, principles, techniques, and tools. Aho, Sethi,
-   Ullman 1985/88
-   ISBN 0-201-10088-6
-
-   We find the fail state for each state in the trie, this state is the longest
-   proper suffix of the current state's 'word' that is also a proper prefix of
-   another word in our trie. State 1 represents the word '' and is thus the
-   default fail state. This allows the DFA not to have to restart after its
-   tried and failed a word at a given point, it simply continues as though it
-   had been matching the other word in the first place.
-   Consider
-      'abcdgu'=~/abcdefg|cdgu/
-   When we get to 'd' we are still matching the first word, we would encounter
-   'g' which would fail, which would bring us to the state representing 'd' in
-   the second word where we would try 'g' and succeed, proceeding to match
-   'cdgu'.
- */
- /* add a fail transition */
-    const U32 trie_offset = ARG(source);
-    reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset];
-    U32 *q;
-    const U32 ucharcount = trie->uniquecharcount;
-    const U32 numstates = trie->statecount;
-    const U32 ubound = trie->lasttrans + ucharcount;
-    U32 q_read = 0;
-    U32 q_write = 0;
-    U32 charid;
-    U32 base = trie->states[ 1 ].trans.base;
-    U32 *fail;
-    reg_ac_data *aho;
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
-    regnode *stclass;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
-    PERL_UNUSED_CONTEXT;
-#ifndef DEBUGGING
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    if ( OP(source) == TRIE ) {
-        struct regnode_1 *op = (struct regnode_1 *)
-            PerlMemShared_calloc(1, sizeof(struct regnode_1));
-        StructCopy(source, op, struct regnode_1);
-        stclass = (regnode *)op;
-    } else {
-        struct regnode_charclass *op = (struct regnode_charclass *)
-            PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
-        StructCopy(source, op, struct regnode_charclass);
-        stclass = (regnode *)op;
-    }
-    OP(stclass)+=2; /* convert the TRIE type to its AHO-CORASICK equivalent */
-
-    ARG_SET( stclass, data_slot );
-    aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
-    RExC_rxi->data->data[ data_slot ] = (void*)aho;
-    aho->trie=trie_offset;
-    aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
-    Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newx( q, numstates, U32);
-    aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
-    aho->refcount = 1;
-    fail = aho->fail;
-    /* initialize fail[0..1] to be 1 so that we always have
-       a valid final fail state */
-    fail[ 0 ] = fail[ 1 ] = 1;
-
-    for ( charid = 0; charid < ucharcount ; charid++ ) {
-        const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
-        if ( newstate ) {
-            q[ q_write ] = newstate;
-            /* set to point at the root */
-            fail[ q[ q_write++ ] ]=1;
-        }
-    }
-    while ( q_read < q_write) {
-        const U32 cur = q[ q_read++ % numstates ];
-        base = trie->states[ cur ].trans.base;
-
-        for ( charid = 0 ; charid < ucharcount ; charid++ ) {
-            const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
-            if (ch_state) {
-                U32 fail_state = cur;
-                U32 fail_base;
-                do {
-                    fail_state = fail[ fail_state ];
-                    fail_base = aho->states[ fail_state ].trans.base;
-                } while ( !TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 ) );
-
-                fail_state = TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 );
-                fail[ ch_state ] = fail_state;
-                if ( !aho->states[ ch_state ].wordnum && aho->states[ fail_state ].wordnum )
-                {
-                        aho->states[ ch_state ].wordnum =  aho->states[ fail_state ].wordnum;
-                }
-                q[ q_write++ % numstates] = ch_state;
-            }
-        }
-    }
-    /* restore fail[0..1] to 0 so that we "fall out" of the AC loop
-       when we fail in state 1, this allows us to use the
-       charclass scan to find a valid start char. This is based on the principle
-       that theres a good chance the string being searched contains lots of stuff
-       that cant be a start char.
-     */
-    fail[ 0 ] = fail[ 1 ] = 0;
-    DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
-                      depth, (UV)numstates
-        );
-        for( q_read=1; q_read<numstates; q_read++ ) {
-            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
-        }
-        Perl_re_printf( aTHX_  "\n");
-    });
-    Safefree(q);
-    /*RExC_seen |= REG_TRIEDFA_SEEN;*/
-    return stclass;
-}
-
-
-/* The below joins as many adjacent EXACTish nodes as possible into a single
- * one.  The regop may be changed if the node(s) contain certain sequences that
- * require special handling.  The joining is only done if:
- * 1) there is room in the current conglomerated node to entirely contain the
- *    next one.
- * 2) they are compatible node types
- *
- * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
- * these get optimized out
- *
- * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
- * as possible, even if that means splitting an existing node so that its first
- * part is moved to the preceding node.  This would maximise the efficiency of
- * memEQ during matching.
- *
- * If a node is to match under /i (folded), the number of characters it matches
- * can be different than its character length if it contains a multi-character
- * fold.  *min_subtract is set to the total delta number of characters of the
- * input nodes.
- *
- * And *unfolded_multi_char is set to indicate whether or not the node contains
- * an unfolded multi-char fold.  This happens when it won't be known until
- * runtime whether the fold is valid or not; namely
- *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
- *      target string being matched against turns out to be UTF-8 is that fold
- *      valid; or
- *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
- *      runtime.
- * (Multi-char folds whose components are all above the Latin1 range are not
- * run-time locale dependent, and have already been folded by the time this
- * function is called.)
- *
- * This is as good a place as any to discuss the design of handling these
- * multi-character fold sequences.  It's been wrong in Perl for a very long
- * time.  There are three code points in Unicode whose multi-character folds
- * were long ago discovered to mess things up.  The previous designs for
- * dealing with these involved assigning a special node for them.  This
- * approach doesn't always work, as evidenced by this example:
- *      "\xDFs" =~ /s\xDF/ui    # Used to fail before these patches
- * Both sides fold to "sss", but if the pattern is parsed to create a node that
- * would match just the \xDF, it won't be able to handle the case where a
- * successful match would have to cross the node's boundary.  The new approach
- * that hopefully generally solves the problem generates an EXACTFUP node
- * that is "sss" in this case.
- *
- * It turns out that there are problems with all multi-character folds, and not
- * just these three.  Now the code is general, for all such cases.  The
- * approach taken is:
- * 1)   This routine examines each EXACTFish node that could contain multi-
- *      character folded sequences.  Since a single character can fold into
- *      such a sequence, the minimum match length for this node is less than
- *      the number of characters in the node.  This routine returns in
- *      *min_subtract how many characters to subtract from the actual
- *      length of the string to get a real minimum match length; it is 0 if
- *      there are no multi-char foldeds.  This delta is used by the caller to
- *      adjust the min length of the match, and the delta between min and max,
- *      so that the optimizer doesn't reject these possibilities based on size
- *      constraints.
- *
- * 2)   For the sequence involving the LATIN SMALL LETTER SHARP S (U+00DF)
- *      under /u, we fold it to 'ss' in regatom(), and in this routine, after
- *      joining, we scan for occurrences of the sequence 'ss' in non-UTF-8
- *      EXACTFU nodes.  The node type of such nodes is then changed to
- *      EXACTFUP, indicating it is problematic, and needs careful handling.
- *      (The procedures in step 1) above are sufficient to handle this case in
- *      UTF-8 encoded nodes.)  The reason this is problematic is that this is
- *      the only case where there is a possible fold length change in non-UTF-8
- *      patterns.  By reserving a special node type for problematic cases, the
- *      far more common regular EXACTFU nodes can be processed faster.
- *      regexec.c takes advantage of this.
- *
- *      EXACTFUP has been created as a grab-bag for (hopefully uncommon)
- *      problematic cases.   These all only occur when the pattern is not
- *      UTF-8.  In addition to the 'ss' sequence where there is a possible fold
- *      length change, it handles the situation where the string cannot be
- *      entirely folded.  The strings in an EXACTFish node are folded as much
- *      as possible during compilation in regcomp.c.  This saves effort in
- *      regex matching.  By using an EXACTFUP node when it is not possible to
- *      fully fold at compile time, regexec.c can know that everything in an
- *      EXACTFU node is folded, so folding can be skipped at runtime.  The only
- *      case where folding in EXACTFU nodes can't be done at compile time is
- *      the presumably uncommon MICRO SIGN, when the pattern isn't UTF-8.  This
- *      is because its fold requires UTF-8 to represent.  Thus EXACTFUP nodes
- *      handle two very different cases.  Alternatively, there could have been
- *      a node type where there are length changes, one for unfolded, and one
- *      for both.  If yet another special case needed to be created, the number
- *      of required node types would have to go to 7.  khw figures that even
- *      though there are plenty of node types to spare, that the maintenance
- *      cost wasn't worth the small speedup of doing it that way, especially
- *      since he thinks the MICRO SIGN is rarely encountered in practice.
- *
- *      There are other cases where folding isn't done at compile time, but
- *      none of them are under /u, and hence not for EXACTFU nodes.  The folds
- *      in EXACTFL nodes aren't known until runtime, and vary as the locale
- *      changes.  Some folds in EXACTF depend on if the runtime target string
- *      is UTF-8 or not.  (regatom() will create an EXACTFU node even under /di
- *      when no fold in it depends on the UTF-8ness of the target string.)
- *
- * 3)   A problem remains for unfolded multi-char folds. (These occur when the
- *      validity of the fold won't be known until runtime, and so must remain
- *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
- *      nodes when the pattern isn't in UTF-8.  (Note, BTW, that there cannot
- *      be an EXACTF node with a UTF-8 pattern.)  They also occur for various
- *      folds in EXACTFL nodes, regardless of the UTF-ness of the pattern.)
- *      The reason this is a problem is that the optimizer part of regexec.c
- *      (probably unwittingly, in Perl_regexec_flags()) makes an assumption
- *      that a character in the pattern corresponds to at most a single
- *      character in the target string.  (And I do mean character, and not byte
- *      here, unlike other parts of the documentation that have never been
- *      updated to account for multibyte Unicode.)  Sharp s in EXACTF and
- *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
- *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
- *      EXACTFL nodes, violate the assumption, and they are the only instances
- *      where it is violated.  I'm reluctant to try to change the assumption,
- *      as the code involved is impenetrable to me (khw), so instead the code
- *      here punts.  This routine examines EXACTFL nodes, and (when the pattern
- *      isn't UTF-8) EXACTF and EXACTFAA for such unfolded folds, and returns a
- *      boolean indicating whether or not the node contains such a fold.  When
- *      it is true, the caller sets a flag that later causes the optimizer in
- *      this file to not set values for the floating and fixed string lengths,
- *      and thus avoids the optimizer code in regexec.c that makes the invalid
- *      assumption.  Thus, there is no optimization based on string lengths for
- *      EXACTFL nodes that contain these few folds, nor for non-UTF8-pattern
- *      EXACTF and EXACTFAA nodes that contain the sharp s.  (The reason the
- *      assumption is wrong only in these cases is that all other non-UTF-8
- *      folds are 1-1; and, for UTF-8 patterns, we pre-fold all other folds to
- *      their expanded versions.  (Again, we can't prefold sharp s to 'ss' in
- *      EXACTF nodes because we don't know at compile time if it actually
- *      matches 'ss' or not.  For EXACTF nodes it will match iff the target
- *      string is in UTF-8.  This is in contrast to EXACTFU nodes, where it
- *      always matches; and EXACTFAA where it never does.  In an EXACTFAA node
- *      in a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}, avoiding the
- *      problem; but in a non-UTF8 pattern, folding it to that above-Latin1
- *      string would require the pattern to be forced into UTF-8, the overhead
- *      of which we want to avoid.  Similarly the unfolded multi-char folds in
- *      EXACTFL nodes will match iff the locale at the time of match is a UTF-8
- *      locale.)
- *
- *      Similarly, the code that generates tries doesn't currently handle
- *      not-already-folded multi-char folds, and it looks like a pain to change
- *      that.  Therefore, trie generation of EXACTFAA nodes with the sharp s
- *      doesn't work.  Instead, such an EXACTFAA is turned into a new regnode,
- *      EXACTFAA_NO_TRIE, which the trie code knows not to handle.  Most people
- *      using /iaa matching will be doing so almost entirely with ASCII
- *      strings, so this should rarely be encountered in practice */
-
-STATIC U32
-S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
-                   UV *min_subtract, bool *unfolded_multi_char,
-                   U32 flags, regnode *val, U32 depth)
-{
-    /* Merge several consecutive EXACTish nodes into one. */
-
-    regnode *n = regnext(scan);
-    U32 stringok = 1;
-    regnode *next = REGNODE_AFTER_varies(scan);
-    U32 merged = 0;
-    U32 stopnow = 0;
-#ifdef DEBUGGING
-    regnode *stop = scan;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-#else
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    PERL_ARGS_ASSERT_JOIN_EXACT;
-#ifndef EXPERIMENTAL_INPLACESCAN
-    PERL_UNUSED_ARG(flags);
-    PERL_UNUSED_ARG(val);
-#endif
-    DEBUG_PEEP("join", scan, depth, 0);
-
-    assert(REGNODE_TYPE(OP(scan)) == EXACT);
-
-    /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
-     * EXACT ones that are mergeable to the current one. */
-    while (    n
-           && (    REGNODE_TYPE(OP(n)) == NOTHING
-               || (stringok && REGNODE_TYPE(OP(n)) == EXACT))
-           && NEXT_OFF(n)
-           && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX)
-    {
-
-        if (OP(n) == TAIL || n > next)
-            stringok = 0;
-        if (REGNODE_TYPE(OP(n)) == NOTHING) {
-            DEBUG_PEEP("skip:", n, depth, 0);
-            NEXT_OFF(scan) += NEXT_OFF(n);
-            next = n + NODE_STEP_REGNODE;
-#ifdef DEBUGGING
-            if (stringok)
-                stop = n;
-#endif
-            n = regnext(n);
-        }
-        else if (stringok) {
-            const unsigned int oldl = STR_LEN(scan);
-            regnode * const nnext = regnext(n);
-
-            /* XXX I (khw) kind of doubt that this works on platforms (should
-             * Perl ever run on one) where U8_MAX is above 255 because of lots
-             * of other assumptions */
-            /* Don't join if the sum can't fit into a single node */
-            if (oldl + STR_LEN(n) > U8_MAX)
-                break;
-
-            /* Joining something that requires UTF-8 with something that
-             * doesn't, means the result requires UTF-8. */
-            if (OP(scan) == EXACT && (OP(n) == EXACT_REQ8)) {
-                OP(scan) = EXACT_REQ8;
-            }
-            else if (OP(scan) == EXACT_REQ8 && (OP(n) == EXACT)) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_REQ8)) {
-                OP(scan) = EXACTFU_REQ8;
-            }
-            else if ((OP(scan) == EXACTFU_REQ8) && (OP(n) == EXACTFU)) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
-
-                 /* Under /di, temporary EXACTFU_S_EDGE nodes are generated,
-                  * which can join with EXACTFU ones.  We check for this case
-                  * here.  These need to be resolved to either EXACTFU or
-                  * EXACTF at joining time.  They have nothing in them that
-                  * would forbid them from being the more desirable EXACTFU
-                  * nodes except that they begin and/or end with a single [Ss].
-                  * The reason this is problematic is because they could be
-                  * joined in this loop with an adjacent node that ends and/or
-                  * begins with [Ss] which would then form the sequence 'ss',
-                  * which matches differently under /di than /ui, in which case
-                  * EXACTFU can't be used.  If the 'ss' sequence doesn't get
-                  * formed, the nodes get absorbed into any adjacent EXACTFU
-                  * node.  And if the only adjacent node is EXACTF, they get
-                  * absorbed into that, under the theory that a longer node is
-                  * better than two shorter ones, even if one is EXACTFU.  Note
-                  * that EXACTFU_REQ8 is generated only for UTF-8 patterns,
-                  * and the EXACTFU_S_EDGE ones only for non-UTF-8.  */
-
-                if (STRING(n)[STR_LEN(n)-1] == 's') {
-
-                    /* Here the joined node would end with 's'.  If the node
-                     * following the combination is an EXACTF one, it's better to
-                     * join this trailing edge 's' node with that one, leaving the
-                     * current one in 'scan' be the more desirable EXACTFU */
-                    if (OP(nnext) == EXACTF) {
-                        break;
-                    }
-
-                    OP(scan) = EXACTFU_S_EDGE;
-
-                }   /* Otherwise, the beginning 's' of the 2nd node just
-                       becomes an interior 's' in 'scan' */
-            }
-            else if (OP(scan) == EXACTF && OP(n) == EXACTF) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
-
-                /* EXACTF nodes are compatible for joining with EXACTFU_S_EDGE
-                 * nodes.  But the latter nodes can be also joined with EXACTFU
-                 * ones, and that is a better outcome, so if the node following
-                 * 'n' is EXACTFU, quit now so that those two can be joined
-                 * later */
-                if (OP(nnext) == EXACTFU) {
-                    break;
-                }
-
-                /* The join is compatible, and the combined node will be
-                 * EXACTF.  (These don't care if they begin or end with 's' */
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
-                if (   STRING(scan)[STR_LEN(scan)-1] == 's'
-                    && STRING(n)[0] == 's')
-                {
-                    /* When combined, we have the sequence 'ss', which means we
-                     * have to remain /di */
-                    OP(scan) = EXACTF;
-                }
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
-                if (STRING(n)[0] == 's') {
-                    ;   /* Here the join is compatible and the combined node
-                           starts with 's', no need to change OP */
-                }
-                else {  /* Now the trailing 's' is in the interior */
-                    OP(scan) = EXACTFU;
-                }
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
-
-                /* The join is compatible, and the combined node will be
-                 * EXACTF.  (These don't care if they begin or end with 's' */
-                OP(scan) = EXACTF;
-            }
-            else if (OP(scan) != OP(n)) {
-
-                /* The only other compatible joinings are the same node type */
-                break;
-            }
-
-            DEBUG_PEEP("merg", n, depth, 0);
-            merged++;
-
-            next = REGNODE_AFTER_varies(n);
-            NEXT_OFF(scan) += NEXT_OFF(n);
-            assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
-            setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
-            /* Now we can overwrite *n : */
-            Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
-#ifdef DEBUGGING
-            stop = next - 1;
-#endif
-            n = nnext;
-            if (stopnow) break;
-        }
-
-#ifdef EXPERIMENTAL_INPLACESCAN
-        if (flags && !NEXT_OFF(n)) {
-            DEBUG_PEEP("atch", val, depth, 0);
-            if (REGNODE_OFF_BY_ARG(OP(n))) {
-                ARG_SET(n, val - n);
-            }
-            else {
-                NEXT_OFF(n) = val - n;
-            }
-            stopnow = 1;
-        }
-#endif
-    }
-
-    /* This temporary node can now be turned into EXACTFU, and must, as
-     * regexec.c doesn't handle it */
-    if (OP(scan) == EXACTFU_S_EDGE) {
-        OP(scan) = EXACTFU;
-    }
-
-    *min_subtract = 0;
-    *unfolded_multi_char = FALSE;
-
-    /* Here, all the adjacent mergeable EXACTish nodes have been merged.  We
-     * can now analyze for sequences of problematic code points.  (Prior to
-     * this final joining, sequences could have been split over boundaries, and
-     * hence missed).  The sequences only happen in folding, hence for any
-     * non-EXACT EXACTish node */
-    if (OP(scan) != EXACT && OP(scan) != EXACT_REQ8 && OP(scan) != EXACTL) {
-        U8* s0 = (U8*) STRING(scan);
-        U8* s = s0;
-        U8* s_end = s0 + STR_LEN(scan);
-
-        int total_count_delta = 0;  /* Total delta number of characters that
-                                       multi-char folds expand to */
-
-        /* One pass is made over the node's string looking for all the
-         * possibilities.  To avoid some tests in the loop, there are two main
-         * cases, for UTF-8 patterns (which can't have EXACTF nodes) and
-         * non-UTF-8 */
-        if (UTF) {
-            U8* folded = NULL;
-
-            if (OP(scan) == EXACTFL) {
-                U8 *d;
-
-                /* An EXACTFL node would already have been changed to another
-                 * node type unless there is at least one character in it that
-                 * is problematic; likely a character whose fold definition
-                 * won't be known until runtime, and so has yet to be folded.
-                 * For all but the UTF-8 locale, folds are 1-1 in length, but
-                 * to handle the UTF-8 case, we need to create a temporary
-                 * folded copy using UTF-8 locale rules in order to analyze it.
-                 * This is because our macros that look to see if a sequence is
-                 * a multi-char fold assume everything is folded (otherwise the
-                 * tests in those macros would be too complicated and slow).
-                 * Note that here, the non-problematic folds will have already
-                 * been done, so we can just copy such characters.  We actually
-                 * don't completely fold the EXACTFL string.  We skip the
-                 * unfolded multi-char folds, as that would just create work
-                 * below to figure out the size they already are */
-
-                Newx(folded, UTF8_MAX_FOLD_CHAR_EXPAND * STR_LEN(scan) + 1, U8);
-                d = folded;
-                while (s < s_end) {
-                    STRLEN s_len = UTF8SKIP(s);
-                    if (! is_PROBLEMATIC_LOCALE_FOLD_utf8(s)) {
-                        Copy(s, d, s_len, U8);
-                        d += s_len;
-                    }
-                    else if (is_FOLDS_TO_MULTI_utf8(s)) {
-                        *unfolded_multi_char = TRUE;
-                        Copy(s, d, s_len, U8);
-                        d += s_len;
-                    }
-                    else if (isASCII(*s)) {
-                        *(d++) = toFOLD(*s);
-                    }
-                    else {
-                        STRLEN len;
-                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
-                        d += len;
-                    }
-                    s += s_len;
-                }
-
-                /* Point the remainder of the routine to look at our temporary
-                 * folded copy */
-                s = folded;
-                s_end = d;
-            } /* End of creating folded copy of EXACTFL string */
-
-            /* Examine the string for a multi-character fold sequence.  UTF-8
-             * patterns have all characters pre-folded by the time this code is
-             * executed */
-            while (s < s_end - 1) /* Can stop 1 before the end, as minimum
-                                     length sequence we are looking for is 2 */
-            {
-                int count = 0;  /* How many characters in a multi-char fold */
-                int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
-                if (! len) {    /* Not a multi-char fold: get next char */
-                    s += UTF8SKIP(s);
-                    continue;
-                }
-
-                { /* Here is a generic multi-char fold. */
-                    U8* multi_end  = s + len;
-
-                    /* Count how many characters are in it.  In the case of
-                     * /aa, no folds which contain ASCII code points are
-                     * allowed, so check for those, and skip if found. */
-                    if (OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
-                        count = utf8_length(s, multi_end);
-                        s = multi_end;
-                    }
-                    else {
-                        while (s < multi_end) {
-                            if (isASCII(*s)) {
-                                s++;
-                                goto next_iteration;
-                            }
-                            else {
-                                s += UTF8SKIP(s);
-                            }
-                            count++;
-                        }
-                    }
-                }
-
-                /* The delta is how long the sequence is minus 1 (1 is how long
-                 * the character that folds to the sequence is) */
-                total_count_delta += count - 1;
-              next_iteration: ;
-            }
-
-            /* We created a temporary folded copy of the string in EXACTFL
-             * nodes.  Therefore we need to be sure it doesn't go below zero,
-             * as the real string could be shorter */
-            if (OP(scan) == EXACTFL) {
-                int total_chars = utf8_length((U8*) STRING(scan),
-                                           (U8*) STRING(scan) + STR_LEN(scan));
-                if (total_count_delta > total_chars) {
-                    total_count_delta = total_chars;
-                }
-            }
-
-            *min_subtract += total_count_delta;
-            Safefree(folded);
-        }
-        else if (OP(scan) == EXACTFAA) {
-
-            /* Non-UTF-8 pattern, EXACTFAA node.  There can't be a multi-char
-             * fold to the ASCII range (and there are no existing ones in the
-             * upper latin1 range).  But, as outlined in the comments preceding
-             * this function, we need to flag any occurrences of the sharp s.
-             * This character forbids trie formation (because of added
-             * complexity) */
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-            while (s < s_end) {
-                if (*s == LATIN_SMALL_LETTER_SHARP_S) {
-                    OP(scan) = EXACTFAA_NO_TRIE;
-                    *unfolded_multi_char = TRUE;
-                    break;
-                }
-                s++;
-            }
-        }
-        else if (OP(scan) != EXACTFAA_NO_TRIE) {
-
-            /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
-             * folds that are all Latin1.  As explained in the comments
-             * preceding this function, we look also for the sharp s in EXACTF
-             * and EXACTFL nodes; it can be in the final position.  Otherwise
-             * we can stop looking 1 byte earlier because have to find at least
-             * two characters for a multi-fold */
-            const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
-                              ? s_end
-                              : s_end -1;
-
-            while (s < upper) {
-                int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
-                if (! len) {    /* Not a multi-char fold. */
-                    if (*s == LATIN_SMALL_LETTER_SHARP_S
-                        && (OP(scan) == EXACTF || OP(scan) == EXACTFL))
-                    {
-                        *unfolded_multi_char = TRUE;
-                    }
-                    s++;
-                    continue;
-                }
-
-                if (len == 2
-                    && isALPHA_FOLD_EQ(*s, 's')
-                    && isALPHA_FOLD_EQ(*(s+1), 's'))
-                {
-
-                    /* EXACTF nodes need to know that the minimum length
-                     * changed so that a sharp s in the string can match this
-                     * ss in the pattern, but they remain EXACTF nodes, as they
-                     * won't match this unless the target string is in UTF-8,
-                     * which we don't know until runtime.  EXACTFL nodes can't
-                     * transform into EXACTFU nodes */
-                    if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
-                        OP(scan) = EXACTFUP;
-                    }
-                }
-
-                *min_subtract += len - 1;
-                s += len;
-            }
-#endif
-        }
-    }
-
-#ifdef DEBUGGING
-    /* Allow dumping but overwriting the collection of skipped
-     * ops and/or strings with fake optimized ops */
-    n = REGNODE_AFTER_varies(scan);
-    while (n <= stop) {
-        OP(n) = OPTIMIZED;
-        FLAGS(n) = 0;
-        NEXT_OFF(n) = 0;
-        n++;
-    }
-#endif
-    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl", scan, depth, 0);});
-    return stopnow;
-}
-
-/* REx optimizer.  Converts nodes into quicker variants "in place".
-   Finds fixed substrings.  */
-
-/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
-   to the position after last scanned or to NULL. */
-
-#define INIT_AND_WITHP \
-    assert(!and_withp); \
-    Newx(and_withp, 1, regnode_ssc); \
-    SAVEFREEPV(and_withp)
-
-
-static void
-S_unwind_scan_frames(pTHX_ const void *p)
-{
-    scan_frame *f= (scan_frame *)p;
-    do {
-        scan_frame *n= f->next_frame;
-        Safefree(f);
-        f= n;
-    } while (f);
-}
-
-/* Follow the next-chain of the current node and optimize away
-   all the NOTHINGs from it.
- */
-STATIC void
-S_rck_elide_nothing(pTHX_ regnode *node)
-{
-    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
-
-    if (OP(node) != CURLYX) {
-        const int max = (REGNODE_OFF_BY_ARG(OP(node))
-                        ? I32_MAX
-                          /* I32 may be smaller than U16 on CRAYs! */
-                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-        int off = (REGNODE_OFF_BY_ARG(OP(node)) ? ARG(node) : NEXT_OFF(node));
-        int noff;
-        regnode *n = node;
-
-        /* Skip NOTHING and LONGJMP. */
-        while (
-            (n = regnext(n))
-            && (
-                (REGNODE_TYPE(OP(n)) == NOTHING && (noff = NEXT_OFF(n)))
-                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
-            )
-            && off + noff < max
-        ) {
-            off += noff;
-        }
-        if (REGNODE_OFF_BY_ARG(OP(node)))
-            ARG(node) = off;
-        else
-            NEXT_OFF(node) = off;
-    }
-    return;
-}
-
-/* the return from this sub is the minimum length that could possibly match */
-STATIC SSize_t
-S_study_chunk(pTHX_
-    RExC_state_t *pRExC_state,
-    regnode **scanp,        /* Start here (read-write). */
-    SSize_t *minlenp,       /* used for the minlen of substrings? */
-    SSize_t *deltap,        /* Write maxlen-minlen here. */
-    regnode *last,          /* Stop before this one. */
-    scan_data_t *data,      /* string data about the pattern */
-    I32 stopparen,          /* treat CLOSE-N as END, see GOSUB */
-    U32 recursed_depth,     /* how deep have we recursed via GOSUB */
-    regnode_ssc *and_withp, /* Valid if flags & SCF_DO_STCLASS_OR */
-    U32 flags,              /* flags controlling this call, see SCF_ flags */
-    U32 depth,              /* how deep have we recursed period */
-    bool was_mutate_ok      /* TRUE if in-place optimizations are allowed.
-                               FALSE only if the caller (recursively) was
-                               prohibited from modifying the regops, because
-                               a higher caller is holding a ptr to them. */
-)
-{
-    /* vars about the regnodes we are working with */
-    regnode *scan = *scanp; /* the current opcode we are inspecting */
-    regnode *next = NULL;   /* the next opcode beyond scan, tmp var */
-    regnode *first_non_open = scan; /* FIXME: should this init to NULL?
-                                       the first non open regop, if the init
-                                       val IS an OPEN then we will skip past
-                                       it just after the var decls section */
-    I32 code = 0;           /* temp var used to hold the optype of a regop */
-
-    /* vars about the min and max length of the pattern */
-    SSize_t min = 0;    /* min length of this part of the pattern */
-    SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
-                                         this is adjusted down if we find
-                                         an ACCEPT */
-    SSize_t delta = 0;  /* difference between min and max length
-                           (not accounting for stopmin) */
-
-    /* vars about capture buffers in the pattern */
-    I32 pars = 0;       /* count of OPEN opcodes */
-    I32 is_par = OP(scan) == OPEN ? PARNO(scan) : 0; /* is this op an OPEN? */
-
-    /* vars about whether this pattern contains something that can match
-     * infinitely long strings, eg, X* or X+ */
-    int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
-    int is_inf_internal = 0;            /* The studied chunk is infinite */
-
-    /* scan_data_t (struct) is used to hold information about the substrings
-     * and start class we have extracted from the string */
-    scan_data_t data_fake; /* temp var used for recursing in some cases */
-
-    SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
-                                   trie optimizations */
-
-    scan_frame *frame = NULL;  /* used as part of fake recursion */
-
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_STUDY_CHUNK;
-    RExC_study_started= 1;
-
-    Zero(&data_fake, 1, scan_data_t);
-
-    if ( depth == 0 ) {
-        while (first_non_open && OP(first_non_open) == OPEN)
-            first_non_open=regnext(first_non_open);
-    }
-
-  fake_study_recurse:
-    DEBUG_r(
-        RExC_study_chunk_recursed_count++;
-    );
-    DEBUG_OPTIMISE_MORE_r(
-    {
-        Perl_re_indentf( aTHX_  "study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
-            depth, (long)stopparen,
-            (unsigned long)RExC_study_chunk_recursed_count,
-            (unsigned long)depth, (unsigned long)recursed_depth,
-            scan,
-            last);
-        if (recursed_depth) {
-            U32 i;
-            U32 j;
-            for ( j = 0 ; j < recursed_depth ; j++ ) {
-                for ( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
-                    if (PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
-                        Perl_re_printf( aTHX_ " %d",(int)i);
-                        break;
-                    }
-                }
-                if ( j + 1 < recursed_depth ) {
-                    Perl_re_printf( aTHX_  ",");
-                }
-            }
-        }
-        Perl_re_printf( aTHX_ "\n");
-    }
-    );
-    while ( scan && OP(scan) != END && scan < last ){
-        UV min_subtract = 0;    /* How mmany chars to subtract from the minimum
-                                   node length to get a real minimum (because
-                                   the folded version may be shorter) */
-        bool unfolded_multi_char = FALSE;
-        /* avoid mutating ops if we are anywhere within the recursed or
-         * enframed handling for a GOSUB: the outermost level will handle it.
-         */
-        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
-        /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
-        DEBUG_PEEP("Peep", scan, depth, flags);
-
-
-        /* The reason we do this here is that we need to deal with things like
-         * /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT
-         * parsing code, as each (?:..) is handled by a different invocation of
-         * reg() -- Yves
-         */
-        if (REGNODE_TYPE(OP(scan)) == EXACT
-            && OP(scan) != LEXACT
-            && OP(scan) != LEXACT_REQ8
-            && mutate_ok
-        ) {
-            join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
-                    0, NULL, depth + 1);
-        }
-
-        /* Follow the next-chain of the current node and optimize
-           away all the NOTHINGs from it.
-         */
-        rck_elide_nothing(scan);
-
-        /* The principal pseudo-switch.  Cannot be a switch, since we look into
-         * several different things.  */
-        if ( OP(scan) == DEFINEP ) {
-            SSize_t minlen = 0;
-            SSize_t deltanext = 0;
-            SSize_t fake_last_close = 0;
-            regnode *fake_last_close_op = NULL;
-            U32 f = SCF_IN_DEFINE | (flags & SCF_TRIE_DOING_RESTUDY);
-
-            StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-            scan = regnext(scan);
-            assert( OP(scan) == IFTHEN );
-            DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
-
-            data_fake.last_closep= &fake_last_close;
-            data_fake.last_close_opp= &fake_last_close_op;
-            minlen = *minlenp;
-            next = regnext(scan);
-            scan = REGNODE_AFTER_type(scan,tregnode_IFTHEN);
-            DEBUG_PEEP("scan", scan, depth, flags);
-            DEBUG_PEEP("next", next, depth, flags);
-
-            /* we suppose the run is continuous, last=next...
-             * NOTE we dont use the return here! */
-            /* DEFINEP study_chunk() recursion */
-            (void)study_chunk(pRExC_state, &scan, &minlen,
-                              &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1, mutate_ok);
-
-            scan = next;
-        } else
-        if (
-            OP(scan) == BRANCH  ||
-            OP(scan) == BRANCHJ ||
-            OP(scan) == IFTHEN
-        ) {
-            next = regnext(scan);
-            code = OP(scan);
-
-            /* The op(next)==code check below is to see if we
-             * have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN"
-             * IFTHEN is special as it might not appear in pairs.
-             * Not sure whether BRANCH-BRANCHJ is possible, regardless
-             * we dont handle it cleanly. */
-            if (OP(next) == code || code == IFTHEN) {
-                /* NOTE - There is similar code to this block below for
-                 * handling TRIE nodes on a re-study.  If you change stuff here
-                 * check there too. */
-                SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
-                regnode_ssc accum;
-                regnode * const startbranch=scan;
-
-                if (flags & SCF_DO_SUBSTR) {
-                    /* Cannot merge strings after this. */
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                }
-
-                if (flags & SCF_DO_STCLASS)
-                    ssc_init_zero(pRExC_state, &accum);
-
-                while (OP(scan) == code) {
-                    SSize_t deltanext, minnext, fake_last_close = 0;
-                    regnode *fake_last_close_op = NULL;
-                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                    regnode_ssc this_class;
-
-                    DEBUG_PEEP("Branch", scan, depth, flags);
-
-                    num++;
-                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                    if (data) {
-                        data_fake.whilem_c = data->whilem_c;
-                        data_fake.last_closep = data->last_closep;
-                        data_fake.last_close_opp = data->last_close_opp;
-                    }
-                    else {
-                        data_fake.last_closep = &fake_last_close;
-                        data_fake.last_close_opp = &fake_last_close_op;
-                    }
-
-                    data_fake.pos_delta = delta;
-                    next = regnext(scan);
-
-                    scan = REGNODE_AFTER_opcode(scan, code);
-
-                    if (flags & SCF_DO_STCLASS) {
-                        ssc_init(pRExC_state, &this_class);
-                        data_fake.start_class = &this_class;
-                        f |= SCF_DO_STCLASS_AND;
-                    }
-                    if (flags & SCF_WHILEM_VISITED_POS)
-                        f |= SCF_WHILEM_VISITED_POS;
-
-                    /* we suppose the run is continuous, last=next...*/
-                    /* recurse study_chunk() for each BRANCH in an alternation */
-                    minnext = study_chunk(pRExC_state, &scan, minlenp,
-                                      &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1,
-                                      mutate_ok);
-
-                    if (min1 > minnext)
-                        min1 = minnext;
-                    if (deltanext == OPTIMIZE_INFTY) {
-                        is_inf = is_inf_internal = 1;
-                        max1 = OPTIMIZE_INFTY;
-                    } else if (max1 < minnext + deltanext)
-                        max1 = minnext + deltanext;
-                    scan = next;
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
-                        if ( stopmin > minnext)
-                            stopmin = min + min1;
-                        flags &= ~SCF_DO_SUBSTR;
-                        if (data)
-                            data->flags |= SCF_SEEN_ACCEPT;
-                    }
-                    if (data) {
-                        if (data_fake.flags & SF_HAS_EVAL)
-                            data->flags |= SF_HAS_EVAL;
-                        data->whilem_c = data_fake.whilem_c;
-                    }
-                    if (flags & SCF_DO_STCLASS)
-                        ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
-                    DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
-                }
-                if (code == IFTHEN && num < 2) /* Empty ELSE branch */
-                    min1 = 0;
-                if (flags & SCF_DO_SUBSTR) {
-                    data->pos_min += min1;
-                    if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
-                        data->pos_delta = OPTIMIZE_INFTY;
-                    else
-                        data->pos_delta += max1 - min1;
-                    if (max1 != min1 || is_inf)
-                        data->cur_is_floating = 1;
-                }
-                min += min1;
-                if (delta == OPTIMIZE_INFTY
-                 || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
-                    delta = OPTIMIZE_INFTY;
-                else
-                    delta += max1 - min1;
-                if (flags & SCF_DO_STCLASS_OR) {
-                    ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
-                    if (min1) {
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                        flags &= ~SCF_DO_STCLASS;
-                    }
-                }
-                else if (flags & SCF_DO_STCLASS_AND) {
-                    if (min1) {
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                        flags &= ~SCF_DO_STCLASS;
-                    }
-                    else {
-                        /* Switch to OR mode: cache the old value of
-                         * data->start_class */
-                        INIT_AND_WITHP;
-                        StructCopy(data->start_class, and_withp, regnode_ssc);
-                        flags &= ~SCF_DO_STCLASS_AND;
-                        StructCopy(&accum, data->start_class, regnode_ssc);
-                        flags |= SCF_DO_STCLASS_OR;
-                    }
-                }
-                DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
-
-                if (PERL_ENABLE_TRIE_OPTIMISATION
-                    && OP(startbranch) == BRANCH
-                    && mutate_ok
-                ) {
-                /* demq.
-
-                   Assuming this was/is a branch we are dealing with: 'scan'
-                   now points at the item that follows the branch sequence,
-                   whatever it is. We now start at the beginning of the
-                   sequence and look for subsequences of
-
-                   BRANCH->EXACT=>x1
-                   BRANCH->EXACT=>x2
-                   tail
-
-                   which would be constructed from a pattern like
-                   /A|LIST|OF|WORDS/
-
-                   If we can find such a subsequence we need to turn the first
-                   element into a trie and then add the subsequent branch exact
-                   strings to the trie.
-
-                   We have two cases
-
-                     1. patterns where the whole set of branches can be
-                        converted.
-
-                     2. patterns where only a subset can be converted.
-
-                   In case 1 we can replace the whole set with a single regop
-                   for the trie. In case 2 we need to keep the start and end
-                   branches so
-
-                     'BRANCH EXACT; BRANCH EXACT; BRANCH X'
-                     becomes BRANCH TRIE; BRANCH X;
-
-                  There is an additional case, that being where there is a
-                  common prefix, which gets split out into an EXACT like node
-                  preceding the TRIE node.
-
-                  If X(1..n)==tail then we can do a simple trie, if not we make
-                  a "jump" trie, such that when we match the appropriate word
-                  we "jump" to the appropriate tail node. Essentially we turn
-                  a nested if into a case structure of sorts.
-
-                */
-
-                    int made=0;
-                    if (!re_trie_maxbuff) {
-                        re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
-                        if (!SvIOK(re_trie_maxbuff))
-                            sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
-                    }
-                    if ( SvIV(re_trie_maxbuff)>=0  ) {
-                        regnode *cur;
-                        regnode *first = (regnode *)NULL;
-                        regnode *prev = (regnode *)NULL;
-                        regnode *tail = scan;
-                        U8 trietype = 0;
-                        U32 count=0;
-
-                        /* var tail is used because there may be a TAIL
-                           regop in the way. Ie, the exacts will point to the
-                           thing following the TAIL, but the last branch will
-                           point at the TAIL. So we advance tail. If we
-                           have nested (?:) we may have to move through several
-                           tails.
-                         */
-
-                        while ( OP( tail ) == TAIL ) {
-                            /* this is the TAIL generated by (?:) */
-                            tail = regnext( tail );
-                        }
-
-
-                        DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
-                              depth+1,
-                              "Looking for TRIE'able sequences. Tail node is ",
-                              (UV) REGNODE_OFFSET(tail),
-                              SvPV_nolen_const( RExC_mysv )
-                            );
-                        });
-
-                        /*
-
-                            Step through the branches
-                                cur represents each branch,
-                                noper is the first thing to be matched as part
-                                      of that branch
-                                noper_next is the regnext() of that node.
-
-                            We normally handle a case like this
-                            /FOO[xyz]|BAR[pqr]/ via a "jump trie" but we also
-                            support building with NOJUMPTRIE, which restricts
-                            the trie logic to structures like /FOO|BAR/.
-
-                            If noper is a trieable nodetype then the branch is
-                            a possible optimization target. If we are building
-                            under NOJUMPTRIE then we require that noper_next is
-                            the same as scan (our current position in the regex
-                            program).
-
-                            Once we have two or more consecutive such branches
-                            we can create a trie of the EXACT's contents and
-                            stitch it in place into the program.
-
-                            If the sequence represents all of the branches in
-                            the alternation we replace the entire thing with a
-                            single TRIE node.
-
-                            Otherwise when it is a subsequence we need to
-                            stitch it in place and replace only the relevant
-                            branches. This means the first branch has to remain
-                            as it is used by the alternation logic, and its
-                            next pointer, and needs to be repointed at the item
-                            on the branch chain following the last branch we
-                            have optimized away.
-
-                            This could be either a BRANCH, in which case the
-                            subsequence is internal, or it could be the item
-                            following the branch sequence in which case the
-                            subsequence is at the end (which does not
-                            necessarily mean the first node is the start of the
-                            alternation).
-
-                            TRIE_TYPE(X) is a define which maps the optype to a
-                            trietype.
-
-                                optype          |  trietype
-                                ----------------+-----------
-                                NOTHING         | NOTHING
-                                EXACT           | EXACT
-                                EXACT_REQ8      | EXACT
-                                EXACTFU         | EXACTFU
-                                EXACTFU_REQ8    | EXACTFU
-                                EXACTFUP        | EXACTFU
-                                EXACTFAA        | EXACTFAA
-                                EXACTL          | EXACTL
-                                EXACTFLU8       | EXACTFLU8
-
-
-                        */
-#define TRIE_TYPE(X) ( ( NOTHING == (X) )                                   \
-                       ? NOTHING                                            \
-                       : ( EXACT == (X) || EXACT_REQ8 == (X) )             \
-                         ? EXACT                                            \
-                         : (     EXACTFU == (X)                             \
-                              || EXACTFU_REQ8 == (X)                       \
-                              || EXACTFUP == (X) )                          \
-                           ? EXACTFU                                        \
-                           : ( EXACTFAA == (X) )                            \
-                             ? EXACTFAA                                     \
-                             : ( EXACTL == (X) )                            \
-                               ? EXACTL                                     \
-                               : ( EXACTFLU8 == (X) )                       \
-                                 ? EXACTFLU8                                \
-                                 : 0 )
-
-                        /* dont use tail as the end marker for this traverse */
-                        for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
-                            regnode * const noper = REGNODE_AFTER( cur );
-                            U8 noper_type = OP( noper );
-                            U8 noper_trietype = TRIE_TYPE( noper_type );
-#if defined(DEBUGGING) || defined(NOJUMPTRIE)
-                            regnode * const noper_next = regnext( noper );
-                            U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
-                            U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
-#endif
-
-                            DEBUG_TRIE_COMPILE_r({
-                                regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                Perl_re_indentf( aTHX_  "- %d:%s (%d)",
-                                   depth+1,
-                                   REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
-
-                                regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
-                                Perl_re_printf( aTHX_  " -> %d:%s",
-                                    REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
-
-                                if ( noper_next ) {
-                                  regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
-                                  Perl_re_printf( aTHX_ "\t=> %d:%s\t",
-                                    REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
-                                }
-                                Perl_re_printf( aTHX_  "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
-                                   REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
-                                   REGNODE_NAME(trietype), REGNODE_NAME(noper_trietype), REGNODE_NAME(noper_next_trietype)
-                                );
-                            });
-
-                            /* Is noper a trieable nodetype that can be merged
-                             * with the current trie (if there is one)? */
-                            if ( noper_trietype
-                                  &&
-                                  (
-                                        ( noper_trietype == NOTHING )
-                                        || ( trietype == NOTHING )
-                                        || ( trietype == noper_trietype )
-                                  )
-#ifdef NOJUMPTRIE
-                                  && noper_next >= tail
-#endif
-                                  && count < U16_MAX)
-                            {
-                                /* Handle mergable triable node Either we are
-                                 * the first node in a new trieable sequence,
-                                 * in which case we do some bookkeeping,
-                                 * otherwise we update the end pointer. */
-                                if ( !first ) {
-                                    first = cur;
-                                    if ( noper_trietype == NOTHING ) {
-#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
-                                        regnode * const noper_next = regnext( noper );
-                                        U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
-                                        U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
-#endif
-
-                                        if ( noper_next_trietype ) {
-                                            trietype = noper_next_trietype;
-                                        } else if (noper_next_type)  {
-                                            /* a NOTHING regop is 1 regop wide.
-                                             * We need at least two for a trie
-                                             * so we can't merge this in */
-                                            first = NULL;
-                                        }
-                                    } else {
-                                        trietype = noper_trietype;
-                                    }
-                                } else {
-                                    if ( trietype == NOTHING )
-                                        trietype = noper_trietype;
-                                    prev = cur;
-                                }
-                                if (first)
-                                    count++;
-                            } /* end handle mergable triable node */
-                            else {
-                                /* handle unmergable node -
-                                 * noper may either be a triable node which can
-                                 * not be tried together with the current trie,
-                                 * or a non triable node */
-                                if ( prev ) {
-                                    /* If last is set and trietype is not
-                                     * NOTHING then we have found at least two
-                                     * triable branch sequences in a row of a
-                                     * similar trietype so we can turn them
-                                     * into a trie. If/when we allow NOTHING to
-                                     * start a trie sequence this condition
-                                     * will be required, and it isn't expensive
-                                     * so we leave it in for now. */
-                                    if ( trietype && trietype != NOTHING )
-                                        make_trie( pRExC_state,
-                                                startbranch, first, cur, tail,
-                                                count, trietype, depth+1 );
-                                    prev = NULL; /* note: we clear/update
-                                                    first, trietype etc below,
-                                                    so we dont do it here */
-                                }
-                                if ( noper_trietype
-#ifdef NOJUMPTRIE
-                                     && noper_next >= tail
-#endif
-                                ){
-                                    /* noper is triable, so we can start a new
-                                     * trie sequence */
-                                    count = 1;
-                                    first = cur;
-                                    trietype = noper_trietype;
-                                } else if (first) {
-                                    /* if we already saw a first but the
-                                     * current node is not triable then we have
-                                     * to reset the first information. */
-                                    count = 0;
-                                    first = NULL;
-                                    trietype = 0;
-                                }
-                            } /* end handle unmergable node */
-                        } /* loop over branches */
-                        DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
-                              depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
-                            Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
-                               REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
-                               REGNODE_NAME(trietype)
-                            );
-
-                        });
-                        if ( prev && trietype ) {
-                            if ( trietype != NOTHING ) {
-                                /* the last branch of the sequence was part of
-                                 * a trie, so we have to construct it here
-                                 * outside of the loop */
-                                made= make_trie( pRExC_state, startbranch,
-                                                 first, scan, tail, count,
-                                                 trietype, depth+1 );
-#ifdef TRIE_STUDY_OPT
-                                if ( ((made == MADE_EXACT_TRIE &&
-                                     startbranch == first)
-                                     || ( first_non_open == first )) &&
-                                     depth==0 ) {
-                                    flags |= SCF_TRIE_RESTUDY;
-                                    if ( startbranch == first
-                                         && scan >= tail )
-                                    {
-                                        RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN;
-                                    }
-                                }
-#endif
-                            } else {
-                                /* at this point we know whatever we have is a
-                                 * NOTHING sequence/branch AND if 'startbranch'
-                                 * is 'first' then we can turn the whole thing
-                                 * into a NOTHING
-                                 */
-                                if ( startbranch == first ) {
-                                    regnode *opt;
-                                    /* the entire thing is a NOTHING sequence,
-                                     * something like this: (?:|) So we can
-                                     * turn it into a plain NOTHING op. */
-                                    DEBUG_TRIE_COMPILE_r({
-                                        regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                        Perl_re_indentf( aTHX_  "- %s (%d) <NOTHING BRANCH SEQUENCE>\n",
-                                          depth+1,
-                                          SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
-
-                                    });
-                                    OP(startbranch)= NOTHING;
-                                    NEXT_OFF(startbranch)= tail - startbranch;
-                                    for ( opt= startbranch + 1; opt < tail ; opt++ )
-                                        OP(opt)= OPTIMIZED;
-                                }
-                            }
-                        } /* end if ( prev) */
-                    } /* TRIE_MAXBUF is non zero */
-                } /* do trie */
-                DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
-            }
-            else
-                scan = REGNODE_AFTER_opcode(scan,code);
-            continue;
-        } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
-            I32 paren = 0;
-            regnode *start = NULL;
-            regnode *end = NULL;
-            U32 my_recursed_depth= recursed_depth;
-
-            if (OP(scan) != SUSPEND) { /* GOSUB */
-                /* Do setup, note this code has side effects beyond
-                 * the rest of this block. Specifically setting
-                 * RExC_recurse[] must happen at least once during
-                 * study_chunk(). */
-                paren = ARG(scan);
-                RExC_recurse[ARG2L(scan)] = scan;
-                start = REGNODE_p(RExC_open_parens[paren]);
-                end   = REGNODE_p(RExC_close_parens[paren]);
-
-                /* NOTE we MUST always execute the above code, even
-                 * if we do nothing with a GOSUB */
-                if (
-                    ( flags & SCF_IN_DEFINE )
-                    ||
-                    (
-                        (is_inf_internal || is_inf || (data && data->flags & SF_IS_INF))
-                        &&
-                        ( (flags & (SCF_DO_STCLASS | SCF_DO_SUBSTR)) == 0 )
-                    )
-                ) {
-                    /* no need to do anything here if we are in a define. */
-                    /* or we are after some kind of infinite construct
-                     * so we can skip recursing into this item.
-                     * Since it is infinite we will not change the maxlen
-                     * or delta, and if we miss something that might raise
-                     * the minlen it will merely pessimise a little.
-                     *
-                     * Iow /(?(DEFINE)(?<foo>foo|food))a+(?&foo)/
-                     * might result in a minlen of 1 and not of 4,
-                     * but this doesn't make us mismatch, just try a bit
-                     * harder than we should.
-                     *
-                     * However we must assume this GOSUB is infinite, to
-                     * avoid wrongly applying other optimizations in the
-                     * enclosing scope - see GH 18096, for example.
-                     */
-                    is_inf = is_inf_internal = 1;
-                    scan= regnext(scan);
-                    continue;
-                }
-
-                if (
-                    !recursed_depth
-                    || !PAREN_TEST(recursed_depth - 1, paren)
-                ) {
-                    /* it is quite possible that there are more efficient ways
-                     * to do this. We maintain a bitmap per level of recursion
-                     * of which patterns we have entered so we can detect if a
-                     * pattern creates a possible infinite loop. When we
-                     * recurse down a level we copy the previous levels bitmap
-                     * down. When we are at recursion level 0 we zero the top
-                     * level bitmap. It would be nice to implement a different
-                     * more efficient way of doing this. In particular the top
-                     * level bitmap may be unnecessary.
-                     */
-                    if (!recursed_depth) {
-                        Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
-                    } else {
-                        Copy(PAREN_OFFSET(recursed_depth - 1),
-                             PAREN_OFFSET(recursed_depth),
-                             RExC_study_chunk_recursed_bytes, U8);
-                    }
-                    /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
-                    PAREN_SET(recursed_depth, paren);
-                    my_recursed_depth= recursed_depth + 1;
-                } else {
-                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
-                    /* some form of infinite recursion, assume infinite length
-                     * */
-                    if (flags & SCF_DO_SUBSTR) {
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        data->cur_is_floating = 1;
-                    }
-                    is_inf = is_inf_internal = 1;
-                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                        ssc_anything(data->start_class);
-                    flags &= ~SCF_DO_STCLASS;
-
-                    start= NULL; /* reset start so we dont recurse later on. */
-                }
-            } else {
-                paren = stopparen;
-                start = scan + 2;
-                end = regnext(scan);
-            }
-            if (start) {
-                scan_frame *newframe;
-                assert(end);
-                if (!RExC_frame_last) {
-                    Newxz(newframe, 1, scan_frame);
-                    SAVEDESTRUCTOR_X(S_unwind_scan_frames, newframe);
-                    RExC_frame_head= newframe;
-                    RExC_frame_count++;
-                } else if (!RExC_frame_last->next_frame) {
-                    Newxz(newframe, 1, scan_frame);
-                    RExC_frame_last->next_frame= newframe;
-                    newframe->prev_frame= RExC_frame_last;
-                    RExC_frame_count++;
-                } else {
-                    newframe= RExC_frame_last->next_frame;
-                }
-                RExC_frame_last= newframe;
-
-                newframe->next_regnode = regnext(scan);
-                newframe->last_regnode = last;
-                newframe->stopparen = stopparen;
-                newframe->prev_recursed_depth = recursed_depth;
-                newframe->this_prev_frame= frame;
-                newframe->in_gosub = (
-                    (frame && frame->in_gosub) || OP(scan) == GOSUB
-                );
-
-                DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
-                DEBUG_PEEP("fnew", scan, depth, flags);
-
-                frame = newframe;
-                scan =  start;
-                stopparen = paren;
-                last = end;
-                depth = depth + 1;
-                recursed_depth= my_recursed_depth;
-
-                continue;
-            }
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EXACT && ! isEXACTFish(OP(scan))) {
-            SSize_t bytelen = STR_LEN(scan), charlen;
-            UV uc;
-            assert(bytelen);
-            if (UTF) {
-                const U8 * const s = (U8*)STRING(scan);
-                uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
-                charlen = utf8_length(s, s + bytelen);
-            } else {
-                uc = *((U8*)STRING(scan));
-                charlen = bytelen;
-            }
-            min += charlen;
-            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
-                /* The code below prefers earlier match for fixed
-                   offset, later match for variable offset.  */
-                if (data->last_end == -1) { /* Update the start info. */
-                    data->last_start_min = data->pos_min;
-                    data->last_start_max =
-                        is_inf ? OPTIMIZE_INFTY
-                        : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
-                            ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
-                }
-                sv_catpvn(data->last_found, STRING(scan), bytelen);
-                if (UTF)
-                    SvUTF8_on(data->last_found);
-                {
-                    SV * const sv = data->last_found;
-                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
-                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
-                    if (mg && mg->mg_len >= 0)
-                        mg->mg_len += charlen;
-                }
-                data->last_end = data->pos_min + charlen;
-                data->pos_min += charlen; /* As in the first entry. */
-                data->flags &= ~SF_BEFORE_EOL;
-            }
-
-            /* ANDing the code point leaves at most it, and not in locale, and
-             * can't match null string */
-            if (flags & SCF_DO_STCLASS_AND) {
-                ssc_cp_and(data->start_class, uc);
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                ssc_clear_locale(data->start_class);
-            }
-            else if (flags & SCF_DO_STCLASS_OR) {
-                ssc_add_cp(data->start_class, uc);
-                ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-            }
-            flags &= ~SCF_DO_STCLASS;
-            DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EXACT) {
-            /* But OP != EXACT!, so is EXACTFish */
-            SSize_t bytelen = STR_LEN(scan), charlen;
-            const U8 * s = (U8*)STRING(scan);
-
-            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
-             * with the mask set to the complement of the bit that differs
-             * between upper and lower case, and the lowest code point of the
-             * pair (which the '&' forces) */
-            if (     bytelen == 1
-                &&   isALPHA_A(*s)
-                &&  (         OP(scan) == EXACTFAA
-                     || (     OP(scan) == EXACTFU
-                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
-                &&   mutate_ok
-            ) {
-                U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
-
-                OP(scan) = ANYOFM;
-                ARG_SET(scan, *s & mask);
-                FLAGS(scan) = mask;
-                /* We're not EXACTFish any more, so restudy.
-                 * Search for "restudy" in this file to find
-                 * a comment with details. */
-                continue;
-            }
-
-            /* Search for fixed substrings supports EXACT only. */
-            if (flags & SCF_DO_SUBSTR) {
-                assert(data);
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-            }
-            charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
-            if (unfolded_multi_char) {
-                RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
-            }
-            min += charlen - min_subtract;
-            assert (min >= 0);
-            if ((SSize_t)min_subtract < OPTIMIZE_INFTY
-                && delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
-            ) {
-                delta += min_subtract;
-            } else {
-                delta = OPTIMIZE_INFTY;
-            }
-            if (flags & SCF_DO_SUBSTR) {
-                data->pos_min += charlen - min_subtract;
-                if (data->pos_min < 0) {
-                    data->pos_min = 0;
-                }
-                if ((SSize_t)min_subtract < OPTIMIZE_INFTY
-                    && data->pos_delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
-                ) {
-                    data->pos_delta += min_subtract;
-                } else {
-                    data->pos_delta = OPTIMIZE_INFTY;
-                }
-                if (min_subtract) {
-                    data->cur_is_floating = 1; /* float */
-                }
-            }
-
-            if (flags & SCF_DO_STCLASS) {
-                SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
-
-                assert(EXACTF_invlist);
-                if (flags & SCF_DO_STCLASS_AND) {
-                    if (OP(scan) != EXACTFL)
-                        ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                    ANYOF_POSIXL_ZERO(data->start_class);
-                    ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
-                }
-                else {  /* SCF_DO_STCLASS_OR */
-                    ssc_union(data->start_class, EXACTF_invlist, FALSE);
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                    /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                flags &= ~SCF_DO_STCLASS;
-                SvREFCNT_dec(EXACTF_invlist);
-            }
-            DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
-        }
-        else if (REGNODE_VARIES(OP(scan))) {
-            SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
-            I32 fl = 0;
-            U32 f = flags;
-            regnode * const oscan = scan;
-            regnode_ssc this_class;
-            regnode_ssc *oclass = NULL;
-            I32 next_is_eval = 0;
-
-            switch (REGNODE_TYPE(OP(scan))) {
-            case WHILEM:                /* End of (?:...)* . */
-                scan = REGNODE_AFTER(scan);
-                goto finish;
-            case PLUS:
-                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
-                    next = REGNODE_AFTER(scan);
-                    if (   (     REGNODE_TYPE(OP(next)) == EXACT
-                            && ! isEXACTFish(OP(next)))
-                        || (flags & SCF_DO_STCLASS))
-                    {
-                        mincount = 1;
-                        maxcount = REG_INFTY;
-                        next = regnext(scan);
-                        scan = REGNODE_AFTER(scan);
-                        goto do_curly;
-                    }
-                }
-                if (flags & SCF_DO_SUBSTR)
-                    data->pos_min++;
-                /* This will bypass the formal 'min += minnext * mincount'
-                 * calculation in the do_curly path, so assumes min width
-                 * of the PLUS payload is exactly one. */
-                min++;
-                /* FALLTHROUGH */
-            case STAR:
-                next = REGNODE_AFTER(scan);
-
-                /* This temporary node can now be turned into EXACTFU, and
-                 * must, as regexec.c doesn't handle it */
-                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
-                    OP(next) = EXACTFU;
-                }
-
-                if (     STR_LEN(next) == 1
-                    &&   isALPHA_A(* STRING(next))
-                    && (         OP(next) == EXACTFAA
-                        || (     OP(next) == EXACTFU
-                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
-                    &&   mutate_ok
-                ) {
-                    /* These differ in just one bit */
-                    U8 mask = ~ ('A' ^ 'a');
-
-                    assert(isALPHA_A(* STRING(next)));
-
-                    /* Then replace it by an ANYOFM node, with
-                    * the mask set to the complement of the
-                    * bit that differs between upper and lower
-                    * case, and the lowest code point of the
-                    * pair (which the '&' forces) */
-                    OP(next) = ANYOFM;
-                    ARG_SET(next, *STRING(next) & mask);
-                    FLAGS(next) = mask;
-                }
-
-                if (flags & SCF_DO_STCLASS) {
-                    mincount = 0;
-                    maxcount = REG_INFTY;
-                    next = regnext(scan);
-                    scan = REGNODE_AFTER(scan);
-                    goto do_curly;
-                }
-                if (flags & SCF_DO_SUBSTR) {
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    /* Cannot extend fixed substrings */
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                scan = regnext(scan);
-                goto optimize_curly_tail;
-            case CURLY:
-                if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
-                    && (scan->flags == stopparen))
-                {
-                    mincount = 1;
-                    maxcount = 1;
-                } else {
-                    mincount = ARG1(scan);
-                    maxcount = ARG2(scan);
-                }
-                next = regnext(scan);
-                if (OP(scan) == CURLYX) {
-                    I32 lp = (data ? *(data->last_closep) : 0);
-                    scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
-                }
-                scan = REGNODE_AFTER(scan);
-                next_is_eval = (OP(scan) == EVAL);
-              do_curly:
-                if (flags & SCF_DO_SUBSTR) {
-                    if (mincount == 0)
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                    /* Cannot extend fixed substrings */
-                    pos_before = data->pos_min;
-                }
-                if (data) {
-                    fl = data->flags;
-                    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
-                    if (is_inf)
-                        data->flags |= SF_IS_INF;
-                }
-                if (flags & SCF_DO_STCLASS) {
-                    ssc_init(pRExC_state, &this_class);
-                    oclass = data->start_class;
-                    data->start_class = &this_class;
-                    f |= SCF_DO_STCLASS_AND;
-                    f &= ~SCF_DO_STCLASS_OR;
-                }
-                /* Exclude from super-linear cache processing any {n,m}
-                   regops for which the combination of input pos and regex
-                   pos is not enough information to determine if a match
-                   will be possible.
-
-                   For example, in the regex /foo(bar\s*){4,8}baz/ with the
-                   regex pos at the \s*, the prospects for a match depend not
-                   only on the input position but also on how many (bar\s*)
-                   repeats into the {4,8} we are. */
-               if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY))
-                    f &= ~SCF_WHILEM_VISITED_POS;
-
-                /* This will finish on WHILEM, setting scan, or on NULL: */
-                /* recurse study_chunk() on loop bodies */
-                minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
-                                  last, data, stopparen, recursed_depth, NULL,
-                                  (mincount == 0
-                                   ? (f & ~SCF_DO_SUBSTR)
-                                   : f)
-                                  , depth+1, mutate_ok);
-
-                if (data && data->flags & SCF_SEEN_ACCEPT) {
-                    if (mincount > 1)
-                        mincount = 1;
-                }
-
-                if (flags & SCF_DO_STCLASS)
-                    data->start_class = oclass;
-                if (mincount == 0 || minnext == 0) {
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        /* Switch to OR mode: cache the old value of
-                         * data->start_class */
-                        INIT_AND_WITHP;
-                        StructCopy(data->start_class, and_withp, regnode_ssc);
-                        flags &= ~SCF_DO_STCLASS_AND;
-                        StructCopy(&this_class, data->start_class, regnode_ssc);
-                        flags |= SCF_DO_STCLASS_OR;
-                        ANYOF_FLAGS(data->start_class)
-                                                |= SSC_MATCHES_EMPTY_STRING;
-                    }
-                } else {                /* Non-zero len */
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND)
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-                if (!scan)              /* It was not CURLYX, but CURLY. */
-                    scan = next;
-                if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
-                    /* ? quantifier ok, except for (?{ ... }) */
-                    && (next_is_eval || !(mincount == 0 && maxcount == 1))
-                    && (minnext == 0) && (deltanext == 0)
-                    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                    && maxcount <= REG_INFTY/3) /* Complement check for big
-                                                   count */
-                {
-                    _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
-                        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
-                            "Quantifier unexpected on zero-length expression "
-                            "in regex m/%" UTF8f "/",
-                             UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
-                                  RExC_precomp)));
-                }
-
-                if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
-                    || min >= SSize_t_MAX - minnext * mincount )
-                {
-                    FAIL("Regexp out of space");
-                }
-
-                min += minnext * mincount;
-                is_inf_internal |= deltanext == OPTIMIZE_INFTY
-                         || (maxcount == REG_INFTY && minnext + deltanext > 0);
-                is_inf |= is_inf_internal;
-                if (is_inf) {
-                    delta = OPTIMIZE_INFTY;
-                } else {
-                    delta += (minnext + deltanext) * maxcount
-                             - minnext * mincount;
-                }
-
-                if (data && data->flags & SCF_SEEN_ACCEPT) {
-                    if (flags & SCF_DO_SUBSTR) {
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        flags &= ~SCF_DO_SUBSTR;
-                    }
-                    if (stopmin > min)
-                        stopmin = min;
-                    DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
-                }
-                /* Try powerful optimization CURLYX => CURLYN. */
-                if (  OP(oscan) == CURLYX && data
-                      && data->flags & SF_IN_PAR
-                      && !(data->flags & SF_HAS_EVAL)
-                      && !deltanext && minnext == 1
-                      && mutate_ok
-                ) {
-                    /* Try to optimize to CURLYN.  */
-                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX);
-                    regnode * const nxt1 = nxt;
-#ifdef DEBUGGING
-                    regnode *nxt2;
-#endif
-
-                    /* Skip open. */
-                    nxt = regnext(nxt);
-                    if (!REGNODE_SIMPLE(OP(nxt))
-                        && !(REGNODE_TYPE(OP(nxt)) == EXACT
-                             && STR_LEN(nxt) == 1))
-                        goto nogo;
-#ifdef DEBUGGING
-                    nxt2 = nxt;
-#endif
-                    nxt = regnext(nxt);
-                    if (OP(nxt) != CLOSE)
-                        goto nogo;
-                    if (RExC_open_parens) {
-
-                        /*open->CURLYM*/
-                        RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
-
-                        /*close->while*/
-                        RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
-                    }
-                    /* Now we know that nxt2 is the only contents: */
-                    oscan->flags = (U8)PARNO(nxt);
-                    OP(oscan) = CURLYN;
-                    OP(nxt1) = NOTHING; /* was OPEN. */
-
-#ifdef DEBUGGING
-                    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
-                    NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
-                    NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
-                    OP(nxt) = OPTIMIZED;        /* was CLOSE. */
-                    OP(nxt + 1) = OPTIMIZED; /* was count. */
-                    NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
-#endif
-                }
-              nogo:
-
-                /* Try optimization CURLYX => CURLYM. */
-                if (  OP(oscan) == CURLYX && data
-                      && !(data->flags & SF_HAS_PAR)
-                      && !(data->flags & SF_HAS_EVAL)
-                      && !deltanext     /* atom is fixed width */
-                      && minnext != 0   /* CURLYM can't handle zero width */
-                         /* Nor characters whose fold at run-time may be
-                          * multi-character */
-                      && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
-                      && mutate_ok
-                ) {
-                    /* XXXX How to optimize if data == 0? */
-                    /* Optimize to a simpler form.  */
-                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX); /* OPEN */
-                    regnode *nxt2;
-
-                    OP(oscan) = CURLYM;
-                    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
-                            && (OP(nxt2) != WHILEM))
-                        nxt = nxt2;
-                    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
-                    /* Need to optimize away parenths. */
-                    if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
-                        /* Set the parenth number.  */
-                        /* note that we have changed the type of oscan to CURLYM here */
-                        regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/
-
-                        oscan->flags = (U8)PARNO(nxt);
-                        if (RExC_open_parens) {
-                             /*open->CURLYM*/
-                            RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
-
-                            /*close->NOTHING*/
-                            RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt2)
-                                                         + 1;
-                        }
-                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
-                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
-
-#ifdef DEBUGGING
-                        OP(nxt1 + 1) = OPTIMIZED; /* was count. */
-                        OP(nxt + 1) = OPTIMIZED; /* was count. */
-                        NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
-                        NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
-#endif
-#if 0
-                        while ( nxt1 && (OP(nxt1) != WHILEM)) {
-                            regnode *nnxt = regnext(nxt1);
-                            if (nnxt == nxt) {
-                                if (REGNODE_OFF_BY_ARG(OP(nxt1)))
-                                    ARG_SET(nxt1, nxt2 - nxt1);
-                                else if (nxt2 - nxt1 < U16_MAX)
-                                    NEXT_OFF(nxt1) = nxt2 - nxt1;
-                                else
-                                    OP(nxt) = NOTHING;  /* Cannot beautify */
-                            }
-                            nxt1 = nnxt;
-                        }
-#endif
-                        /* Optimize again: */
-                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
-                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
-                                    NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1, mutate_ok);
-                    }
-                    else
-                        oscan->flags = 0;
-                }
-                else if ((OP(oscan) == CURLYX)
-                         && (flags & SCF_WHILEM_VISITED_POS)
-                         /* See the comment on a similar expression above.
-                            However, this time it's not a subexpression
-                            we care about, but the expression itself. */
-                         && (maxcount == REG_INFTY)
-                         && data) {
-                    /* This stays as CURLYX, we can put the count/of pair. */
-                    /* Find WHILEM (as in regexec.c) */
-                    regnode *nxt = oscan + NEXT_OFF(oscan);
-
-                    if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
-                        nxt += ARG(nxt);
-                    nxt = REGNODE_BEFORE(nxt);
-                    if (nxt->flags & 0xf) {
-                        /* we've already set whilem count on this node */
-                    } else if (++data->whilem_c < 16) {
-                        assert(data->whilem_c <= RExC_whilem_seen);
-                        nxt->flags = (U8)(data->whilem_c
-                            | (RExC_whilem_seen << 4)); /* On WHILEM */
-                    }
-                }
-                if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
-                    pars++;
-                if (flags & SCF_DO_SUBSTR) {
-                    SV *last_str = NULL;
-                    STRLEN last_chrs = 0;
-                    int counted = mincount != 0;
-
-                    if (data->last_end > 0 && mincount != 0) { /* Ends with a
-                                                                  string. */
-                        SSize_t b = pos_before >= data->last_start_min
-                            ? pos_before : data->last_start_min;
-                        STRLEN l;
-                        const char * const s = SvPV_const(data->last_found, l);
-                        SSize_t old = b - data->last_start_min;
-                        assert(old >= 0);
-
-                        if (UTF)
-                            old = utf8_hop_forward((U8*)s, old,
-                                               (U8 *) SvEND(data->last_found))
-                                - (U8*)s;
-                        l -= old;
-                        /* Get the added string: */
-                        last_str = newSVpvn_utf8(s  + old, l, UTF);
-                        last_chrs = UTF ? utf8_length((U8*)(s + old),
-                                            (U8*)(s + old + l)) : l;
-                        if (deltanext == 0 && pos_before == b) {
-                            /* What was added is a constant string */
-                            if (mincount > 1) {
-
-                                SvGROW(last_str, (mincount * l) + 1);
-                                repeatcpy(SvPVX(last_str) + l,
-                                          SvPVX_const(last_str), l,
-                                          mincount - 1);
-                                SvCUR_set(last_str, SvCUR(last_str) * mincount);
-                                /* Add additional parts. */
-                                SvCUR_set(data->last_found,
-                                          SvCUR(data->last_found) - l);
-                                sv_catsv(data->last_found, last_str);
-                                {
-                                    SV * sv = data->last_found;
-                                    MAGIC *mg =
-                                        SvUTF8(sv) && SvMAGICAL(sv) ?
-                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
-                                    if (mg && mg->mg_len >= 0)
-                                        mg->mg_len += last_chrs * (mincount-1);
-                                }
-                                last_chrs *= mincount;
-                                data->last_end += l * (mincount - 1);
-                            }
-                        } else {
-                            /* start offset must point into the last copy */
-                            data->last_start_min += minnext * (mincount - 1);
-                            data->last_start_max =
-                              is_inf
-                               ? OPTIMIZE_INFTY
-                               : data->last_start_max +
-                                 (maxcount - 1) * (minnext + data->pos_delta);
-                        }
-                    }
-                    /* It is counted once already... */
-                    data->pos_min += minnext * (mincount - counted);
-#if 0
-    Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
-                              " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
-                              " maxcount=%" UVuf " mincount=%" UVuf
-                              " data->pos_delta=%" UVuf "\n",
-        (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext,
-        (UV)maxcount, (UV)mincount, (UV)data->pos_delta);
-    if (deltanext != OPTIMIZE_INFTY)
-        Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
-            (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
-            - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
-#endif
-                    if (deltanext == OPTIMIZE_INFTY
-                        || data->pos_delta == OPTIMIZE_INFTY
-                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
-                        data->pos_delta = OPTIMIZE_INFTY;
-                    else
-                        data->pos_delta += - counted * deltanext +
-                        (minnext + deltanext) * maxcount - minnext * mincount;
-                    if (mincount != maxcount) {
-                         /* Cannot extend fixed substrings found inside
-                            the group.  */
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        if (mincount && last_str) {
-                            SV * const sv = data->last_found;
-                            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
-                                mg_find(sv, PERL_MAGIC_utf8) : NULL;
-
-                            if (mg)
-                                mg->mg_len = -1;
-                            sv_setsv(sv, last_str);
-                            data->last_end = data->pos_min;
-                            data->last_start_min = data->pos_min - last_chrs;
-                            data->last_start_max = is_inf
-                                ? OPTIMIZE_INFTY
-                                : data->pos_min + data->pos_delta - last_chrs;
-                        }
-                        data->cur_is_floating = 1; /* float */
-                    }
-                    SvREFCNT_dec(last_str);
-                }
-                if (data && (fl & SF_HAS_EVAL))
-                    data->flags |= SF_HAS_EVAL;
-              optimize_curly_tail:
-                rck_elide_nothing(oscan);
-                continue;
-
-            default:
-                Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
-                                                                    OP(scan));
-            case REF:
-            case CLUMP:
-                if (flags & SCF_DO_SUBSTR) {
-                    /* Cannot expect anything... */
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                if (flags & SCF_DO_STCLASS_OR) {
-                    if (OP(scan) == CLUMP) {
-                        /* Actually is any start char, but very few code points
-                         * aren't start characters */
-                        ssc_match_all_cp(data->start_class);
-                    }
-                    else {
-                        ssc_anything(data->start_class);
-                    }
-                }
-                flags &= ~SCF_DO_STCLASS;
-                break;
-            }
-        }
-        else if (OP(scan) == LNBREAK) {
-            if (flags & SCF_DO_STCLASS) {
-                if (flags & SCF_DO_STCLASS_AND) {
-                    ssc_intersection(data->start_class,
-                                    PL_XPosix_ptrs[CC_VERTSPACE_], FALSE);
-                    ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class)
-                                                &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                else if (flags & SCF_DO_STCLASS_OR) {
-                    ssc_union(data->start_class,
-                              PL_XPosix_ptrs[CC_VERTSPACE_],
-                              FALSE);
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                    /* See commit msg for
-                     * 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class)
-                                                &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                flags &= ~SCF_DO_STCLASS;
-            }
-            min++;
-            if (delta != OPTIMIZE_INFTY)
-                delta++;    /* Because of the 2 char string cr-lf */
-            if (flags & SCF_DO_SUBSTR) {
-                /* Cannot expect anything... */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min += 1;
-                if (data->pos_delta != OPTIMIZE_INFTY) {
-                    data->pos_delta += 1;
-                }
-                data->cur_is_floating = 1; /* float */
-            }
-        }
-        else if (REGNODE_SIMPLE(OP(scan))) {
-
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min++;
-            }
-            min++;
-            if (flags & SCF_DO_STCLASS) {
-                bool invert = 0;
-                SV* my_invlist = NULL;
-                U8 namedclass;
-
-                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-
-                /* Some of the logic below assumes that switching
-                   locale on will only add false positives. */
-                switch (OP(scan)) {
-
-                default:
-#ifdef DEBUGGING
-                   Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
-                                                                     OP(scan));
-#endif
-                case SANY:
-                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                        ssc_match_all_cp(data->start_class);
-                    break;
-
-                case REG_ANY:
-                    {
-                        SV* REG_ANY_invlist = _new_invlist(2);
-                        REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist,
-                                                            '\n');
-                        if (flags & SCF_DO_STCLASS_OR) {
-                            ssc_union(data->start_class,
-                                      REG_ANY_invlist,
-                                      TRUE /* TRUE => invert, hence all but \n
-                                            */
-                                      );
-                        }
-                        else if (flags & SCF_DO_STCLASS_AND) {
-                            ssc_intersection(data->start_class,
-                                             REG_ANY_invlist,
-                                             TRUE  /* TRUE => invert */
-                                             );
-                            ssc_clear_locale(data->start_class);
-                        }
-                        SvREFCNT_dec_NN(REG_ANY_invlist);
-                    }
-                    break;
-
-                case ANYOFD:
-                case ANYOFL:
-                case ANYOFPOSIXL:
-                case ANYOFH:
-                case ANYOFHb:
-                case ANYOFHr:
-                case ANYOFHs:
-                case ANYOF:
-                    if (flags & SCF_DO_STCLASS_AND)
-                        ssc_and(pRExC_state, data->start_class,
-                                (regnode_charclass *) scan);
-                    else
-                        ssc_or(pRExC_state, data->start_class,
-                                                          (regnode_charclass *) scan);
-                    break;
-
-                case ANYOFHbbm:
-                  {
-                    SV* cp_list = get_ANYOFHbbm_contents(scan);
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case NANYOFM: /* NANYOFM already contains the inversion of the
-                                 input ANYOF data, so, unlike things like
-                                 NPOSIXA, don't change 'invert' to TRUE */
-                    /* FALLTHROUGH */
-                case ANYOFM:
-                  {
-                    SV* cp_list = get_ANYOFM_contents(scan);
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case ANYOFR:
-                case ANYOFRb:
-                  {
-                    SV* cp_list = NULL;
-
-                    cp_list = _add_range_to_invlist(cp_list,
-                                        ANYOFRbase(scan),
-                                        ANYOFRbase(scan) + ANYOFRdelta(scan));
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case NPOSIXL:
-                    invert = 1;
-                    /* FALLTHROUGH */
-
-                case POSIXL:
-                    namedclass = classnum_to_namedclass(FLAGS(scan)) + invert;
-                    if (flags & SCF_DO_STCLASS_AND) {
-                        bool was_there = cBOOL(
-                                          ANYOF_POSIXL_TEST(data->start_class,
-                                                                 namedclass));
-                        ANYOF_POSIXL_ZERO(data->start_class);
-                        if (was_there) {    /* Do an AND */
-                            ANYOF_POSIXL_SET(data->start_class, namedclass);
-                        }
-                        /* No individual code points can now match */
-                        data->start_class->invlist
-                                                = sv_2mortal(_new_invlist(0));
-                    }
-                    else {
-                        int complement = namedclass + ((invert) ? -1 : 1);
-
-                        assert(flags & SCF_DO_STCLASS_OR);
-
-                        /* If the complement of this class was already there,
-                         * the result is that they match all code points,
-                         * (\d + \D == everything).  Remove the classes from
-                         * future consideration.  Locale is not relevant in
-                         * this case */
-                        if (ANYOF_POSIXL_TEST(data->start_class, complement)) {
-                            ssc_match_all_cp(data->start_class);
-                            ANYOF_POSIXL_CLEAR(data->start_class, namedclass);
-                            ANYOF_POSIXL_CLEAR(data->start_class, complement);
-                        }
-                        else {  /* The usual case; just add this class to the
-                                   existing set */
-                            ANYOF_POSIXL_SET(data->start_class, namedclass);
-                        }
-                    }
-                    break;
-
-                case NPOSIXA:   /* For these, we always know the exact set of
-                                   what's matched */
-                    invert = 1;
-                    /* FALLTHROUGH */
-                case POSIXA:
-                    my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
-                    goto join_posix_and_ascii;
-
-                case NPOSIXD:
-                case NPOSIXU:
-                    invert = 1;
-                    /* FALLTHROUGH */
-                case POSIXD:
-                case POSIXU:
-                    my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
-
-                    /* NPOSIXD matches all upper Latin1 code points unless the
-                     * target string being matched is UTF-8, which is
-                     * unknowable until match time.  Since we are going to
-                     * invert, we want to get rid of all of them so that the
-                     * inversion will match all */
-                    if (OP(scan) == NPOSIXD) {
-                        _invlist_subtract(my_invlist, PL_UpperLatin1,
-                                          &my_invlist);
-                    }
-
-                  join_posix_and_ascii:
-
-                    if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, my_invlist, invert);
-                        ssc_clear_locale(data->start_class);
-                    }
-                    else {
-                        assert(flags & SCF_DO_STCLASS_OR);
-                        ssc_union(data->start_class, my_invlist, invert);
-                    }
-                    SvREFCNT_dec(my_invlist);
-                }
-                if (flags & SCF_DO_STCLASS_OR)
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                flags &= ~SCF_DO_STCLASS;
-            }
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EOL && flags & SCF_DO_SUBSTR) {
-            data->flags |= (OP(scan) == MEOL
-                            ? SF_BEFORE_MEOL
-                            : SF_BEFORE_SEOL);
-            scan_commit(pRExC_state, data, minlenp, is_inf);
-
-        }
-        else if (  REGNODE_TYPE(OP(scan)) == BRANCHJ
-                 /* Lookbehind, or need to calculate parens/evals/stclass: */
-                   && (scan->flags || data || (flags & SCF_DO_STCLASS))
-                   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
-        {
-            if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
-                || OP(scan) == UNLESSM )
-            {
-                /* Negative Lookahead/lookbehind
-                   In this case we can't do fixed string optimisation.
-                */
-
-                bool is_positive = OP(scan) == IFMATCH ? 1 : 0;
-                SSize_t deltanext, minnext;
-                SSize_t fake_last_close = 0;
-                regnode *fake_last_close_op = NULL;
-                regnode *cur_last_close_op;
-                regnode *nscan;
-                regnode_ssc intrnl;
-                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-
-                StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                if (data) {
-                    data_fake.whilem_c = data->whilem_c;
-                    data_fake.last_closep = data->last_closep;
-                    data_fake.last_close_opp = data->last_close_opp;
-                }
-                else {
-                    data_fake.last_closep = &fake_last_close;
-                    data_fake.last_close_opp = &fake_last_close_op;
-                }
-
-                /* remember the last_close_op we saw so we can see if
-                 * we are dealing with variable length lookbehind that
-                 * contains capturing buffers, which are considered
-                 * experimental */
-                cur_last_close_op= *(data_fake.last_close_opp);
-
-                data_fake.pos_delta = delta;
-                if ( flags & SCF_DO_STCLASS && !scan->flags
-                     && OP(scan) == IFMATCH ) { /* Lookahead */
-                    ssc_init(pRExC_state, &intrnl);
-                    data_fake.start_class = &intrnl;
-                    f |= SCF_DO_STCLASS_AND;
-                }
-                if (flags & SCF_WHILEM_VISITED_POS)
-                    f |= SCF_WHILEM_VISITED_POS;
-                next = regnext(scan);
-                nscan = REGNODE_AFTER(scan);
-
-                /* recurse study_chunk() for lookahead body */
-                minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
-                                      last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1,
-                                      mutate_ok);
-
-                if (scan->flags) {
-                    if (   deltanext < 0
-                        || deltanext > (I32) U8_MAX
-                        || minnext > (I32)U8_MAX
-                        || minnext + deltanext > (I32)U8_MAX)
-                    {
-                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
-                              (UV)U8_MAX);
-                    }
-
-                    /* The 'next_off' field has been repurposed to count the
-                     * additional starting positions to try beyond the initial
-                     * one.  (This leaves it at 0 for non-variable length
-                     * matches to avoid breakage for those not using this
-                     * extension) */
-                    if (deltanext)  {
-                        scan->next_off = deltanext;
-                        if (
-                            /* See a CLOSE op inside this lookbehind? */
-                            cur_last_close_op != *(data_fake.last_close_opp)
-                            /* and not doing restudy. see: restudied */
-                            && !(flags & SCF_TRIE_DOING_RESTUDY)
-                        ) {
-                            /* this is positive variable length lookbehind with
-                             * capture buffers inside of it */
-                            ckWARNexperimental_with_arg(RExC_parse,
-                                WARN_EXPERIMENTAL__VLB,
-                                "Variable length %s lookbehind with capturing is experimental",
-                                is_positive ? "positive" : "negative");
-                        }
-                    }
-                    scan->flags = (U8)minnext + deltanext;
-                }
-                if (data) {
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SF_HAS_EVAL)
-                        data->flags |= SF_HAS_EVAL;
-                    data->whilem_c = data_fake.whilem_c;
-                }
-                if (f & SCF_DO_STCLASS_AND) {
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        /* OR before, AND after: ideally we would recurse with
-                         * data_fake to get the AND applied by study of the
-                         * remainder of the pattern, and then derecurse;
-                         * *** HACK *** for now just treat as "no information".
-                         * See [perl #56690].
-                         */
-                        ssc_init(pRExC_state, data->start_class);
-                    }  else {
-                        /* AND before and after: combine and continue.  These
-                         * assertions are zero-length, so can match an EMPTY
-                         * string */
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
-                        ANYOF_FLAGS(data->start_class)
-                                                   |= SSC_MATCHES_EMPTY_STRING;
-                    }
-                }
-                DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
-            }
-#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
-            else {
-                /* Positive Lookahead/lookbehind
-                   In this case we can do fixed string optimisation,
-                   but we must be careful about it. Note in the case of
-                   lookbehind the positions will be offset by the minimum
-                   length of the pattern, something we won't know about
-                   until after the recurse.
-                */
-                SSize_t deltanext, fake_last_close = 0;
-                regnode *last_close_op = NULL;
-                regnode *nscan;
-                regnode_ssc intrnl;
-                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                /* We use SAVEFREEPV so that when the full compile
-                    is finished perl will clean up the allocated
-                    minlens when it's all done. This way we don't
-                    have to worry about freeing them when we know
-                    they wont be used, which would be a pain.
-                 */
-                SSize_t *minnextp;
-                Newx( minnextp, 1, SSize_t );
-                SAVEFREEPV(minnextp);
-
-                if (data) {
-                    StructCopy(data, &data_fake, scan_data_t);
-                    if ((flags & SCF_DO_SUBSTR) && data->last_found) {
-                        f |= SCF_DO_SUBSTR;
-                        if (scan->flags)
-                            scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
-                        data_fake.last_found=newSVsv(data->last_found);
-                    }
-                }
-                else {
-                    data_fake.last_closep = &fake_last_close;
-                    data_fake.last_close_opp = &fake_last_close_opp;
-                }
-                data_fake.flags = 0;
-                data_fake.substrs[0].flags = 0;
-                data_fake.substrs[1].flags = 0;
-                data_fake.pos_delta = delta;
-                if (is_inf)
-                    data_fake.flags |= SF_IS_INF;
-                if ( flags & SCF_DO_STCLASS && !scan->flags
-                     && OP(scan) == IFMATCH ) { /* Lookahead */
-                    ssc_init(pRExC_state, &intrnl);
-                    data_fake.start_class = &intrnl;
-                    f |= SCF_DO_STCLASS_AND;
-                }
-                if (flags & SCF_WHILEM_VISITED_POS)
-                    f |= SCF_WHILEM_VISITED_POS;
-                next = regnext(scan);
-                nscan = REGNODE_AFTER(scan);
-
-                /* positive lookahead study_chunk() recursion */
-                *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
-                                        &deltanext, last, &data_fake,
-                                        stopparen, recursed_depth, NULL,
-                                        f, depth+1, mutate_ok);
-                if (scan->flags) {
-                    assert(0);  /* This code has never been tested since this
-                                   is normally not compiled */
-                    if (   deltanext < 0
-                        || deltanext > (I32) U8_MAX
-                        || *minnextp > (I32)U8_MAX
-                        || *minnextp + deltanext > (I32)U8_MAX)
-                    {
-                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
-                              (UV)U8_MAX);
-                    }
-
-                    if (deltanext) {
-                        scan->next_off = deltanext;
-                    }
-                    scan->flags = (U8)*minnextp + deltanext;
-                }
-
-                *minnextp += min;
-
-                if (f & SCF_DO_STCLASS_AND) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
-                    ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
-                }
-                if (data) {
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SF_HAS_EVAL)
-                        data->flags |= SF_HAS_EVAL;
-                    data->whilem_c = data_fake.whilem_c;
-                    if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
-                        int i;
-                        if (RExC_rx->minlen < *minnextp)
-                            RExC_rx->minlen = *minnextp;
-                        scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
-                        SvREFCNT_dec_NN(data_fake.last_found);
-
-                        for (i = 0; i < 2; i++) {
-                            if (data_fake.substrs[i].minlenp != minlenp) {
-                                data->substrs[i].min_offset =
-                                            data_fake.substrs[i].min_offset;
-                                data->substrs[i].max_offset =
-                                            data_fake.substrs[i].max_offset;
-                                data->substrs[i].minlenp =
-                                            data_fake.substrs[i].minlenp;
-                                data->substrs[i].lookbehind += scan->flags;
-                            }
-                        }
-                    }
-                }
-            }
-#endif
-        }
-        else if (OP(scan) == OPEN) {
-            if (stopparen != (I32)PARNO(scan))
-                pars++;
-        }
-        else if (OP(scan) == CLOSE) {
-            if (stopparen == (I32)PARNO(scan)) {
-                break;
-            }
-            if ((I32)PARNO(scan) == is_par) {
-                next = regnext(scan);
-
-                if ( next && (OP(next) != WHILEM) && next < last)
-                    is_par = 0;         /* Disable optimization */
-            }
-            if (data) {
-                *(data->last_closep) = PARNO(scan);
-                *(data->last_close_opp) = scan;
-            }
-        }
-        else if (OP(scan) == EVAL) {
-            if (data)
-                data->flags |= SF_HAS_EVAL;
-        }
-        else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                flags &= ~SCF_DO_SUBSTR;
-            }
-            if (OP(scan)==ACCEPT) {
-                /* m{(*ACCEPT)x} does not have to start with 'x' */
-                flags &= ~SCF_DO_STCLASS;
-                if (data)
-                    data->flags |= SCF_SEEN_ACCEPT;
-                if (stopmin > min)
-                    stopmin = min;
-            }
-        }
-        else if (OP(scan) == COMMIT) {
-            /* gh18770: m{abc(*COMMIT)xyz} must fail on "abc abcxyz", so we
-             * must not end up with "abcxyz" as a fixed substring else we'll
-             * skip straight to attempting to match at offset 4.
-             */
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                flags &= ~SCF_DO_SUBSTR;
-            }
-        }
-        else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
-        {
-                if (flags & SCF_DO_SUBSTR) {
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                    ssc_anything(data->start_class);
-                flags &= ~SCF_DO_STCLASS;
-        }
-        else if (OP(scan) == GPOS) {
-            if (!(RExC_rx->intflags & PREGf_GPOS_FLOAT) &&
-                !(delta || is_inf || (data && data->pos_delta)))
-            {
-                if (!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR))
-                    RExC_rx->intflags |= PREGf_ANCH_GPOS;
-                if (RExC_rx->gofs < (STRLEN)min)
-                    RExC_rx->gofs = min;
-            } else {
-                RExC_rx->intflags |= PREGf_GPOS_FLOAT;
-                RExC_rx->gofs = 0;
-            }
-        }
-#ifdef TRIE_STUDY_OPT
-#ifdef FULL_TRIE_STUDY
-        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
-            /* NOTE - There is similar code to this block above for handling
-               BRANCH nodes on the initial study.  If you change stuff here
-               check there too. */
-            regnode *trie_node= scan;
-            regnode *tail= regnext(scan);
-            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
-            regnode_ssc accum;
-
-            if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
-                /* Cannot merge strings after this. */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-            }
-            if (flags & SCF_DO_STCLASS)
-                ssc_init_zero(pRExC_state, &accum);
-
-            if (!trie->jump) {
-                min1= trie->minlen;
-                max1= trie->maxlen;
-            } else {
-                const regnode *nextbranch= NULL;
-                U32 word;
-
-                for ( word=1 ; word <= trie->wordcount ; word++)
-                {
-                    SSize_t deltanext = 0, minnext = 0;
-                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                    SSize_t fake_last_close = 0;
-                    regnode *fake_last_close_op = NULL;
-                    regnode_ssc this_class;
-
-                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                    if (data) {
-                        data_fake.whilem_c = data->whilem_c;
-                        data_fake.last_closep = data->last_closep;
-                        data_fake.last_close_opp = data->last_close_opp;
-                    }
-                    else {
-                        data_fake.last_closep = &fake_last_close;
-                        data_fake.last_close_opp = &fake_last_close_op;
-                    }
-                    data_fake.pos_delta = delta;
-                    if (flags & SCF_DO_STCLASS) {
-                        ssc_init(pRExC_state, &this_class);
-                        data_fake.start_class = &this_class;
-                        f |= SCF_DO_STCLASS_AND;
-                    }
-                    if (flags & SCF_WHILEM_VISITED_POS)
-                        f |= SCF_WHILEM_VISITED_POS;
-
-                    if (trie->jump[word]) {
-                        if (!nextbranch)
-                            nextbranch = trie_node + trie->jump[0];
-                        scan= trie_node + trie->jump[word];
-                        /* We go from the jump point to the branch that follows
-                           it. Note this means we need the vestigal unused
-                           branches even though they arent otherwise used. */
-                        /* optimise study_chunk() for TRIE */
-                        minnext = study_chunk(pRExC_state, &scan, minlenp,
-                            &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1,
-                            mutate_ok);
-                    }
-                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
-                        nextbranch= regnext((regnode*)nextbranch);
-
-                    if (min1 > (SSize_t)(minnext + trie->minlen))
-                        min1 = minnext + trie->minlen;
-                    if (deltanext == OPTIMIZE_INFTY) {
-                        is_inf = is_inf_internal = 1;
-                        max1 = OPTIMIZE_INFTY;
-                    } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
-                        max1 = minnext + deltanext + trie->maxlen;
-
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
-                        if ( stopmin > min + min1)
-                            stopmin = min + min1;
-                        flags &= ~SCF_DO_SUBSTR;
-                        if (data)
-                            data->flags |= SCF_SEEN_ACCEPT;
-                    }
-                    if (data) {
-                        if (data_fake.flags & SF_HAS_EVAL)
-                            data->flags |= SF_HAS_EVAL;
-                        data->whilem_c = data_fake.whilem_c;
-                    }
-                    if (flags & SCF_DO_STCLASS)
-                        ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
-                }
-                DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
-            }
-            if (flags & SCF_DO_SUBSTR) {
-                data->pos_min += min1;
-                data->pos_delta += max1 - min1;
-                if (max1 != min1 || is_inf)
-                    data->cur_is_floating = 1; /* float */
-            }
-            min += min1;
-            if (delta != OPTIMIZE_INFTY) {
-                if (OPTIMIZE_INFTY - (max1 - min1) >= delta)
-                    delta += max1 - min1;
-                else
-                    delta = OPTIMIZE_INFTY;
-            }
-            if (flags & SCF_DO_STCLASS_OR) {
-                ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                if (min1) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-            }
-            else if (flags & SCF_DO_STCLASS_AND) {
-                if (min1) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-                else {
-                    /* Switch to OR mode: cache the old value of
-                     * data->start_class */
-                    INIT_AND_WITHP;
-                    StructCopy(data->start_class, and_withp, regnode_ssc);
-                    flags &= ~SCF_DO_STCLASS_AND;
-                    StructCopy(&accum, data->start_class, regnode_ssc);
-                    flags |= SCF_DO_STCLASS_OR;
-                }
-            }
-            scan= tail;
-            DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
-            continue;
-        }
-#else
-        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
-            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            U8*bang=NULL;
-
-            min += trie->minlen;
-            delta += (trie->maxlen - trie->minlen);
-            flags &= ~SCF_DO_STCLASS; /* xxx */
-            if (flags & SCF_DO_SUBSTR) {
-                /* Cannot expect anything... */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min += trie->minlen;
-                data->pos_delta += (trie->maxlen - trie->minlen);
-                if (trie->maxlen != trie->minlen)
-                    data->cur_is_floating = 1; /* float */
-            }
-            if (trie->jump) /* no more substrings -- for now /grr*/
-               flags &= ~SCF_DO_SUBSTR;
-        }
-
-#endif /* old or new */
-#endif /* TRIE_STUDY_OPT */
-
-        else if (OP(scan) == REGEX_SET) {
-            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
-                             " before optimization", REGNODE_NAME(REGEX_SET));
-        }
-
-        /* Else: zero-length, ignore. */
-        scan = regnext(scan);
-    }
-
-  finish:
-    if (frame) {
-        /* we need to unwind recursion. */
-        depth = depth - 1;
-
-        DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
-        DEBUG_PEEP("fend", scan, depth, flags);
-
-        /* restore previous context */
-        last = frame->last_regnode;
-        scan = frame->next_regnode;
-        stopparen = frame->stopparen;
-        recursed_depth = frame->prev_recursed_depth;
-
-        RExC_frame_last = frame->prev_frame;
-        frame = frame->this_prev_frame;
-        goto fake_study_recurse;
-    }
-
-    assert(!frame);
-    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
-
-    /* is this pattern infinite? Eg, consider /(a|b+)/ */
-    if (is_inf_internal)
-        delta = OPTIMIZE_INFTY;
-
-    /* deal with (*ACCEPT), Eg, consider /(foo(*ACCEPT)|bop)bar/ */
-    if (min > stopmin) {
-        /*
-        At this point 'min' represents the minimum length string we can
-        match while *ignoring* the implication of ACCEPT, and 'delta'
-        represents the difference between the minimum length and maximum
-        length, and if the pattern matches an infinitely long string
-        (consider the + and * quantifiers) then we use the special delta
-        value of OPTIMIZE_INFTY to represent it. 'stopmin' is the
-        minimum length that can be matched *and* accepted.
-
-        A pattern is accepted when matching was successful *and*
-        complete, and thus there is no further matching needing to be
-        done, no backtracking to occur, etc. Prior to the introduction
-        of ACCEPT the only opcode that signaled acceptance was the END
-        opcode, which is always the very last opcode in a regex program.
-        ACCEPT is thus conceptually an early successful return out of
-        the matching process. stopmin starts out as OPTIMIZE_INFTY to
-        represent "the entire pattern", and is ratched down to the
-        "current min" if necessary when an ACCEPT opcode is encountered.
-
-        Thus stopmin might be smaller than min if we saw an (*ACCEPT),
-        and we now need to account for it in both min and delta.
-        Consider that in a pattern /AB/ normally the min length it can
-        match can be computed as min(A)+min(B). But (*ACCEPT) means
-        that it might be something else, not even neccesarily min(A) at
-        all. Consider
-
-             A  = /(foo(*ACCEPT)|x+)/
-             B  = /whop/
-             AB = /(foo(*ACCEPT)|x+)whop/
-
-        The min for A is 1 for "x" and the delta for A is OPTIMIZE_INFTY
-        for "xxxxx...", its stopmin is 3 for "foo". The min for B is 4 for
-        "whop", and the delta of 0 as the pattern is of fixed length, the
-        stopmin would be OPTIMIZE_INFTY as it does not contain an ACCEPT.
-        When handling AB we expect to see a min of 5 for "xwhop", and a
-        delta of OPTIMIZE_INFTY for "xxxxx...whop", and a stopmin of 3
-        for "foo". This should result in a final min of 3 for "foo", and
-        a final delta of OPTIMIZE_INFTY for "xxxxx...whop".
-
-        In something like /(dude(*ACCEPT)|irk)x{3,7}/ we would have a
-        min of 6 for "irkxxx" and a delta of 4 for "irkxxxxxxx", and the
-        stop min would be 4 for "dude". This should result in a final
-        min of 4 for "dude", and a final delta of 6, for "irkxxxxxxx".
-
-        When min is smaller than stopmin then we can ignore it. In the
-        fragment /(x{10,20}(*ACCEPT)|a)b+/, we would have a min of 2,
-        and a delta of OPTIMIZE_INFTY, and a stopmin of 10. Obviously
-        the ACCEPT doesn't reduce the minimum length of the string that
-        might be matched, nor affect the maximum length.
-
-        In something like /foo(*ACCEPT)ba?r/ we would have a min of 5
-        for "foobr", a delta of 1 for "foobar", and a stopmin of 3 for
-        "foo". We currently turn this into a min of 3 for "foo" and a
-        delta of 3 for "foobar" even though technically "foobar" isn't
-        possible. ACCEPT affects some aspects of the optimizer, like
-        length computations and mandatory substring optimizations, but
-        there are other optimzations this routine perfoms that are not
-        affected and this compromise simplifies implementation.
-
-        It might be helpful to consider that this C function is called
-        recursively on the pattern in a bottom up fashion, and that the
-        min returned by a nested call may be marked as coming from an
-        ACCEPT, causing its callers to treat the returned min as a
-        stopmin as the recursion unwinds. Thus a single ACCEPT can affect
-        multiple calls into this function in different ways.
-        */
-
-        if (OPTIMIZE_INFTY - delta >= min - stopmin)
-            delta += min - stopmin;
-        else
-            delta = OPTIMIZE_INFTY;
-        min = stopmin;
-    }
-
-    *scanp = scan;
-    *deltap = delta;
-
-    if (flags & SCF_DO_SUBSTR && is_inf)
-        data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
-    if (is_par > (I32)U8_MAX)
-        is_par = 0;
-    if (is_par && pars==1 && data) {
-        data->flags |= SF_IN_PAR;
-        data->flags &= ~SF_HAS_PAR;
-    }
-    else if (pars && data) {
-        data->flags |= SF_HAS_PAR;
-        data->flags &= ~SF_IN_PAR;
-    }
-    if (flags & SCF_DO_STCLASS_OR)
-        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-    if (flags & SCF_TRIE_RESTUDY)
-        data->flags |=  SCF_TRIE_RESTUDY;
-
-
-    if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-        if (min > OPTIMIZE_INFTY - delta)
-            RExC_maxlen = OPTIMIZE_INFTY;
-        else if (RExC_maxlen < min + delta)
-            RExC_maxlen = min + delta;
-    }
-    DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
-    return min;
-}
-
 /* add a data member to the struct reg_data attached to this regex, it should
  * always return a non-zero return. the 's' argument is the type of the items
  * being added and the n is the number of items. The length of 's' should match
  * the number of items. */
-STATIC U32
-S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
+U32
+Perl_reg_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
 {
     U32 count = RExC_rxi->data ? RExC_rxi->data->count : 1;
 
-    PERL_ARGS_ASSERT_ADD_DATA;
+    PERL_ARGS_ASSERT_REG_ADD_DATA;
 
     /* in the below expression we have (count + n - 1), the minus one is there
      * because the struct that we allocate already contains a slot for 1 data
@@ -6958,9 +320,9 @@ S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
         /* when count == 1 it means we have not initialized anything.
          * we always fill the 0 slot of the data array with a '%' entry, which
          * means "zero" (all the other types are letters) which exists purely
-         * so the return from add_data is ALWAYS true, so we can tell it apart
+         * so the return from reg_add_data is ALWAYS true, so we can tell it apart
          * from a "no value" idx=0 in places where we would return an index
-         * into add_data.  This is particularly important with the new "single
+         * into reg_add_data.  This is particularly important with the new "single
          * pass, usually, but not always" strategy that we use, where the code
          * will use a 0 to represent "not able to compute this yet".
          */
@@ -7435,9 +797,9 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                     assert(n < pRExC_state->code_blocks->count);
                     src = &ri->code_blocks->cb[i];
                     dst = &pRExC_state->code_blocks->cb[n];
-                    dst->start      = src->start + offset;
-                    dst->end        = src->end   + offset;
-                    dst->block      = src->block;
+                    dst->start	    = src->start + offset;
+                    dst->end	    = src->end   + offset;
+                    dst->block	    = src->block;
                     dst->src_regex  = (REGEXP*) SvREFCNT_inc( (SV*)
                                             src->src_regex
                                                 ? src->src_regex
@@ -7689,9 +1051,9 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 
             assert(pat[src->start] == '(');
             assert(pat[src->end]   == ')');
-            dst->start      = src->start;
-            dst->end        = src->end;
-            dst->block      = src->block;
+            dst->start	    = src->start;
+            dst->end	    = src->end;
+            dst->block	    = src->block;
             dst->src_regex  = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
                                     : src->src_regex;
             dst++;
@@ -8236,7 +1598,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     RExC_rx->intflags = 0;
 
-    RExC_flags = rx_flags;      /* don't let top level (?i) bleed */
+    RExC_flags = rx_flags;	/* don't let top level (?i) bleed */
     RExC_parse_set(exp);
 
     /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
@@ -8423,11 +1785,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
 
     if (UTF)
-        SvUTF8_on(Rx);  /* Unicode in it? */
+        SvUTF8_on(Rx);	/* Unicode in it? */
     RExC_rxi->regstclass = NULL;
-    if (RExC_naughty >= TOO_NAUGHTY)    /* Probably an expensive pattern. */
+    if (RExC_naughty >= TOO_NAUGHTY)	/* Probably an expensive pattern. */
         RExC_rx->intflags |= PREGf_NAUGHTY;
-    scan = RExC_rxi->program + 1;               /* First BRANCH. */
+    scan = RExC_rxi->program + 1;		/* First BRANCH. */
 
     /* testing for BRANCH here tells us whether there is "must appear"
        data in the pattern. If there is then we can use it for optimisations */
@@ -8487,7 +1849,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* Ignore EXACT as we deal with it later. */
         if (REGNODE_TYPE(OP(first)) == EXACT) {
             if (! isEXACTFish(OP(first))) {
-                NOOP;   /* Empty, get anchored substr later. */
+                NOOP;	/* Empty, get anchored substr later. */
             }
             else
                 RExC_rxi->regstclass = first;
@@ -8581,7 +1943,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             ssc_init(pRExC_state, &ch_class);
             data.start_class = &ch_class;
             stclass_flag = SCF_DO_STCLASS_AND;
-        } else                          /* XXXX Check for BOUND? */
+        } else				/* XXXX Check for BOUND? */
             stclass_flag = 0;
         data.last_closep = &last_close;
         data.last_close_opp = &last_close_op;
@@ -8661,7 +2023,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
         {
-            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+            const U32 n = reg_add_data(pRExC_state, STR_WITH_LEN("f"));
 
             ssc_finalize(pRExC_state, data.start_class);
 
@@ -8670,7 +2032,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                        (regnode_ssc*)RExC_rxi->data->data[n],
                        regnode_ssc);
             RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
-            RExC_rx->intflags &= ~PREGf_SKIP;   /* Used in find_byclass(). */
+            RExC_rx->intflags &= ~PREGf_SKIP;	/* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
                       regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       Perl_re_printf( aTHX_
@@ -8745,7 +2107,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
         {
-            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+            const U32 n = reg_add_data(pRExC_state, STR_WITH_LEN("f"));
 
             ssc_finalize(pRExC_state, data.start_class);
 
@@ -8754,7 +2116,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                        (regnode_ssc*)RExC_rxi->data->data[n],
                        regnode_ssc);
             RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
-            RExC_rx->intflags &= ~PREGf_SKIP;   /* Used in find_byclass(). */
+            RExC_rx->intflags &= ~PREGf_SKIP;	/* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
                       regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       Perl_re_printf( aTHX_
@@ -8860,7 +2222,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
 #ifdef DEBUGGING
     if (RExC_paren_names) {
-        RExC_rxi->name_list_idx = add_data( pRExC_state, STR_WITH_LEN("a"));
+        RExC_rxi->name_list_idx = reg_add_data( pRExC_state, STR_WITH_LEN("a"));
         RExC_rxi->data->data[RExC_rxi->name_list_idx]
                                    = (void*)SvREFCNT_inc(RExC_paren_name_list);
     } else
@@ -8916,426 +2278,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 }
 
 
-SV*
-Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
-                    const U32 flags)
-{
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF;
-
-    PERL_UNUSED_ARG(value);
-
-    if (flags & RXapif_FETCH) {
-        return reg_named_buff_fetch(rx, key, flags);
-    } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
-        Perl_croak_no_modify();
-        return NULL;
-    } else if (flags & RXapif_EXISTS) {
-        return reg_named_buff_exists(rx, key, flags)
-            ? &PL_sv_yes
-            : &PL_sv_no;
-    } else if (flags & RXapif_REGNAMES) {
-        return reg_named_buff_all(rx, flags);
-    } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
-        return reg_named_buff_scalar(rx, flags);
-    } else {
-        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
-        return NULL;
-    }
-}
-
-SV*
-Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
-                         const U32 flags)
-{
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
-    PERL_UNUSED_ARG(lastkey);
-
-    if (flags & RXapif_FIRSTKEY)
-        return reg_named_buff_firstkey(rx, flags);
-    else if (flags & RXapif_NEXTKEY)
-        return reg_named_buff_nextkey(rx, flags);
-    else {
-        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter",
-                                            (int)flags);
-        return NULL;
-    }
-}
-
-SV*
-Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
-                          const U32 flags)
-{
-    SV *ret;
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
-        if (he_str) {
-            IV i;
-            SV* sv_dat=HeVAL(he_str);
-            I32 *nums=(I32*)SvPVX(sv_dat);
-            AV * const retarray = (flags & RXapif_ALL) ? newAV_alloc_x(SvIVX(sv_dat)) : NULL;
-            for ( i=0; i<SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->nparens) >= nums[i]
-                    && rx->offs[nums[i]].start != -1
-                    && rx->offs[nums[i]].end != -1)
-                {
-                    ret = newSVpvs("");
-                    CALLREG_NUMBUF_FETCH(r, nums[i], ret);
-                    if (!retarray)
-                        return ret;
-                } else {
-                    if (retarray)
-                        ret = newSV_type(SVt_NULL);
-                }
-                if (retarray)
-                    av_push_simple(retarray, ret);
-            }
-            if (retarray)
-                return newRV_noinc(MUTABLE_SV(retarray));
-        }
-    }
-    return NULL;
-}
-
-bool
-Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
-                           const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        if (flags & RXapif_ALL) {
-            return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
-        } else {
-            SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
-            if (sv) {
-                SvREFCNT_dec_NN(sv);
-                return TRUE;
-            } else {
-                return FALSE;
-            }
-        }
-    } else {
-        return FALSE;
-    }
-}
-
-SV*
-Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
-
-    if ( rx && RXp_PAREN_NAMES(rx) ) {
-        (void)hv_iterinit(RXp_PAREN_NAMES(rx));
-
-        return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
-    } else {
-        return FALSE;
-    }
-}
-
-SV*
-Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HV *hv = RXp_PAREN_NAMES(rx);
-        HE *temphe;
-        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
-            IV i;
-            IV parno = 0;
-            SV* sv_dat = HeVAL(temphe);
-            I32 *nums = (I32*)SvPVX(sv_dat);
-            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastparen) >= nums[i] &&
-                    rx->offs[nums[i]].start != -1 &&
-                    rx->offs[nums[i]].end != -1)
-                {
-                    parno = nums[i];
-                    break;
-                }
-            }
-            if (parno || flags & RXapif_ALL) {
-                return newSVhek(HeKEY_hek(temphe));
-            }
-        }
-    }
-    return NULL;
-}
-
-SV*
-Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
-{
-    SV *ret;
-    AV *av;
-    SSize_t length;
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
-            return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
-        } else if (flags & RXapif_ONE) {
-            ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
-            av = MUTABLE_AV(SvRV(ret));
-            length = av_count(av);
-            SvREFCNT_dec_NN(ret);
-            return newSViv(length);
-        } else {
-            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
-                                                (int)flags);
-            return NULL;
-        }
-    }
-    return &PL_sv_undef;
-}
-
-SV*
-Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-    AV *av = newAV();
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HV *hv= RXp_PAREN_NAMES(rx);
-        HE *temphe;
-        (void)hv_iterinit(hv);
-        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
-            IV i;
-            IV parno = 0;
-            SV* sv_dat = HeVAL(temphe);
-            I32 *nums = (I32*)SvPVX(sv_dat);
-            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastparen) >= nums[i] &&
-                    rx->offs[nums[i]].start != -1 &&
-                    rx->offs[nums[i]].end != -1)
-                {
-                    parno = nums[i];
-                    break;
-                }
-            }
-            if (parno || flags & RXapif_ALL) {
-                av_push_simple(av, newSVhek(HeKEY_hek(temphe)));
-            }
-        }
-    }
-
-    return newRV_noinc(MUTABLE_SV(av));
-}
-
-void
-Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
-                             SV * const sv)
-{
-    struct regexp *const rx = ReANY(r);
-    char *s = NULL;
-    SSize_t i = 0;
-    SSize_t s1, t1;
-    I32 n = paren;
-
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
-
-    if (      n == RX_BUFF_IDX_CARET_PREMATCH
-           || n == RX_BUFF_IDX_CARET_FULLMATCH
-           || n == RX_BUFF_IDX_CARET_POSTMATCH
-       )
-    {
-        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
-        if (!keepcopy) {
-            /* on something like
-             *    $r = qr/.../;
-             *    /$qr/p;
-             * the KEEPCOPY is set on the PMOP rather than the regex */
-            if (PL_curpm && r == PM_GETRE(PL_curpm))
-                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
-        }
-        if (!keepcopy)
-            goto ret_undef;
-    }
-
-    if (!rx->subbeg)
-        goto ret_undef;
-
-    if (n == RX_BUFF_IDX_CARET_FULLMATCH)
-        /* no need to distinguish between them any more */
-        n = RX_BUFF_IDX_FULLMATCH;
-
-    if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
-        && rx->offs[0].start != -1)
-    {
-        /* $`, ${^PREMATCH} */
-        i = rx->offs[0].start;
-        s = rx->subbeg;
-    }
-    else
-    if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
-        && rx->offs[0].end != -1)
-    {
-        /* $', ${^POSTMATCH} */
-        s = rx->subbeg - rx->suboffset + rx->offs[0].end;
-        i = rx->sublen + rx->suboffset - rx->offs[0].end;
-    }
-    else
-    if (inRANGE(n, 0, (I32)rx->nparens) &&
-        (s1 = rx->offs[n].start) != -1  &&
-        (t1 = rx->offs[n].end) != -1)
-    {
-        /* $&, ${^MATCH},  $1 ... */
-        i = t1 - s1;
-        s = rx->subbeg + s1 - rx->suboffset;
-    } else {
-        goto ret_undef;
-    }
-
-    assert(s >= rx->subbeg);
-    assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
-    if (i >= 0) {
-#ifdef NO_TAINT_SUPPORT
-        sv_setpvn(sv, s, i);
-#else
-        const int oldtainted = TAINT_get;
-        TAINT_NOT;
-        sv_setpvn(sv, s, i);
-        TAINT_set(oldtainted);
-#endif
-        if (RXp_MATCH_UTF8(rx))
-            SvUTF8_on(sv);
-        else
-            SvUTF8_off(sv);
-        if (TAINTING_get) {
-            if (RXp_MATCH_TAINTED(rx)) {
-                if (SvTYPE(sv) >= SVt_PVMG) {
-                    MAGIC* const mg = SvMAGIC(sv);
-                    MAGIC* mgt;
-                    TAINT;
-                    SvMAGIC_set(sv, mg->mg_moremagic);
-                    SvTAINT(sv);
-                    if ((mgt = SvMAGIC(sv))) {
-                        mg->mg_moremagic = mgt;
-                        SvMAGIC_set(sv, mg);
-                    }
-                } else {
-                    TAINT;
-                    SvTAINT(sv);
-                }
-            } else
-                SvTAINTED_off(sv);
-        }
-    } else {
-      ret_undef:
-        sv_set_undef(sv);
-        return;
-    }
-}
-
-void
-Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
-                                                         SV const * const value)
-{
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
-
-    PERL_UNUSED_ARG(rx);
-    PERL_UNUSED_ARG(paren);
-    PERL_UNUSED_ARG(value);
-
-    if (!PL_localizing)
-        Perl_croak_no_modify();
-}
-
-I32
-Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
-                              const I32 paren)
-{
-    struct regexp *const rx = ReANY(r);
-    I32 i;
-    I32 s1, t1;
-
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
-
-    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
-        || paren == RX_BUFF_IDX_CARET_FULLMATCH
-        || paren == RX_BUFF_IDX_CARET_POSTMATCH
-    )
-    {
-        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
-        if (!keepcopy) {
-            /* on something like
-             *    $r = qr/.../;
-             *    /$qr/p;
-             * the KEEPCOPY is set on the PMOP rather than the regex */
-            if (PL_curpm && r == PM_GETRE(PL_curpm))
-                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
-        }
-        if (!keepcopy)
-            goto warn_undef;
-    }
-
-    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
-    switch (paren) {
-      case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
-      case RX_BUFF_IDX_PREMATCH:       /* $` */
-        if (rx->offs[0].start != -1) {
-                        i = rx->offs[0].start;
-                        if (i > 0) {
-                                s1 = 0;
-                                t1 = i;
-                                goto getlen;
-                        }
-            }
-        return 0;
-
-      case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
-      case RX_BUFF_IDX_POSTMATCH:       /* $' */
-            if (rx->offs[0].end != -1) {
-                        i = rx->sublen - rx->offs[0].end;
-                        if (i > 0) {
-                                s1 = rx->offs[0].end;
-                                t1 = rx->sublen;
-                                goto getlen;
-                        }
-            }
-        return 0;
-
-      default: /* $& / ${^MATCH}, $1, $2, ... */
-            if (paren <= (I32)rx->nparens &&
-            (s1 = rx->offs[paren].start) != -1 &&
-            (t1 = rx->offs[paren].end) != -1)
-            {
-            i = t1 - s1;
-            goto getlen;
-        } else {
-          warn_undef:
-            if (ckWARN(WARN_UNINITIALIZED))
-                report_uninit((const SV *)sv);
-            return 0;
-        }
-    }
-  getlen:
-    if (i > 0 && RXp_MATCH_UTF8(rx)) {
-        const char * const s = rx->subbeg - rx->suboffset + s1;
-        const U8 *ep;
-        STRLEN el;
-
-        i = t1 - s1;
-        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
-            i = el;
-    }
-    return i;
-}
 
 SV*
 Perl_reg_qr_package(pTHX_ REGEXP * const rx)
@@ -9458,1730 +2400,6 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
     Perl_re_printf( aTHX_ fmt "\n",args);                               \
 })
 
-/* This section of code defines the inversion list object and its methods.  The
- * interfaces are highly subject to change, so as much as possible is static to
- * this file.  An inversion list is here implemented as a malloc'd C UV array
- * as an SVt_INVLIST scalar.
- *
- * An inversion list for Unicode is an array of code points, sorted by ordinal
- * number.  Each element gives the code point that begins a range that extends
- * up-to but not including the code point given by the next element.  The final
- * element gives the first code point of a range that extends to the platform's
- * infinity.  The even-numbered elements (invlist[0], invlist[2], invlist[4],
- * ...) give ranges whose code points are all in the inversion list.  We say
- * that those ranges are in the set.  The odd-numbered elements give ranges
- * whose code points are not in the inversion list, and hence not in the set.
- * Thus, element [0] is the first code point in the list.  Element [1]
- * is the first code point beyond that not in the list; and element [2] is the
- * first code point beyond that that is in the list.  In other words, the first
- * range is invlist[0]..(invlist[1]-1), and all code points in that range are
- * in the inversion list.  The second range is invlist[1]..(invlist[2]-1), and
- * all code points in that range are not in the inversion list.  The third
- * range invlist[2]..(invlist[3]-1) gives code points that are in the inversion
- * list, and so forth.  Thus every element whose index is divisible by two
- * gives the beginning of a range that is in the list, and every element whose
- * index is not divisible by two gives the beginning of a range not in the
- * list.  If the final element's index is divisible by two, the inversion list
- * extends to the platform's infinity; otherwise the highest code point in the
- * inversion list is the contents of that element minus 1.
- *
- * A range that contains just a single code point N will look like
- *  invlist[i]   == N
- *  invlist[i+1] == N+1
- *
- * If N is UV_MAX (the highest representable code point on the machine), N+1 is
- * impossible to represent, so element [i+1] is omitted.  The single element
- * inversion list
- *  invlist[0] == UV_MAX
- * contains just UV_MAX, but is interpreted as matching to infinity.
- *
- * Taking the complement (inverting) an inversion list is quite simple, if the
- * first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element at the beginning of each inversion
- * list to always contain 0; there is an additional flag in the header which
- * indicates if the list begins at the 0, or is offset to begin at the next
- * element.  This means that the inversion list can be inverted without any
- * copying; just flip the flag.
- *
- * More about inversion lists can be found in "Unicode Demystified"
- * Chapter 13 by Richard Gillam, published by Addison-Wesley.
- *
- * The inversion list data structure is currently implemented as an SV pointing
- * to an array of UVs that the SV thinks are bytes.  This allows us to have an
- * array of UV whose memory management is automatically handled by the existing
- * facilities for SV's.
- *
- * Some of the methods should always be private to the implementation, and some
- * should eventually be made public */
-
-/* The header definitions are in F<invlist_inline.h> */
-
-#ifndef PERL_IN_XSUB_RE
-
-PERL_STATIC_INLINE UV*
-S__invlist_array_init(SV* const invlist, const bool will_have_0)
-{
-    /* Returns a pointer to the first element in the inversion list's array.
-     * This is called upon initialization of an inversion list.  Where the
-     * array begins depends on whether the list has the code point U+0000 in it
-     * or not.  The other parameter tells it whether the code that follows this
-     * call is about to put a 0 in the inversion list or not.  The first
-     * element is either the element reserved for 0, if TRUE, or the element
-     * after it, if FALSE */
-
-    bool* offset = get_invlist_offset_addr(invlist);
-    UV* zero_addr = (UV *) SvPVX(invlist);
-
-    PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
-
-    /* Must be empty */
-    assert(! _invlist_len(invlist));
-
-    *zero_addr = 0;
-
-    /* 1^1 = 0; 1^0 = 1 */
-    *offset = 1 ^ will_have_0;
-    return zero_addr + *offset;
-}
-
-STATIC void
-S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
-{
-    /* Replaces the inversion list in 'dest' with the one from 'src'.  It
-     * steals the list from 'src', so 'src' is made to have a NULL list.  This
-     * is similar to what SvSetMagicSV() would do, if it were implemented on
-     * inversion lists, though this routine avoids a copy */
-
-    const UV src_len          = _invlist_len(src);
-    const bool src_offset     = *get_invlist_offset_addr(src);
-    const STRLEN src_byte_len = SvLEN(src);
-    char * array              = SvPVX(src);
-
-#ifndef NO_TAINT_SUPPORT
-    const int oldtainted = TAINT_get;
-#endif
-
-    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
-
-    assert(is_invlist(src));
-    assert(is_invlist(dest));
-    assert(! invlist_is_iterating(src));
-    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
-
-    /* Make sure it ends in the right place with a NUL, as our inversion list
-     * manipulations aren't careful to keep this true, but sv_usepvn_flags()
-     * asserts it */
-    array[src_byte_len - 1] = '\0';
-
-    TAINT_NOT;      /* Otherwise it breaks */
-    sv_usepvn_flags(dest,
-                    (char *) array,
-                    src_byte_len - 1,
-
-                    /* This flag is documented to cause a copy to be avoided */
-                    SV_HAS_TRAILING_NUL);
-    TAINT_set(oldtainted);
-    SvPV_set(src, 0);
-    SvLEN_set(src, 0);
-    SvCUR_set(src, 0);
-
-    /* Finish up copying over the other fields in an inversion list */
-    *get_invlist_offset_addr(dest) = src_offset;
-    invlist_set_len(dest, src_len, src_offset);
-    *get_invlist_previous_index_addr(dest) = 0;
-    invlist_iterfinish(dest);
-}
-
-PERL_STATIC_INLINE IV*
-S_get_invlist_previous_index_addr(SV* invlist)
-{
-    /* Return the address of the IV that is reserved to hold the cached index
-     * */
-    PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
-
-    assert(is_invlist(invlist));
-
-    return &(((XINVLIST*) SvANY(invlist))->prev_index);
-}
-
-PERL_STATIC_INLINE IV
-S_invlist_previous_index(SV* const invlist)
-{
-    /* Returns cached index of previous search */
-
-    PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
-
-    return *get_invlist_previous_index_addr(invlist);
-}
-
-PERL_STATIC_INLINE void
-S_invlist_set_previous_index(SV* const invlist, const IV index)
-{
-    /* Caches <index> for later retrieval */
-
-    PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
-
-    assert(index == 0 || index < (int) _invlist_len(invlist));
-
-    *get_invlist_previous_index_addr(invlist) = index;
-}
-
-PERL_STATIC_INLINE void
-S_invlist_trim(SV* invlist)
-{
-    /* Free the not currently-being-used space in an inversion list */
-
-    /* But don't free up the space needed for the 0 UV that is always at the
-     * beginning of the list, nor the trailing NUL */
-    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
-
-    PERL_ARGS_ASSERT_INVLIST_TRIM;
-
-    assert(is_invlist(invlist));
-
-    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
-}
-
-PERL_STATIC_INLINE void
-S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
-{
-    PERL_ARGS_ASSERT_INVLIST_CLEAR;
-
-    assert(is_invlist(invlist));
-
-    invlist_set_len(invlist, 0, 0);
-    invlist_trim(invlist);
-}
-
-#endif /* ifndef PERL_IN_XSUB_RE */
-
-PERL_STATIC_INLINE bool
-S_invlist_is_iterating(const SV* const invlist)
-{
-    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
-
-    /* get_invlist_iter_addr()'s sv is non-const only because it returns a
-     * value that can be used to modify the invlist, it doesn't modify the
-     * invlist itself */
-    return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
-}
-
-#ifndef PERL_IN_XSUB_RE
-
-PERL_STATIC_INLINE UV
-S_invlist_max(const SV* const invlist)
-{
-    /* Returns the maximum number of elements storable in the inversion list's
-     * array, without having to realloc() */
-
-    PERL_ARGS_ASSERT_INVLIST_MAX;
-
-    assert(is_invlist(invlist));
-
-    /* Assumes worst case, in which the 0 element is not counted in the
-     * inversion list, so subtracts 1 for that */
-    return SvLEN(invlist) == 0  /* This happens under _new_invlist_C_array */
-           ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
-           : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
-}
-
-STATIC void
-S_initialize_invlist_guts(pTHX_ SV* invlist, const Size_t initial_size)
-{
-    PERL_ARGS_ASSERT_INITIALIZE_INVLIST_GUTS;
-
-    /* First 1 is in case the zero element isn't in the list; second 1 is for
-     * trailing NUL */
-    SvGROW(invlist, TO_INTERNAL_SIZE(initial_size + 1) + 1);
-    invlist_set_len(invlist, 0, 0);
-
-    /* Force iterinit() to be used to get iteration to work */
-    invlist_iterfinish(invlist);
-
-    *get_invlist_previous_index_addr(invlist) = 0;
-    SvPOK_on(invlist);  /* This allows B to extract the PV */
-}
-
-SV*
-Perl__new_invlist(pTHX_ IV initial_size)
-{
-
-    /* Return a pointer to a newly constructed inversion list, with enough
-     * space to store 'initial_size' elements.  If that number is negative, a
-     * system default is used instead */
-
-    SV* new_list;
-
-    if (initial_size < 0) {
-        initial_size = 10;
-    }
-
-    new_list = newSV_type(SVt_INVLIST);
-    initialize_invlist_guts(new_list, initial_size);
-
-    return new_list;
-}
-
-SV*
-Perl__new_invlist_C_array(pTHX_ const UV* const list)
-{
-    /* Return a pointer to a newly constructed inversion list, initialized to
-     * point to <list>, which has to be in the exact correct inversion list
-     * form, including internal fields.  Thus this is a dangerous routine that
-     * should not be used in the wrong hands.  The passed in 'list' contains
-     * several header fields at the beginning that are not part of the
-     * inversion list body proper */
-
-    const STRLEN length = (STRLEN) list[0];
-    const UV version_id =          list[1];
-    const bool offset   =    cBOOL(list[2]);
-#define HEADER_LENGTH 3
-    /* If any of the above changes in any way, you must change HEADER_LENGTH
-     * (if appropriate) and regenerate INVLIST_VERSION_ID by running
-     *      perl -E 'say int(rand 2**31-1)'
-     */
-#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
-                                        data structure type, so that one being
-                                        passed in can be validated to be an
-                                        inversion list of the correct vintage.
-                                       */
-
-    SV* invlist = newSV_type(SVt_INVLIST);
-
-    PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
-
-    if (version_id != INVLIST_VERSION_ID) {
-        Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
-    }
-
-    /* The generated array passed in includes header elements that aren't part
-     * of the list proper, so start it just after them */
-    SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
-
-    SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
-                               shouldn't touch it */
-
-    *(get_invlist_offset_addr(invlist)) = offset;
-
-    /* The 'length' passed to us is the physical number of elements in the
-     * inversion list.  But if there is an offset the logical number is one
-     * less than that */
-    invlist_set_len(invlist, length  - offset, offset);
-
-    invlist_set_previous_index(invlist, 0);
-
-    /* Initialize the iteration pointer. */
-    invlist_iterfinish(invlist);
-
-    SvREADONLY_on(invlist);
-    SvPOK_on(invlist);
-
-    return invlist;
-}
-
-STATIC void
-S__append_range_to_invlist(pTHX_ SV* const invlist,
-                                 const UV start, const UV end)
-{
-   /* Subject to change or removal.  Append the range from 'start' to 'end' at
-    * the end of the inversion list.  The range must be above any existing
-    * ones. */
-
-    UV* array;
-    UV max = invlist_max(invlist);
-    UV len = _invlist_len(invlist);
-    bool offset;
-
-    PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
-
-    if (len == 0) { /* Empty lists must be initialized */
-        offset = start != 0;
-        array = _invlist_array_init(invlist, ! offset);
-    }
-    else {
-        /* Here, the existing list is non-empty. The current max entry in the
-         * list is generally the first value not in the set, except when the
-         * set extends to the end of permissible values, in which case it is
-         * the first entry in that final set, and so this call is an attempt to
-         * append out-of-order */
-
-        UV final_element = len - 1;
-        array = invlist_array(invlist);
-        if (   array[final_element] > start
-            || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
-        {
-            Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
-                     array[final_element], start,
-                     ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
-        }
-
-        /* Here, it is a legal append.  If the new range begins 1 above the end
-         * of the range below it, it is extending the range below it, so the
-         * new first value not in the set is one greater than the newly
-         * extended range.  */
-        offset = *get_invlist_offset_addr(invlist);
-        if (array[final_element] == start) {
-            if (end != UV_MAX) {
-                array[final_element] = end + 1;
-            }
-            else {
-                /* But if the end is the maximum representable on the machine,
-                 * assume that infinity was actually what was meant.  Just let
-                 * the range that this would extend to have no end */
-                invlist_set_len(invlist, len - 1, offset);
-            }
-            return;
-        }
-    }
-
-    /* Here the new range doesn't extend any existing set.  Add it */
-
-    len += 2;   /* Includes an element each for the start and end of range */
-
-    /* If wll overflow the existing space, extend, which may cause the array to
-     * be moved */
-    if (max < len) {
-        invlist_extend(invlist, len);
-
-        /* Have to set len here to avoid assert failure in invlist_array() */
-        invlist_set_len(invlist, len, offset);
-
-        array = invlist_array(invlist);
-    }
-    else {
-        invlist_set_len(invlist, len, offset);
-    }
-
-    /* The next item on the list starts the range, the one after that is
-     * one past the new range.  */
-    array[len - 2] = start;
-    if (end != UV_MAX) {
-        array[len - 1] = end + 1;
-    }
-    else {
-        /* But if the end is the maximum representable on the machine, just let
-         * the range have no end */
-        invlist_set_len(invlist, len - 1, offset);
-    }
-}
-
-SSize_t
-Perl__invlist_search(SV* const invlist, const UV cp)
-{
-    /* Searches the inversion list for the entry that contains the input code
-     * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
-     * return value is the index into the list's array of the range that
-     * contains <cp>, that is, 'i' such that
-     *  array[i] <= cp < array[i+1]
-     */
-
-    IV low = 0;
-    IV mid;
-    IV high = _invlist_len(invlist);
-    const IV highest_element = high - 1;
-    const UV* array;
-
-    PERL_ARGS_ASSERT__INVLIST_SEARCH;
-
-    /* If list is empty, return failure. */
-    if (UNLIKELY(high == 0)) {
-        return -1;
-    }
-
-    /* (We can't get the array unless we know the list is non-empty) */
-    array = invlist_array(invlist);
-
-    mid = invlist_previous_index(invlist);
-    assert(mid >=0);
-    if (UNLIKELY(mid > highest_element)) {
-        mid = highest_element;
-    }
-
-    /* <mid> contains the cache of the result of the previous call to this
-     * function (0 the first time).  See if this call is for the same result,
-     * or if it is for mid-1.  This is under the theory that calls to this
-     * function will often be for related code points that are near each other.
-     * And benchmarks show that caching gives better results.  We also test
-     * here if the code point is within the bounds of the list.  These tests
-     * replace others that would have had to be made anyway to make sure that
-     * the array bounds were not exceeded, and these give us extra information
-     * at the same time */
-    if (cp >= array[mid]) {
-        if (cp >= array[highest_element]) {
-            return highest_element;
-        }
-
-        /* Here, array[mid] <= cp < array[highest_element].  This means that
-         * the final element is not the answer, so can exclude it; it also
-         * means that <mid> is not the final element, so can refer to 'mid + 1'
-         * safely */
-        if (cp < array[mid + 1]) {
-            return mid;
-        }
-        high--;
-        low = mid + 1;
-    }
-    else { /* cp < aray[mid] */
-        if (cp < array[0]) { /* Fail if outside the array */
-            return -1;
-        }
-        high = mid;
-        if (cp >= array[mid - 1]) {
-            goto found_entry;
-        }
-    }
-
-    /* Binary search.  What we are looking for is <i> such that
-     *  array[i] <= cp < array[i+1]
-     * The loop below converges on the i+1.  Note that there may not be an
-     * (i+1)th element in the array, and things work nonetheless */
-    while (low < high) {
-        mid = (low + high) / 2;
-        assert(mid <= highest_element);
-        if (array[mid] <= cp) { /* cp >= array[mid] */
-            low = mid + 1;
-
-            /* We could do this extra test to exit the loop early.
-            if (cp < array[low]) {
-                return mid;
-            }
-            */
-        }
-        else { /* cp < array[mid] */
-            high = mid;
-        }
-    }
-
-  found_entry:
-    high--;
-    invlist_set_previous_index(invlist, high);
-    return high;
-}
-
-void
-Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
-                                         const bool complement_b, SV** output)
-{
-    /* Take the union of two inversion lists and point '*output' to it.  On
-     * input, '*output' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
-     * even 'a' or 'b').  If to an inversion list, the contents of the original
-     * list will be replaced by the union.  The first list, 'a', may be
-     * NULL, in which case a copy of the second list is placed in '*output'.
-     * If 'complement_b' is TRUE, the union is taken of the complement
-     * (inversion) of 'b' instead of b itself.
-     *
-     * The basis for this comes from "Unicode Demystified" Chapter 13 by
-     * Richard Gillam, published by Addison-Wesley, and explained at some
-     * length there.  The preface says to incorporate its examples into your
-     * code at your own risk.
-     *
-     * The algorithm is like a merge sort. */
-
-    const UV* array_a;    /* a's array */
-    const UV* array_b;
-    UV len_a;       /* length of a's array */
-    UV len_b;
-
-    SV* u;                      /* the resulting union */
-    UV* array_u;
-    UV len_u = 0;
-
-    UV i_a = 0;             /* current index into a's array */
-    UV i_b = 0;
-    UV i_u = 0;
-
-    /* running count, as explained in the algorithm source book; items are
-     * stopped accumulating and are output when the count changes to/from 0.
-     * The count is incremented when we start a range that's in an input's set,
-     * and decremented when we start a range that's not in a set.  So this
-     * variable can be 0, 1, or 2.  When it is 0 neither input is in their set,
-     * and hence nothing goes into the union; 1, just one of the inputs is in
-     * its set (and its current range gets added to the union); and 2 when both
-     * inputs are in their sets.  */
-    UV count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
-    assert(a != b);
-    assert(*output == NULL || is_invlist(*output));
-
-    len_b = _invlist_len(b);
-    if (len_b == 0) {
-
-        /* Here, 'b' is empty, hence it's complement is all possible code
-         * points.  So if the union includes the complement of 'b', it includes
-         * everything, and we need not even look at 'a'.  It's easiest to
-         * create a new inversion list that matches everything.  */
-        if (complement_b) {
-            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
-
-            if (*output == NULL) { /* If the output didn't exist, just point it
-                                      at the new list */
-                *output = everything;
-            }
-            else { /* Otherwise, replace its contents with the new list */
-                invlist_replace_list_destroys_src(*output, everything);
-                SvREFCNT_dec_NN(everything);
-            }
-
-            return;
-        }
-
-        /* Here, we don't want the complement of 'b', and since 'b' is empty,
-         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
-         * output will be empty */
-
-        if (a == NULL || _invlist_len(a) == 0) {
-            if (*output == NULL) {
-                *output = _new_invlist(0);
-            }
-            else {
-                invlist_clear(*output);
-            }
-            return;
-        }
-
-        /* Here, 'a' is not empty, but 'b' is, so 'a' entirely determines the
-         * union.  We can just return a copy of 'a' if '*output' doesn't point
-         * to an existing list */
-        if (*output == NULL) {
-            *output = invlist_clone(a, NULL);
-            return;
-        }
-
-        /* If the output is to overwrite 'a', we have a no-op, as it's
-         * already in 'a' */
-        if (*output == a) {
-            return;
-        }
-
-        /* Here, '*output' is to be overwritten by 'a' */
-        u = invlist_clone(a, NULL);
-        invlist_replace_list_destroys_src(*output, u);
-        SvREFCNT_dec_NN(u);
-
-        return;
-    }
-
-    /* Here 'b' is not empty.  See about 'a' */
-
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-
-        /* Here, 'a' is empty (and b is not).  That means the union will come
-         * entirely from 'b'.  If '*output' is NULL, we can directly return a
-         * clone of 'b'.  Otherwise, we replace the contents of '*output' with
-         * the clone */
-
-        SV ** dest = (*output == NULL) ? output : &u;
-        *dest = invlist_clone(b, NULL);
-        if (complement_b) {
-            _invlist_invert(*dest);
-        }
-
-        if (dest == &u) {
-            invlist_replace_list_destroys_src(*output, u);
-            SvREFCNT_dec_NN(u);
-        }
-
-        return;
-    }
-
-    /* Here both lists exist and are non-empty */
-    array_a = invlist_array(a);
-    array_b = invlist_array(b);
-
-    /* If are to take the union of 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* To complement, we invert: if the first element is 0, remove it.  To
-         * do this, we just pretend the array starts one later */
-        if (array_b[0] == 0) {
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    /* Size the union for the worst case: that the sets are completely
-     * disjoint */
-    u = _new_invlist(len_a + len_b);
-
-    /* Will contain U+0000 if either component does */
-    array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
-                                      || (len_b > 0 && array_b[0] == 0));
-
-    /* Go through each input list item by item, stopping when have exhausted
-     * one of them */
-    while (i_a < len_a && i_b < len_b) {
-        UV cp;      /* The element to potentially add to the union's array */
-        bool cp_in_set;   /* is it in the input list's set or not */
-
-        /* We need to take one or the other of the two inputs for the union.
-         * Since we are merging two sorted lists, we take the smaller of the
-         * next items.  In case of a tie, we take first the one that is in its
-         * set.  If we first took the one not in its set, it would decrement
-         * the count, possibly to 0 which would cause it to be output as ending
-         * the range, and the next time through we would take the same number,
-         * and output it again as beginning the next range.  By doing it the
-         * opposite way, there is no possibility that the count will be
-         * momentarily decremented to 0, and thus the two adjoining ranges will
-         * be seamlessly merged.  (In a tie and both are in the set or both not
-         * in the set, it doesn't matter which we take first.) */
-        if (       array_a[i_a] < array_b[i_b]
-            || (   array_a[i_a] == array_b[i_b]
-                && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
-        {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-            cp = array_a[i_a++];
-        }
-        else {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
-            cp = array_b[i_b++];
-        }
-
-        /* Here, have chosen which of the two inputs to look at.  Only output
-         * if the running count changes to/from 0, which marks the
-         * beginning/end of a range that's in the set */
-        if (cp_in_set) {
-            if (count == 0) {
-                array_u[i_u++] = cp;
-            }
-            count++;
-        }
-        else {
-            count--;
-            if (count == 0) {
-                array_u[i_u++] = cp;
-            }
-        }
-    }
-
-
-    /* The loop above increments the index into exactly one of the input lists
-     * each iteration, and ends when either index gets to its list end.  That
-     * means the other index is lower than its end, and so something is
-     * remaining in that one.  We decrement 'count', as explained below, if
-     * that list is in its set.  (i_a and i_b each currently index the element
-     * beyond the one we care about.) */
-    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-        || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
-    {
-        count--;
-    }
-
-    /* Above we decremented 'count' if the list that had unexamined elements in
-     * it was in its set.  This has made it so that 'count' being non-zero
-     * means there isn't anything left to output; and 'count' equal to 0 means
-     * that what is left to output is precisely that which is left in the
-     * non-exhausted input list.
-     *
-     * To see why, note first that the exhausted input obviously has nothing
-     * left to add to the union.  If it was in its set at its end, that means
-     * the set extends from here to the platform's infinity, and hence so does
-     * the union and the non-exhausted set is irrelevant.  The exhausted set
-     * also contributed 1 to 'count'.  If 'count' was 2, it got decremented to
-     * 1, but if it was 1, the non-exhausted set wasn't in its set, and so
-     * 'count' remains at 1.  This is consistent with the decremented 'count'
-     * != 0 meaning there's nothing left to add to the union.
-     *
-     * But if the exhausted input wasn't in its set, it contributed 0 to
-     * 'count', and the rest of the union will be whatever the other input is.
-     * If 'count' was 0, neither list was in its set, and 'count' remains 0;
-     * otherwise it gets decremented to 0.  This is consistent with 'count'
-     * == 0 meaning the remainder of the union is whatever is left in the
-     * non-exhausted list. */
-    if (count != 0) {
-        len_u = i_u;
-    }
-    else {
-        IV copy_count = len_a - i_a;
-        if (copy_count > 0) {   /* The non-exhausted input is 'a' */
-            Copy(array_a + i_a, array_u + i_u, copy_count, UV);
-        }
-        else { /* The non-exhausted input is b */
-            copy_count = len_b - i_b;
-            Copy(array_b + i_b, array_u + i_u, copy_count, UV);
-        }
-        len_u = i_u + copy_count;
-    }
-
-    /* Set the result to the final length, which can change the pointer to
-     * array_u, so re-find it.  (Note that it is unlikely that this will
-     * change, as we are shrinking the space, not enlarging it) */
-    if (len_u != _invlist_len(u)) {
-        invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
-        invlist_trim(u);
-        array_u = invlist_array(u);
-    }
-
-    if (*output == NULL) {  /* Simply return the new inversion list */
-        *output = u;
-    }
-    else {
-        /* Otherwise, overwrite the inversion list that was in '*output'.  We
-         * could instead free '*output', and then set it to 'u', but experience
-         * has shown [perl #127392] that if the input is a mortal, we can get a
-         * huge build-up of these during regex compilation before they get
-         * freed. */
-        invlist_replace_list_destroys_src(*output, u);
-        SvREFCNT_dec_NN(u);
-    }
-
-    return;
-}
-
-void
-Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
-                                               const bool complement_b, SV** i)
-{
-    /* Take the intersection of two inversion lists and point '*i' to it.  On
-     * input, '*i' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
-     * even 'a' or 'b').  If to an inversion list, the contents of the original
-     * list will be replaced by the intersection.  The first list, 'a', may be
-     * NULL, in which case '*i' will be an empty list.  If 'complement_b' is
-     * TRUE, the result will be the intersection of 'a' and the complement (or
-     * inversion) of 'b' instead of 'b' directly.
-     *
-     * The basis for this comes from "Unicode Demystified" Chapter 13 by
-     * Richard Gillam, published by Addison-Wesley, and explained at some
-     * length there.  The preface says to incorporate its examples into your
-     * code at your own risk.  In fact, it had bugs
-     *
-     * The algorithm is like a merge sort, and is essentially the same as the
-     * union above
-     */
-
-    const UV* array_a;          /* a's array */
-    const UV* array_b;
-    UV len_a;   /* length of a's array */
-    UV len_b;
-
-    SV* r;                   /* the resulting intersection */
-    UV* array_r;
-    UV len_r = 0;
-
-    UV i_a = 0;             /* current index into a's array */
-    UV i_b = 0;
-    UV i_r = 0;
-
-    /* running count of how many of the two inputs are postitioned at ranges
-     * that are in their sets.  As explained in the algorithm source book,
-     * items are stopped accumulating and are output when the count changes
-     * to/from 2.  The count is incremented when we start a range that's in an
-     * input's set, and decremented when we start a range that's not in a set.
-     * Only when it is 2 are we in the intersection. */
-    UV count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
-    assert(a != b);
-    assert(*i == NULL || is_invlist(*i));
-
-    /* Special case if either one is empty */
-    len_a = (a == NULL) ? 0 : _invlist_len(a);
-    if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
-        if (len_a != 0 && complement_b) {
-
-            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
-             * must be empty.  Here, also we are using 'b's complement, which
-             * hence must be every possible code point.  Thus the intersection
-             * is simply 'a'. */
-
-            if (*i == a) {  /* No-op */
-                return;
-            }
-
-            if (*i == NULL) {
-                *i = invlist_clone(a, NULL);
-                return;
-            }
-
-            r = invlist_clone(a, NULL);
-            invlist_replace_list_destroys_src(*i, r);
-            SvREFCNT_dec_NN(r);
-            return;
-        }
-
-        /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
-         * intersection must be empty */
-        if (*i == NULL) {
-            *i = _new_invlist(0);
-            return;
-        }
-
-        invlist_clear(*i);
-        return;
-    }
-
-    /* Here both lists exist and are non-empty */
-    array_a = invlist_array(a);
-    array_b = invlist_array(b);
-
-    /* If are to take the intersection of 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* To complement, we invert: if the first element is 0, remove it.  To
-         * do this, we just pretend the array starts one later */
-        if (array_b[0] == 0) {
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    /* Size the intersection for the worst case: that the intersection ends up
-     * fragmenting everything to be completely disjoint */
-    r= _new_invlist(len_a + len_b);
-
-    /* Will contain U+0000 iff both components do */
-    array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
-                                     && len_b > 0 && array_b[0] == 0);
-
-    /* Go through each list item by item, stopping when have exhausted one of
-     * them */
-    while (i_a < len_a && i_b < len_b) {
-        UV cp;      /* The element to potentially add to the intersection's
-                       array */
-        bool cp_in_set; /* Is it in the input list's set or not */
-
-        /* We need to take one or the other of the two inputs for the
-         * intersection.  Since we are merging two sorted lists, we take the
-         * smaller of the next items.  In case of a tie, we take first the one
-         * that is not in its set (a difference from the union algorithm).  If
-         * we first took the one in its set, it would increment the count,
-         * possibly to 2 which would cause it to be output as starting a range
-         * in the intersection, and the next time through we would take that
-         * same number, and output it again as ending the set.  By doing the
-         * opposite of this, there is no possibility that the count will be
-         * momentarily incremented to 2.  (In a tie and both are in the set or
-         * both not in the set, it doesn't matter which we take first.) */
-        if (       array_a[i_a] < array_b[i_b]
-            || (   array_a[i_a] == array_b[i_b]
-                && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
-        {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-            cp = array_a[i_a++];
-        }
-        else {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
-            cp= array_b[i_b++];
-        }
-
-        /* Here, have chosen which of the two inputs to look at.  Only output
-         * if the running count changes to/from 2, which marks the
-         * beginning/end of a range that's in the intersection */
-        if (cp_in_set) {
-            count++;
-            if (count == 2) {
-                array_r[i_r++] = cp;
-            }
-        }
-        else {
-            if (count == 2) {
-                array_r[i_r++] = cp;
-            }
-            count--;
-        }
-
-    }
-
-    /* The loop above increments the index into exactly one of the input lists
-     * each iteration, and ends when either index gets to its list end.  That
-     * means the other index is lower than its end, and so something is
-     * remaining in that one.  We increment 'count', as explained below, if the
-     * exhausted list was in its set.  (i_a and i_b each currently index the
-     * element beyond the one we care about.) */
-    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-        || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
-    {
-        count++;
-    }
-
-    /* Above we incremented 'count' if the exhausted list was in its set.  This
-     * has made it so that 'count' being below 2 means there is nothing left to
-     * output; otheriwse what's left to add to the intersection is precisely
-     * that which is left in the non-exhausted input list.
-     *
-     * To see why, note first that the exhausted input obviously has nothing
-     * left to affect the intersection.  If it was in its set at its end, that
-     * means the set extends from here to the platform's infinity, and hence
-     * anything in the non-exhausted's list will be in the intersection, and
-     * anything not in it won't be.  Hence, the rest of the intersection is
-     * precisely what's in the non-exhausted list  The exhausted set also
-     * contributed 1 to 'count', meaning 'count' was at least 1.  Incrementing
-     * it means 'count' is now at least 2.  This is consistent with the
-     * incremented 'count' being >= 2 means to add the non-exhausted list to
-     * the intersection.
-     *
-     * But if the exhausted input wasn't in its set, it contributed 0 to
-     * 'count', and the intersection can't include anything further; the
-     * non-exhausted set is irrelevant.  'count' was at most 1, and doesn't get
-     * incremented.  This is consistent with 'count' being < 2 meaning nothing
-     * further to add to the intersection. */
-    if (count < 2) { /* Nothing left to put in the intersection. */
-        len_r = i_r;
-    }
-    else { /* copy the non-exhausted list, unchanged. */
-        IV copy_count = len_a - i_a;
-        if (copy_count > 0) {   /* a is the one with stuff left */
-            Copy(array_a + i_a, array_r + i_r, copy_count, UV);
-        }
-        else {  /* b is the one with stuff left */
-            copy_count = len_b - i_b;
-            Copy(array_b + i_b, array_r + i_r, copy_count, UV);
-        }
-        len_r = i_r + copy_count;
-    }
-
-    /* Set the result to the final length, which can change the pointer to
-     * array_r, so re-find it.  (Note that it is unlikely that this will
-     * change, as we are shrinking the space, not enlarging it) */
-    if (len_r != _invlist_len(r)) {
-        invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
-        invlist_trim(r);
-        array_r = invlist_array(r);
-    }
-
-    if (*i == NULL) { /* Simply return the calculated intersection */
-        *i = r;
-    }
-    else { /* Otherwise, replace the existing inversion list in '*i'.  We could
-              instead free '*i', and then set it to 'r', but experience has
-              shown [perl #127392] that if the input is a mortal, we can get a
-              huge build-up of these during regex compilation before they get
-              freed. */
-        if (len_r) {
-            invlist_replace_list_destroys_src(*i, r);
-        }
-        else {
-            invlist_clear(*i);
-        }
-        SvREFCNT_dec_NN(r);
-    }
-
-    return;
-}
-
-SV*
-Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
-{
-    /* Add the range from 'start' to 'end' inclusive to the inversion list's
-     * set.  A pointer to the inversion list is returned.  This may actually be
-     * a new list, in which case the passed in one has been destroyed.  The
-     * passed-in inversion list can be NULL, in which case a new one is created
-     * with just the one range in it.  The new list is not necessarily
-     * NUL-terminated.  Space is not freed if the inversion list shrinks as a
-     * result of this function.  The gain would not be large, and in many
-     * cases, this is called multiple times on a single inversion list, so
-     * anything freed may almost immediately be needed again.
-     *
-     * This used to mostly call the 'union' routine, but that is much more
-     * heavyweight than really needed for a single range addition */
-
-    UV* array;              /* The array implementing the inversion list */
-    UV len;                 /* How many elements in 'array' */
-    SSize_t i_s;            /* index into the invlist array where 'start'
-                               should go */
-    SSize_t i_e = 0;        /* And the index where 'end' should go */
-    UV cur_highest;         /* The highest code point in the inversion list
-                               upon entry to this function */
-
-    /* This range becomes the whole inversion list if none already existed */
-    if (invlist == NULL) {
-        invlist = _new_invlist(2);
-        _append_range_to_invlist(invlist, start, end);
-        return invlist;
-    }
-
-    /* Likewise, if the inversion list is currently empty */
-    len = _invlist_len(invlist);
-    if (len == 0) {
-        _append_range_to_invlist(invlist, start, end);
-        return invlist;
-    }
-
-    /* Starting here, we have to know the internals of the list */
-    array = invlist_array(invlist);
-
-    /* If the new range ends higher than the current highest ... */
-    cur_highest = invlist_highest(invlist);
-    if (end > cur_highest) {
-
-        /* If the whole range is higher, we can just append it */
-        if (start > cur_highest) {
-            _append_range_to_invlist(invlist, start, end);
-            return invlist;
-        }
-
-        /* Otherwise, add the portion that is higher ... */
-        _append_range_to_invlist(invlist, cur_highest + 1, end);
-
-        /* ... and continue on below to handle the rest.  As a result of the
-         * above append, we know that the index of the end of the range is the
-         * final even numbered one of the array.  Recall that the final element
-         * always starts a range that extends to infinity.  If that range is in
-         * the set (meaning the set goes from here to infinity), it will be an
-         * even index, but if it isn't in the set, it's odd, and the final
-         * range in the set is one less, which is even. */
-        if (end == UV_MAX) {
-            i_e = len;
-        }
-        else {
-            i_e = len - 2;
-        }
-    }
-
-    /* We have dealt with appending, now see about prepending.  If the new
-     * range starts lower than the current lowest ... */
-    if (start < array[0]) {
-
-        /* Adding something which has 0 in it is somewhat tricky, and uncommon.
-         * Let the union code handle it, rather than having to know the
-         * trickiness in two code places.  */
-        if (UNLIKELY(start == 0)) {
-            SV* range_invlist;
-
-            range_invlist = _new_invlist(2);
-            _append_range_to_invlist(range_invlist, start, end);
-
-            _invlist_union(invlist, range_invlist, &invlist);
-
-            SvREFCNT_dec_NN(range_invlist);
-
-            return invlist;
-        }
-
-        /* If the whole new range comes before the first entry, and doesn't
-         * extend it, we have to insert it as an additional range */
-        if (end < array[0] - 1) {
-            i_s = i_e = -1;
-            goto splice_in_new_range;
-        }
-
-        /* Here the new range adjoins the existing first range, extending it
-         * downwards. */
-        array[0] = start;
-
-        /* And continue on below to handle the rest.  We know that the index of
-         * the beginning of the range is the first one of the array */
-        i_s = 0;
-    }
-    else { /* Not prepending any part of the new range to the existing list.
-            * Find where in the list it should go.  This finds i_s, such that:
-            *     invlist[i_s] <= start < array[i_s+1]
-            */
-        i_s = _invlist_search(invlist, start);
-    }
-
-    /* At this point, any extending before the beginning of the inversion list
-     * and/or after the end has been done.  This has made it so that, in the
-     * code below, each endpoint of the new range is either in a range that is
-     * in the set, or is in a gap between two ranges that are.  This means we
-     * don't have to worry about exceeding the array bounds.
-     *
-     * Find where in the list the new range ends (but we can skip this if we
-     * have already determined what it is, or if it will be the same as i_s,
-     * which we already have computed) */
-    if (i_e == 0) {
-        i_e = (start == end)
-              ? i_s
-              : _invlist_search(invlist, end);
-    }
-
-    /* Here generally invlist[i_e] <= end < array[i_e+1].  But if invlist[i_e]
-     * is a range that goes to infinity there is no element at invlist[i_e+1],
-     * so only the first relation holds. */
-
-    if ( ! ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
-
-        /* Here, the ranges on either side of the beginning of the new range
-         * are in the set, and this range starts in the gap between them.
-         *
-         * The new range extends the range above it downwards if the new range
-         * ends at or above that range's start */
-        const bool extends_the_range_above = (   end == UV_MAX
-                                              || end + 1 >= array[i_s+1]);
-
-        /* The new range extends the range below it upwards if it begins just
-         * after where that range ends */
-        if (start == array[i_s]) {
-
-            /* If the new range fills the entire gap between the other ranges,
-             * they will get merged together.  Other ranges may also get
-             * merged, depending on how many of them the new range spans.  In
-             * the general case, we do the merge later, just once, after we
-             * figure out how many to merge.  But in the case where the new
-             * range exactly spans just this one gap (possibly extending into
-             * the one above), we do the merge here, and an early exit.  This
-             * is done here to avoid having to special case later. */
-            if (i_e - i_s <= 1) {
-
-                /* If i_e - i_s == 1, it means that the new range terminates
-                 * within the range above, and hence 'extends_the_range_above'
-                 * must be true.  (If the range above it extends to infinity,
-                 * 'i_s+2' will be above the array's limit, but 'len-i_s-2'
-                 * will be 0, so no harm done.) */
-                if (extends_the_range_above) {
-                    Move(array + i_s + 2, array + i_s, len - i_s - 2, UV);
-                    invlist_set_len(invlist,
-                                    len - 2,
-                                    *(get_invlist_offset_addr(invlist)));
-                    return invlist;
-                }
-
-                /* Here, i_e must == i_s.  We keep them in sync, as they apply
-                 * to the same range, and below we are about to decrement i_s
-                 * */
-                i_e--;
-            }
-
-            /* Here, the new range is adjacent to the one below.  (It may also
-             * span beyond the range above, but that will get resolved later.)
-             * Extend the range below to include this one. */
-            array[i_s] = (end == UV_MAX) ? UV_MAX : end + 1;
-            i_s--;
-            start = array[i_s];
-        }
-        else if (extends_the_range_above) {
-
-            /* Here the new range only extends the range above it, but not the
-             * one below.  It merges with the one above.  Again, we keep i_e
-             * and i_s in sync if they point to the same range */
-            if (i_e == i_s) {
-                i_e++;
-            }
-            i_s++;
-            array[i_s] = start;
-        }
-    }
-
-    /* Here, we've dealt with the new range start extending any adjoining
-     * existing ranges.
-     *
-     * If the new range extends to infinity, it is now the final one,
-     * regardless of what was there before */
-    if (UNLIKELY(end == UV_MAX)) {
-        invlist_set_len(invlist, i_s + 1, *(get_invlist_offset_addr(invlist)));
-        return invlist;
-    }
-
-    /* If i_e started as == i_s, it has also been dealt with,
-     * and been updated to the new i_s, which will fail the following if */
-    if (! ELEMENT_RANGE_MATCHES_INVLIST(i_e)) {
-
-        /* Here, the ranges on either side of the end of the new range are in
-         * the set, and this range ends in the gap between them.
-         *
-         * If this range is adjacent to (hence extends) the range above it, it
-         * becomes part of that range; likewise if it extends the range below,
-         * it becomes part of that range */
-        if (end + 1 == array[i_e+1]) {
-            i_e++;
-            array[i_e] = start;
-        }
-        else if (start <= array[i_e]) {
-            array[i_e] = end + 1;
-            i_e--;
-        }
-    }
-
-    if (i_s == i_e) {
-
-        /* If the range fits entirely in an existing range (as possibly already
-         * extended above), it doesn't add anything new */
-        if (ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
-            return invlist;
-        }
-
-        /* Here, no part of the range is in the list.  Must add it.  It will
-         * occupy 2 more slots */
-      splice_in_new_range:
-
-        invlist_extend(invlist, len + 2);
-        array = invlist_array(invlist);
-        /* Move the rest of the array down two slots. Don't include any
-         * trailing NUL */
-        Move(array + i_e + 1, array + i_e + 3, len - i_e - 1, UV);
-
-        /* Do the actual splice */
-        array[i_e+1] = start;
-        array[i_e+2] = end + 1;
-        invlist_set_len(invlist, len + 2, *(get_invlist_offset_addr(invlist)));
-        return invlist;
-    }
-
-    /* Here the new range crossed the boundaries of a pre-existing range.  The
-     * code above has adjusted things so that both ends are in ranges that are
-     * in the set.  This means everything in between must also be in the set.
-     * Just squash things together */
-    Move(array + i_e + 1, array + i_s + 1, len - i_e - 1, UV);
-    invlist_set_len(invlist,
-                    len - i_e + i_s,
-                    *(get_invlist_offset_addr(invlist)));
-
-    return invlist;
-}
-
-SV*
-Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
-                                 UV** other_elements_ptr)
-{
-    /* Create and return an inversion list whose contents are to be populated
-     * by the caller.  The caller gives the number of elements (in 'size') and
-     * the very first element ('element0').  This function will set
-     * '*other_elements_ptr' to an array of UVs, where the remaining elements
-     * are to be placed.
-     *
-     * Obviously there is some trust involved that the caller will properly
-     * fill in the other elements of the array.
-     *
-     * (The first element needs to be passed in, as the underlying code does
-     * things differently depending on whether it is zero or non-zero) */
-
-    SV* invlist = _new_invlist(size);
-    bool offset;
-
-    PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
-
-    invlist = add_cp_to_invlist(invlist, element0);
-    offset = *get_invlist_offset_addr(invlist);
-
-    invlist_set_len(invlist, size, offset);
-    *other_elements_ptr = invlist_array(invlist) + 1;
-    return invlist;
-}
-
-#endif
-
-#ifndef PERL_IN_XSUB_RE
-void
-Perl__invlist_invert(pTHX_ SV* const invlist)
-{
-    /* Complement the input inversion list.  This adds a 0 if the list didn't
-     * have a zero; removes it otherwise.  As described above, the data
-     * structure is set up so that this is very efficient */
-
-    PERL_ARGS_ASSERT__INVLIST_INVERT;
-
-    assert(! invlist_is_iterating(invlist));
-
-    /* The inverse of matching nothing is matching everything */
-    if (_invlist_len(invlist) == 0) {
-        _append_range_to_invlist(invlist, 0, UV_MAX);
-        return;
-    }
-
-    *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
-}
-
-SV*
-Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
-{
-    /* Return a new inversion list that is a copy of the input one, which is
-     * unchanged.  The new list will not be mortal even if the old one was. */
-
-    const STRLEN nominal_length = _invlist_len(invlist);
-    const STRLEN physical_length = SvCUR(invlist);
-    const bool offset = *(get_invlist_offset_addr(invlist));
-
-    PERL_ARGS_ASSERT_INVLIST_CLONE;
-
-    if (new_invlist == NULL) {
-        new_invlist = _new_invlist(nominal_length);
-    }
-    else {
-        sv_upgrade(new_invlist, SVt_INVLIST);
-        initialize_invlist_guts(new_invlist, nominal_length);
-    }
-
-    *(get_invlist_offset_addr(new_invlist)) = offset;
-    invlist_set_len(new_invlist, nominal_length, offset);
-    Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
-
-    return new_invlist;
-}
-
-#endif
-
-PERL_STATIC_INLINE UV
-S_invlist_lowest(SV* const invlist)
-{
-    /* Returns the lowest code point that matches an inversion list.  This API
-     * has an ambiguity, as it returns 0 under either the lowest is actually
-     * 0, or if the list is empty.  If this distinction matters to you, check
-     * for emptiness before calling this function */
-
-    UV len = _invlist_len(invlist);
-    UV *array;
-
-    PERL_ARGS_ASSERT_INVLIST_LOWEST;
-
-    if (len == 0) {
-        return 0;
-    }
-
-    array = invlist_array(invlist);
-
-    return array[0];
-}
-
-STATIC SV *
-S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
-{
-    /* Get the contents of an inversion list into a string SV so that they can
-     * be printed out.  If 'traditional_style' is TRUE, it uses the format
-     * traditionally done for debug tracing; otherwise it uses a format
-     * suitable for just copying to the output, with blanks between ranges and
-     * a dash between range components */
-
-    UV start, end;
-    SV* output;
-    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
-    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
-
-    if (traditional_style) {
-        output = newSVpvs("\n");
-    }
-    else {
-        output = newSVpvs("");
-    }
-
-    PERL_ARGS_ASSERT_INVLIST_CONTENTS;
-
-    assert(! invlist_is_iterating(invlist));
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (end == UV_MAX) {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
-                                          start, intra_range_delimiter,
-                                                 inter_range_delimiter);
-        }
-        else if (end != start) {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
-                                          start,
-                                                   intra_range_delimiter,
-                                                  end, inter_range_delimiter);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
-                                          start, inter_range_delimiter);
-        }
-    }
-
-    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
-        SvCUR_set(output, SvCUR(output) - 1);
-    }
-
-    return output;
-}
-
-#ifndef PERL_IN_XSUB_RE
-void
-Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
-                         const char * const indent, SV* const invlist)
-{
-    /* Designed to be called only by do_sv_dump().  Dumps out the ranges of the
-     * inversion list 'invlist' to 'file' at 'level'  Each line is prefixed by
-     * the string 'indent'.  The output looks like this:
-         [0] 0x000A .. 0x000D
-         [2] 0x0085
-         [4] 0x2028 .. 0x2029
-         [6] 0x3104 .. INFTY
-     * This means that the first range of code points matched by the list are
-     * 0xA through 0xD; the second range contains only the single code point
-     * 0x85, etc.  An inversion list is an array of UVs.  Two array elements
-     * are used to define each range (except if the final range extends to
-     * infinity, only a single element is needed).  The array index of the
-     * first element for the corresponding range is given in brackets. */
-
-    UV start, end;
-    STRLEN count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_DUMP;
-
-    if (invlist_is_iterating(invlist)) {
-        Perl_dump_indent(aTHX_ level, file,
-             "%sCan't dump inversion list because is in middle of iterating\n",
-             indent);
-        return;
-    }
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (end == UV_MAX) {
-            Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n",
-                                   indent, (UV)count, start);
-        }
-        else if (end != start) {
-            Perl_dump_indent(aTHX_ level, file,
-                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
-                                indent, (UV)count, start,         end);
-        }
-        else {
-            Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
-                                            indent, (UV)count, start);
-        }
-        count += 2;
-    }
-}
-
-#endif
-
-#if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
-bool
-Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
-{
-    /* Return a boolean as to if the two passed in inversion lists are
-     * identical.  The final argument, if TRUE, says to take the complement of
-     * the second inversion list before doing the comparison */
-
-    const UV len_a = _invlist_len(a);
-    UV len_b = _invlist_len(b);
-
-    const UV* array_a = NULL;
-    const UV* array_b = NULL;
-
-    PERL_ARGS_ASSERT__INVLISTEQ;
-
-    /* This code avoids accessing the arrays unless it knows the length is
-     * non-zero */
-
-    if (len_a == 0) {
-        if (len_b == 0) {
-            return ! complement_b;
-        }
-    }
-    else {
-        array_a = invlist_array(a);
-    }
-
-    if (len_b != 0) {
-        array_b = invlist_array(b);
-    }
-
-    /* If are to compare 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* The complement of nothing is everything, so <a> would have to have
-         * just one element, starting at zero (ending at infinity) */
-        if (len_b == 0) {
-            return (len_a == 1 && array_a[0] == 0);
-        }
-        if (array_b[0] == 0) {
-
-            /* Otherwise, to complement, we invert.  Here, the first element is
-             * 0, just remove it.  To do this, we just pretend the array starts
-             * one later */
-
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    return    len_a == len_b
-           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
-
-}
-#endif
-
-/*
- * As best we can, determine the characters that can match the start of
- * the given EXACTF-ish node.  This is for use in creating ssc nodes, so there
- * can be false positive matches
- *
- * Returns the invlist as a new SV*; it is the caller's responsibility to
- * call SvREFCNT_dec() when done with it.
- */
-STATIC SV*
-S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
-{
-    const U8 * s = (U8*)STRING(node);
-    SSize_t bytelen = STR_LEN(node);
-    UV uc;
-    /* Start out big enough for 2 separate code points */
-    SV* invlist = _new_invlist(4);
-
-    PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST;
-
-    if (! UTF) {
-        uc = *s;
-
-        /* We punt and assume can match anything if the node begins
-         * with a multi-character fold.  Things are complicated.  For
-         * example, /ffi/i could match any of:
-         *  "\N{LATIN SMALL LIGATURE FFI}"
-         *  "\N{LATIN SMALL LIGATURE FF}I"
-         *  "F\N{LATIN SMALL LIGATURE FI}"
-         *  plus several other things; and making sure we have all the
-         *  possibilities is hard. */
-        if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + bytelen)) {
-            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else {
-            /* Any Latin1 range character can potentially match any
-             * other depending on the locale, and in Turkic locales, 'I' and
-             * 'i' can match U+130 and U+131 */
-            if (OP(node) == EXACTFL) {
-                _invlist_union(invlist, PL_Latin1, &invlist);
-                if (isALPHA_FOLD_EQ(uc, 'I')) {
-                    invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-                    invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-                }
-            }
-            else {
-                /* But otherwise, it matches at least itself.  We can
-                 * quickly tell if it has a distinct fold, and if so,
-                 * it matches that as well */
-                invlist = add_cp_to_invlist(invlist, uc);
-                if (IS_IN_SOME_FOLD_L1(uc))
-                    invlist = add_cp_to_invlist(invlist, PL_fold_latin1[uc]);
-            }
-
-            /* Some characters match above-Latin1 ones under /i.  This
-             * is true of EXACTFL ones when the locale is UTF-8 */
-            if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
-                && (! isASCII(uc) || ! inRANGE(OP(node), EXACTFAA,
-                                                         EXACTFAA_NO_TRIE)))
-            {
-                add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
-            }
-        }
-    }
-    else {  /* Pattern is UTF-8 */
-        U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
-        const U8* e = s + bytelen;
-        IV fc;
-
-        fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
-
-        /* The only code points that aren't folded in a UTF EXACTFish
-         * node are the problematic ones in EXACTFL nodes */
-        if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
-            /* We need to check for the possibility that this EXACTFL
-             * node begins with a multi-char fold.  Therefore we fold
-             * the first few characters of it so that we can make that
-             * check */
-            U8 *d = folded;
-            int i;
-
-            fc = -1;
-            for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
-                if (isASCII(*s)) {
-                    *(d++) = (U8) toFOLD(*s);
-                    if (fc < 0) {       /* Save the first fold */
-                        fc = *(d-1);
-                    }
-                    s++;
-                }
-                else {
-                    STRLEN len;
-                    UV fold = toFOLD_utf8_safe(s, e, d, &len);
-                    if (fc < 0) {       /* Save the first fold */
-                        fc = fold;
-                    }
-                    d += len;
-                    s += UTF8SKIP(s);
-                }
-            }
-
-            /* And set up so the code below that looks in this folded
-             * buffer instead of the node's string */
-            e = d;
-            s = folded;
-        }
-
-        /* When we reach here 's' points to the fold of the first
-         * character(s) of the node; and 'e' points to far enough along
-         * the folded string to be just past any possible multi-char
-         * fold.
-         *
-         * Like the non-UTF case above, we punt if the node begins with a
-         * multi-char fold  */
-
-        if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
-            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else {  /* Single char fold */
-            unsigned int k;
-            U32 first_fold;
-            const U32 * remaining_folds;
-            Size_t folds_count;
-
-            /* It matches itself */
-            invlist = add_cp_to_invlist(invlist, fc);
-
-            /* ... plus all the things that fold to it, which are found in
-             * PL_utf8_foldclosures */
-            folds_count = _inverse_folds(fc, &first_fold,
-                                                &remaining_folds);
-            for (k = 0; k < folds_count; k++) {
-                UV c = (k == 0) ? first_fold : remaining_folds[k-1];
-
-                /* /aa doesn't allow folds between ASCII and non- */
-                if (   inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)
-                    && isASCII(c) != isASCII(fc))
-                {
-                    continue;
-                }
-
-                invlist = add_cp_to_invlist(invlist, c);
-            }
-
-            if (OP(node) == EXACTFL) {
-
-                /* If either [iI] are present in an EXACTFL node the above code
-                 * should have added its normal case pair, but under a Turkish
-                 * locale they could match instead the case pairs from it.  Add
-                 * those as potential matches as well */
-                if (isALPHA_FOLD_EQ(fc, 'I')) {
-                    invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-                    invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-                }
-                else if (fc == LATIN_SMALL_LETTER_DOTLESS_I) {
-                    invlist = add_cp_to_invlist(invlist, 'I');
-                }
-                else if (fc == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
-                    invlist = add_cp_to_invlist(invlist, 'i');
-                }
-            }
-        }
-    }
-
-    return invlist;
-}
-
-#undef HEADER_LENGTH
-#undef TO_INTERNAL_SIZE
-#undef FROM_INTERNAL_SIZE
-#undef INVLIST_VERSION_ID
-
-/* End of inversion list object */
 
 STATIC void
 S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
@@ -11451,12 +2669,6 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  * is a trifle forced, but the need to tie the tails of the branches to what
  * follows makes it hard to avoid.
  */
-#define REGTAIL(x,y,z) regtail((x),(y),(z),depth+1)
-#ifdef DEBUGGING
-#define REGTAIL_STUDY(x,y,z) regtail_study((x),(y),(z),depth+1)
-#else
-#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
-#endif
 
 STATIC regnode_offset
 S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
@@ -11484,7 +2696,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     }
 
     if (sv_dat) {
-        num = add_data( pRExC_state, STR_WITH_LEN("S"));
+        num = reg_add_data( pRExC_state, STR_WITH_LEN("S"));
         RExC_rxi->data->data[num]=(void*)sv_dat;
         SvREFCNT_inc_simple_void_NN(sv_dat);
     }
@@ -11708,7 +2920,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
         vFAIL("Too many nested open parens");
     }
 
-    *flagp = 0;                         /* Initialize. */
+    *flagp = 0;				/* Initialize. */
 
     /* Having this true makes it feasible to have a lot fewer tests for the
      * parse pointer being in scope.  For example, we can write
@@ -12034,7 +3246,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RExC_seen |= REG_VERBARG_SEEN;
             if (start_arg) {
                 SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
-                ARG(REGNODE_p(ret)) = add_data( pRExC_state,
+                ARG(REGNODE_p(ret)) = reg_add_data( pRExC_state,
                                         STR_WITH_LEN("S"));
                 RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
                 FLAGS(REGNODE_p(ret)) = 1;
@@ -12066,10 +3278,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             if (RExC_parse > RExC_end) {
                 paren = '\0';
             }
-            ret = 0;                    /* For look-ahead/behind. */
+            ret = 0;			/* For look-ahead/behind. */
             switch (paren) {
 
-            case 'P':   /* (?P...) variants for those used to PCRE/Python */
+            case 'P':	/* (?P...) variants for those used to PCRE/Python */
                 paren = *RExC_parse;
                 if ( paren == '<') {    /* (?P<...>) named capture */
                     RExC_parse_inc_by(1);
@@ -12412,13 +3624,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 RExC_parse_set(RExC_start + cb->end);
                 o = cb->block;
                 if (cb->src_regex) {
-                    n = add_data(pRExC_state, STR_WITH_LEN("rl"));
+                    n = reg_add_data(pRExC_state, STR_WITH_LEN("rl"));
                     RExC_rxi->data->data[n] =
                         (void*)SvREFCNT_inc((SV*)cb->src_regex);
                     RExC_rxi->data->data[n+1] = (void*)o;
                 }
                 else {
-                    n = add_data(pRExC_state,
+                    n = reg_add_data(pRExC_state,
                             (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
                     RExC_rxi->data->data[n] = (void*)o;
                 }
@@ -12511,7 +3723,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     }
                     RExC_parse_inc_by(1);
                     if (sv_dat) {
-                        num = add_data( pRExC_state, STR_WITH_LEN("S"));
+                        num = reg_add_data( pRExC_state, STR_WITH_LEN("S"));
                         RExC_rxi->data->data[num]=(void*)sv_dat;
                         SvREFCNT_inc_simple_void_NN(sv_dat);
                     }
@@ -12783,12 +3995,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     else if (paren == ':') {
         *flagp |= flags&SIMPLE;
     }
-    if (is_open) {                              /* Starts with OPEN. */
+    if (is_open) {				/* Starts with OPEN. */
         if (! REGTAIL(pRExC_state, ret, br)) {  /* OPEN -> first. */
             REQUIRE_BRANCHJ(flagp, 0);
         }
     }
-    else if (paren != '?')              /* Not Conditional */
+    else if (paren != '?')		/* Not Conditional */
         ret = br;
     *flagp |= flags & (HASWIDTH | POSTPONED);
     lastbr = br;
@@ -13005,7 +4217,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             vFAIL("Unmatched )");
         }
         else
-            FAIL("Junk on end of regexp");      /* "Can't happen". */
+            FAIL("Junk on end of regexp");	/* "Can't happen". */
         NOT_REACHED; /* NOTREACHED */
     }
 
@@ -13052,7 +4264,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         }
     }
 
-    *flagp = 0;                 /* Initialize. */
+    *flagp = 0;			/* Initialize. */
 
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                             FALSE /* Don't force to /x */ );
@@ -13082,7 +4294,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         chain = latest;
         c++;
     }
-    if (chain == 0) {   /* Loop ran zero times. */
+    if (chain == 0) {	/* Loop ran zero times. */
         chain = reg_node(pRExC_state, NOTHING);
         if (ret == 0)
             ret = chain;
@@ -14004,14 +5216,6 @@ S_backref_value(char *p, char *e)
     return I32_MAX;
 }
 
-#ifdef DEBUGGING
-#define REGNODE_GUTS(state,op,extra_size) \
-    regnode_guts_debug(state,op,extra_size)
-#else
-#define REGNODE_GUTS(state,op,extra_size) \
-    regnode_guts(state,extra_size)
-#endif
-
 
 /*
  - regatom - the lowest level
@@ -14093,7 +5297,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
     DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
-    *flagp = 0;         /* Initialize. */
+    *flagp = 0;		/* Initialize. */
 
     DEBUG_PARSE("atom");
 
@@ -14249,7 +5453,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else {
                 ret = reg_node(pRExC_state, SEOL);
             }
-            RExC_seen_zerolen++;                /* Do not optimize RE away */
+            RExC_seen_zerolen++;		/* Do not optimize RE away */
             goto finish_meta_pat;
         case 'z':
             if (RExC_pm_flags & PMf_WILDCARD) {
@@ -14259,7 +5463,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else {
                 ret = reg_node(pRExC_state, EOS);
             }
-            RExC_seen_zerolen++;                /* Do not optimize RE away */
+            RExC_seen_zerolen++;		/* Do not optimize RE away */
             goto finish_meta_pat;
         case 'C':
             vFAIL("\\C no longer supported");
@@ -16046,8 +7250,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 }
 
 
-STATIC void
-S_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
+void
+Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
 {
     /* Uses the inversion list '*invlist_ptr' to populate the ANYOF 'node'.  It
      * sets up the bitmap and any flags, removing those code points from the
@@ -16962,7 +8166,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
     /* Handle the (?[...]) construct to do set operations */
 
     U8 curchar;                     /* Current character being parsed */
-    UV start, end;                  /* End points of code point ranges */
+    UV start, end;	            /* End points of code point ranges */
     SV* final = NULL;               /* The end result inversion list */
     SV* result_string;              /* 'final' stringified */
     AV* stack;                      /* stack of operators and operands not yet
@@ -17698,8 +8902,8 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
 #undef IS_OPERATOR
 #undef IS_OPERAND
 
-STATIC void
-S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
+void
+Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
 {
     /* This adds the Latin1/above-Latin1 folding rules.
      *
@@ -18073,7 +9277,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     assert(RExC_parse <= RExC_end);
 
-    if (UCHARAT(RExC_parse) == '^') {   /* Complement the class */
+    if (UCHARAT(RExC_parse) == '^') {	/* Complement the class */
         RExC_parse_inc_by(1);
         invert = TRUE;
         allow_mutiple_chars = FALSE;
@@ -18236,16 +9440,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 U32 packed_warn;
                 U8 grok_c_char;
 
-            case 'w':   namedclass = ANYOF_WORDCHAR;    break;
-            case 'W':   namedclass = ANYOF_NWORDCHAR;   break;
-            case 's':   namedclass = ANYOF_SPACE;       break;
-            case 'S':   namedclass = ANYOF_NSPACE;      break;
-            case 'd':   namedclass = ANYOF_DIGIT;       break;
-            case 'D':   namedclass = ANYOF_NDIGIT;      break;
-            case 'v':   namedclass = ANYOF_VERTWS;      break;
-            case 'V':   namedclass = ANYOF_NVERTWS;     break;
-            case 'h':   namedclass = ANYOF_HORIZWS;     break;
-            case 'H':   namedclass = ANYOF_NHORIZWS;    break;
+            case 'w':	namedclass = ANYOF_WORDCHAR;	break;
+            case 'W':	namedclass = ANYOF_NWORDCHAR;	break;
+            case 's':	namedclass = ANYOF_SPACE;	break;
+            case 'S':	namedclass = ANYOF_NSPACE;	break;
+            case 'd':	namedclass = ANYOF_DIGIT;	break;
+            case 'D':	namedclass = ANYOF_NDIGIT;	break;
+            case 'v':	namedclass = ANYOF_VERTWS;	break;
+            case 'V':	namedclass = ANYOF_NVERTWS;	break;
+            case 'h':	namedclass = ANYOF_HORIZWS;	break;
+            case 'H':	namedclass = ANYOF_NHORIZWS;	break;
             case 'N':  /* Handle \N{NAME} in class */
                 {
                     const char * const backslash_N_beg = RExC_parse - 2;
@@ -18544,15 +9748,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                 named */
                 }
                 break;
-            case 'n':   value = '\n';                   break;
-            case 'r':   value = '\r';                   break;
-            case 't':   value = '\t';                   break;
-            case 'f':   value = '\f';                   break;
-            case 'b':   value = '\b';                   break;
-            case 'e':   value = ESC_NATIVE;             break;
-            case 'a':   value = '\a';                   break;
+            case 'n':	value = '\n';			break;
+            case 'r':	value = '\r';			break;
+            case 't':	value = '\t';			break;
+            case 'f':	value = '\f';			break;
+            case 'b':	value = '\b';			break;
+            case 'e':	value = ESC_NATIVE;             break;
+            case 'a':	value = '\a';                   break;
             case 'o':
-                RExC_parse--;   /* function expects to be pointed at the 'o' */
+                RExC_parse--;	/* function expects to be pointed at the 'o' */
                 if (! grok_bslash_o(&RExC_parse,
                                             RExC_end,
                                             &value,
@@ -18574,7 +9778,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
                 break;
             case 'x':
-                RExC_parse--;   /* function expects to be pointed at the 'x' */
+                RExC_parse--;	/* function expects to be pointed at the 'x' */
                 if (!  grok_bslash_x(&RExC_parse,
                                             RExC_end,
                                             &value,
@@ -18886,8 +10090,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         cp_list = add_cp_to_invlist(cp_list, '-');
                         element_count++;
                     } else
-                        range = 1;      /* yeah, it's a range! */
-                    continue;   /* but do it the next time */
+                        range = 1;	/* yeah, it's a range! */
+                    continue;	/* but do it the next time */
                 }
             }
         }
@@ -19277,7 +10481,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * ones already on the list */
     if (cp_foldable_list) {
         if (FOLD) {
-            UV start, end;      /* End points of code point ranges */
+            UV start, end;	/* End points of code point ranges */
 
             SV* fold_intersection = NULL;
             SV** use_list;
@@ -20711,8 +11915,8 @@ S_optimize_regclass(pTHX_
 
 #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
 
-STATIC void
-S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
+void
+Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
                 regnode* const node,
                 SV* const cp_list,
                 SV* const runtime_defns,
@@ -20724,7 +11928,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
      *      1) ANYOF_MATCHES_NONE_OUTSIDE_BITMAP_VALUE
      *      2) ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE
      *
-     * Otherwise, it sets the argument to the count returned by add_data(),
+     * Otherwise, it sets the argument to the count returned by reg_add_data(),
      * having allocated and stored an array, av, as follows:
      *  av[0] stores the inversion list defining this class as far as known at
      *        this time, or PL_sv_undef if nothing definite is now known.
@@ -20863,7 +12067,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
     }
 
     rv = newRV_noinc(MUTABLE_SV(av));
-    n = add_data(pRExC_state, STR_WITH_LEN("s"));
+    n = reg_add_data(pRExC_state, STR_WITH_LEN("s"));
     RExC_rxi->data->data[n] = (void*)rv;
     ARG_SET(node, n);
 }
@@ -21437,7 +12641,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
         StructCopy(--src, --dst, regnode);
     }
 
-    place = REGNODE_p(operand); /* Op node, where operand used to be. */
+    place = REGNODE_p(operand);	/* Op node, where operand used to be. */
     src = place + 1; /* NOT REGNODE_AFTER! */
     FLAGS(place) = 0;
     FILL_NODE(operand, op);
@@ -21548,7 +12752,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         regnode * const temp = regnext(REGNODE_p(scan));
 #ifdef EXPERIMENTAL_INPLACESCAN
         if (REGNODE_TYPE(OP(REGNODE_p(scan))) == EXACT) {
-            bool unfolded_multi_char;   /* Unexamined in this routine */
+            bool unfolded_multi_char;	/* Unexamined in this routine */
             if (join_exact(pRExC_state, scan, &min,
                            &unfolded_multi_char, 1, REGNODE_p(val), depth+1))
                 return TRUE; /* Was return EXACT */
@@ -21606,8 +12810,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 }
 #endif
 
-STATIC SV*
-S_get_ANYOFM_contents(pTHX_ const regnode * n) {
+SV*
+Perl_get_ANYOFM_contents(pTHX_ const regnode * n) {
 
     /* Returns an inversion list of all the code points matched by the
      * ANYOFM/NANYOFM node 'n' */
@@ -21639,8 +12843,8 @@ S_get_ANYOFM_contents(pTHX_ const regnode * n) {
     return cp_list;
 }
 
-STATIC SV *
-S_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
+SV *
+Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
     PERL_ARGS_ASSERT_GET_ANYOFHBBM_CONTENTS;
 
     SV * cp_list = NULL;
@@ -21655,716 +12859,11 @@ S_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
     return cp_list;
 }
 
-/*
- - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
- */
-#ifdef DEBUGGING
-
-static void
-S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
-{
-    int bit;
-    int set=0;
-
-    ASSUME(REG_INTFLAGS_NAME_SIZE <= sizeof(flags)*8);
-
-    for (bit=0; bit<REG_INTFLAGS_NAME_SIZE; bit++) {
-        if (flags & (1<<bit)) {
-            if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s", lead);
-            Perl_re_printf( aTHX_  "%s ", PL_reg_intflags_name[bit]);
-        }
-    }
-    if (lead)  {
-        if (set)
-            Perl_re_printf( aTHX_  "\n");
-        else
-            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
-    }
-}
-
-static void
-S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
-{
-    int bit;
-    int set=0;
-    regex_charset cs;
-
-    ASSUME(REG_EXTFLAGS_NAME_SIZE <= sizeof(flags)*8);
-
-    for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) {
-        if (flags & (1U<<bit)) {
-            if ((1U<<bit) & RXf_PMf_CHARSET) {  /* Output separately, below */
-                continue;
-            }
-            if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s", lead);
-            Perl_re_printf( aTHX_  "%s ", PL_reg_extflags_name[bit]);
-        }
-    }
-    if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
-            if (!set++ && lead) {
-                Perl_re_printf( aTHX_  "%s", lead);
-            }
-            switch (cs) {
-                case REGEX_UNICODE_CHARSET:
-                    Perl_re_printf( aTHX_  "UNICODE");
-                    break;
-                case REGEX_LOCALE_CHARSET:
-                    Perl_re_printf( aTHX_  "LOCALE");
-                    break;
-                case REGEX_ASCII_RESTRICTED_CHARSET:
-                    Perl_re_printf( aTHX_  "ASCII-RESTRICTED");
-                    break;
-                case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                    Perl_re_printf( aTHX_  "ASCII-MORE_RESTRICTED");
-                    break;
-                default:
-                    Perl_re_printf( aTHX_  "UNKNOWN CHARACTER SET");
-                    break;
-            }
-    }
-    if (lead)  {
-        if (set)
-            Perl_re_printf( aTHX_  "\n");
-        else
-            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
-    }
-}
-#endif
-
-void
-Perl_regdump(pTHX_ const regexp *r)
-{
-#ifdef DEBUGGING
-    int i;
-    SV * const sv = sv_newmortal();
-    SV *dsv= sv_newmortal();
-    RXi_GET_DECL(r, ri);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REGDUMP;
-
-    (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
-
-    /* Header fields of interest. */
-    for (i = 0; i < 2; i++) {
-        if (r->substrs->data[i].substr) {
-            RE_PV_QUOTED_DECL(s, 0, dsv,
-                            SvPVX_const(r->substrs->data[i].substr),
-                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
-                            PL_dump_re_max_len);
-            Perl_re_printf( aTHX_
-                          "%s %s%s at %" IVdf "..%" UVuf " ",
-                          i ? "floating" : "anchored",
-                          s,
-                          RE_SV_TAIL(r->substrs->data[i].substr),
-                          (IV)r->substrs->data[i].min_offset,
-                          (UV)r->substrs->data[i].max_offset);
-        }
-        else if (r->substrs->data[i].utf8_substr) {
-            RE_PV_QUOTED_DECL(s, 1, dsv,
-                            SvPVX_const(r->substrs->data[i].utf8_substr),
-                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
-                            30);
-            Perl_re_printf( aTHX_
-                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
-                          i ? "floating" : "anchored",
-                          s,
-                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
-                          (IV)r->substrs->data[i].min_offset,
-                          (UV)r->substrs->data[i].max_offset);
-        }
-    }
-
-    if (r->check_substr || r->check_utf8)
-        Perl_re_printf( aTHX_
-                      (const char *)
-                      (   r->check_substr == r->substrs->data[1].substr
-                       && r->check_utf8   == r->substrs->data[1].utf8_substr
-                       ? "(checking floating" : "(checking anchored"));
-    if (r->intflags & PREGf_NOSCAN)
-        Perl_re_printf( aTHX_  " noscan");
-    if (r->extflags & RXf_CHECK_ALL)
-        Perl_re_printf( aTHX_  " isall");
-    if (r->check_substr || r->check_utf8)
-        Perl_re_printf( aTHX_  ") ");
-
-    if (ri->regstclass) {
-        regprop(r, sv, ri->regstclass, NULL, NULL);
-        Perl_re_printf( aTHX_  "stclass %s ", SvPVX_const(sv));
-    }
-    if (r->intflags & PREGf_ANCH) {
-        Perl_re_printf( aTHX_  "anchored");
-        if (r->intflags & PREGf_ANCH_MBOL)
-            Perl_re_printf( aTHX_  "(MBOL)");
-        if (r->intflags & PREGf_ANCH_SBOL)
-            Perl_re_printf( aTHX_  "(SBOL)");
-        if (r->intflags & PREGf_ANCH_GPOS)
-            Perl_re_printf( aTHX_  "(GPOS)");
-        Perl_re_printf( aTHX_ " ");
-    }
-    if (r->intflags & PREGf_GPOS_SEEN)
-        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
-    if (r->intflags & PREGf_SKIP)
-        Perl_re_printf( aTHX_  "plus ");
-    if (r->intflags & PREGf_IMPLICIT)
-        Perl_re_printf( aTHX_  "implicit ");
-    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
-    if (r->extflags & RXf_EVAL_SEEN)
-        Perl_re_printf( aTHX_  "with eval ");
-    Perl_re_printf( aTHX_  "\n");
-    DEBUG_FLAGS_r({
-        regdump_extflags("r->extflags: ", r->extflags);
-        regdump_intflags("r->intflags: ", r->intflags);
-    });
-#else
-    PERL_ARGS_ASSERT_REGDUMP;
-    PERL_UNUSED_CONTEXT;
-    PERL_UNUSED_ARG(r);
-#endif  /* DEBUGGING */
-}
-
-/* Should be synchronized with ANYOF_ #defines in regcomp.h */
-#ifdef DEBUGGING
-
-#  if   CC_WORDCHAR_ != 0 || CC_DIGIT_ != 1        || CC_ALPHA_ != 2    \
-     || CC_LOWER_ != 3    || CC_UPPER_ != 4        || CC_PUNCT_ != 5    \
-     || CC_PRINT_ != 6    || CC_ALPHANUMERIC_ != 7 || CC_GRAPH_ != 8    \
-     || CC_CASED_ != 9    || CC_SPACE_ != 10       || CC_BLANK_ != 11   \
-     || CC_XDIGIT_ != 12  || CC_CNTRL_ != 13       || CC_ASCII_ != 14   \
-     || CC_VERTSPACE_ != 15
-#   error Need to adjust order of anyofs[]
-#  endif
-static const char * const anyofs[] = {
-    "\\w",
-    "\\W",
-    "\\d",
-    "\\D",
-    "[:alpha:]",
-    "[:^alpha:]",
-    "[:lower:]",
-    "[:^lower:]",
-    "[:upper:]",
-    "[:^upper:]",
-    "[:punct:]",
-    "[:^punct:]",
-    "[:print:]",
-    "[:^print:]",
-    "[:alnum:]",
-    "[:^alnum:]",
-    "[:graph:]",
-    "[:^graph:]",
-    "[:cased:]",
-    "[:^cased:]",
-    "\\s",
-    "\\S",
-    "[:blank:]",
-    "[:^blank:]",
-    "[:xdigit:]",
-    "[:^xdigit:]",
-    "[:cntrl:]",
-    "[:^cntrl:]",
-    "[:ascii:]",
-    "[:^ascii:]",
-    "\\v",
-    "\\V"
-};
-#endif
-
-/*
-- regprop - printable representation of opcode, with run time support
-*/
-
-void
-Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
-{
-#ifdef DEBUGGING
-    U8 k;
-    const U8 op = OP(o);
-    RXi_GET_DECL(prog, progi);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REGPROP;
-
-    SvPVCLEAR(sv);
-
-    if (op > REGNODE_MAX) {          /* regnode.type is unsigned */
-        if (pRExC_state) {  /* This gives more info, if we have it */
-            FAIL3("panic: corrupted regexp opcode %d > %d",
-                  (int)op, (int)REGNODE_MAX);
-        }
-        else {
-            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
-                             (int)op, (int)REGNODE_MAX);
-        }
-    }
-    sv_catpv(sv, REGNODE_NAME(op)); /* Take off const! */
-
-    k = REGNODE_TYPE(op);
-
-    if (k == EXACT) {
-        sv_catpvs(sv, " ");
-        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
-         * is a crude hack but it may be the best for now since
-         * we have no flag "this EXACTish node was UTF-8"
-         * --jhi */
-        pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
-                  PL_colors[0], PL_colors[1],
-                  PERL_PV_ESCAPE_UNI_DETECT |
-                  PERL_PV_ESCAPE_NONASCII   |
-                  PERL_PV_PRETTY_ELLIPSES   |
-                  PERL_PV_PRETTY_LTGT       |
-                  PERL_PV_PRETTY_NOCLEAR
-                  );
-    } else if (k == TRIE) {
-        /* print the details of the trie in dumpuntil instead, as
-         * progi->data isn't available here */
-        const U32 n = ARG(o);
-        const reg_ac_data * const ac = IS_TRIE_AC(op) ?
-               (reg_ac_data *)progi->data->data[n] :
-               NULL;
-        const reg_trie_data * const trie
-            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
-
-        Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags));
-        DEBUG_TRIE_COMPILE_r({
-          if (trie->jump)
-            sv_catpvs(sv, "(JUMP)");
-          Perl_sv_catpvf(aTHX_ sv,
-            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
-            (UV)trie->startstate,
-            (IV)trie->statecount-1, /* -1 because of the unused 0 element */
-            (UV)trie->wordcount,
-            (UV)trie->minlen,
-            (UV)trie->maxlen,
-            (UV)TRIE_CHARCOUNT(trie),
-            (UV)trie->uniquecharcount
-          );
-        });
-        if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
-            sv_catpvs(sv, "[");
-            (void) put_charclass_bitmap_innards(sv,
-                                                ((IS_ANYOF_TRIE(op))
-                                                 ? ANYOF_BITMAP(o)
-                                                 : TRIE_BITMAP(trie)),
-                                                NULL,
-                                                NULL,
-                                                NULL,
-                                                0,
-                                                FALSE
-                                               );
-            sv_catpvs(sv, "]");
-        }
-    } else if (k == CURLY) {
-        U32 lo = ARG1(o), hi = ARG2(o);
-        if (op == CURLYM || op == CURLYN || op == CURLYX)
-            Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
-        Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
-        if (hi == REG_INFTY)
-            sv_catpvs(sv, "INFTY");
-        else
-            Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi);
-        sv_catpvs(sv, "}");
-    }
-    else if (k == WHILEM && o->flags)                   /* Ordinal/of */
-        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
-    else if (k == REF || k == OPEN || k == CLOSE
-             || k == GROUPP || op == ACCEPT)
-    {
-        AV *name_list= NULL;
-        U32 parno= (op == ACCEPT)              ? (U32)ARG2L(o) :
-                   (op == OPEN || op == CLOSE) ? (U32)PARNO(o) :
-                                                 (U32)ARG(o);
-        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
-        if ( RXp_PAREN_NAMES(prog) ) {
-            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
-        } else if ( pRExC_state ) {
-            name_list= RExC_paren_name_list;
-        }
-        if ( name_list ) {
-            if ( k != REF || (op < REFN)) {
-                SV **name= av_fetch_simple(name_list, parno, 0 );
-                if (name)
-                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-            }
-            else
-            if (parno > 0) {
-                /* parno must always be larger than 0 for this block
-                 * as it represents a slot into the data array, which
-                 * has the 0 slot reserved for a placeholder so any valid
-                 * index into it is always true, eg non-zero
-                 * see the '%' "what" type and the implementation of
-                 * S_add_data()
-                 */
-                SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
-                I32 *nums=(I32*)SvPVX(sv_dat);
-                SV **name= av_fetch_simple(name_list, nums[0], 0 );
-                I32 n;
-                if (name) {
-                    for ( n=0; n<SvIVX(sv_dat); n++ ) {
-                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
-                                    (n ? "," : ""), (IV)nums[n]);
-                    }
-                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-                }
-            }
-        }
-        if ( k == REF && reginfo) {
-            U32 n = ARG(o);  /* which paren pair */
-            I32 ln = prog->offs[n].start;
-            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
-                Perl_sv_catpvf(aTHX_ sv, ": FAIL");
-            else if (ln == prog->offs[n].end)
-                Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
-            else {
-                const char *s = reginfo->strbeg + ln;
-                Perl_sv_catpvf(aTHX_ sv, ": ");
-                Perl_pv_pretty( aTHX_ sv, s, prog->offs[n].end - prog->offs[n].start, 32, 0, 0,
-                    PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
-            }
-        }
-    } else if (k == GOSUB) {
-        AV *name_list= NULL;
-        if ( RXp_PAREN_NAMES(prog) ) {
-            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
-        } else if ( pRExC_state ) {
-            name_list= RExC_paren_name_list;
-        }
-
-        /* Paren and offset */
-        Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
-                (int)((o + (int)ARG2L(o)) - progi->program) );
-        if (name_list) {
-            SV **name= av_fetch_simple(name_list, ARG(o), 0 );
-            if (name)
-                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-        }
-    }
-    else if (k == LOGICAL)
-        /* 2: embedded, otherwise 1 */
-        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
-    else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
-        U8 flags;
-        char * bitmap;
-        U8 do_sep = 0;    /* Do we need to separate various components of the
-                             output? */
-        /* Set if there is still an unresolved user-defined property */
-        SV *unresolved                = NULL;
-
-        /* Things that are ignored except when the runtime locale is UTF-8 */
-        SV *only_utf8_locale_invlist = NULL;
-
-        /* Code points that don't fit in the bitmap */
-        SV *nonbitmap_invlist = NULL;
-
-        /* And things that aren't in the bitmap, but are small enough to be */
-        SV* bitmap_range_not_in_bitmap = NULL;
-
-        bool inverted;
-
-        if (k != ANYOF) {
-            flags = 0;
-            bitmap = NULL;
-        }
-        else {
-            flags = ANYOF_FLAGS(o);
-            bitmap = ANYOF_BITMAP(o);
-        }
-
-        if (op == ANYOFL || op == ANYOFPOSIXL) {
-            if ((flags & ANYOFL_UTF8_LOCALE_REQD)) {
-                sv_catpvs(sv, "{utf8-locale-reqd}");
-            }
-            if (flags & ANYOFL_FOLD) {
-                sv_catpvs(sv, "{i}");
-            }
-        }
-
-        inverted = flags & ANYOF_INVERT;
-
-        /* If there is stuff outside the bitmap, get it */
-        if (k == ANYOFR) {
-
-            /* For a single range, split into the parts inside vs outside the
-             * bitmap. */
-            UV start = ANYOFRbase(o);
-            UV end   = ANYOFRbase(o) + ANYOFRdelta(o);
-
-            if (start < NUM_ANYOF_CODE_POINTS) {
-                if (end < NUM_ANYOF_CODE_POINTS) {
-                    bitmap_range_not_in_bitmap
-                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
-                                                  start, end);
-                }
-                else {
-                    bitmap_range_not_in_bitmap
-                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
-                                                  start, NUM_ANYOF_CODE_POINTS);
-                    start = NUM_ANYOF_CODE_POINTS;
-                }
-            }
-
-            if (start >= NUM_ANYOF_CODE_POINTS) {
-                nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
-                                                ANYOFRbase(o),
-                                                ANYOFRbase(o) + ANYOFRdelta(o));
-            }
-        }
-        else if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(o)) {
-            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
-                                                      NUM_ANYOF_CODE_POINTS,
-                                                      UV_MAX);
-        }
-        else if (ANYOF_HAS_AUX(o)) {
-                (void) GET_REGCLASS_AUX_DATA(prog, o, FALSE,
-                                                &unresolved,
-                                                &only_utf8_locale_invlist,
-                                                &nonbitmap_invlist);
-
-            /* The aux data may contain stuff that could fit in the bitmap.
-             * This could come from a user-defined property being finally
-             * resolved when this call was done; or much more likely because
-             * there are matches that require UTF-8 to be valid, and so aren't
-             * in the bitmap (or ANYOFR).  This is teased apart later */
-            _invlist_intersection(nonbitmap_invlist,
-                                  PL_InBitmap,
-                                  &bitmap_range_not_in_bitmap);
-            /* Leave just the things that don't fit into the bitmap */
-            _invlist_subtract(nonbitmap_invlist,
-                              PL_InBitmap,
-                              &nonbitmap_invlist);
-        }
-
-        /* Ready to start outputting.  First, the initial left bracket */
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-
-        if (   bitmap
-            || bitmap_range_not_in_bitmap
-            || only_utf8_locale_invlist
-            || unresolved)
-        {
-            /* Then all the things that could fit in the bitmap */
-            do_sep = put_charclass_bitmap_innards(
-                                    sv,
-                                    bitmap,
-                                    bitmap_range_not_in_bitmap,
-                                    only_utf8_locale_invlist,
-                                    o,
-                                    flags,
-
-                                    /* Can't try inverting for a
-                                                   * better display if there
-                                                   * are things that haven't
-                                                   * been resolved */
-                                    (unresolved != NULL || k == ANYOFR));
-            SvREFCNT_dec(bitmap_range_not_in_bitmap);
-
-            /* If there are user-defined properties which haven't been defined
-             * yet, output them.  If the result is not to be inverted, it is
-             * clearest to output them in a separate [] from the bitmap range
-             * stuff.  If the result is to be complemented, we have to show
-             * everything in one [], as the inversion applies to the whole
-             * thing.  Use {braces} to separate them from anything in the
-             * bitmap and anything above the bitmap. */
-            if (unresolved) {
-                if (inverted) {
-                    if (! do_sep) { /* If didn't output anything in the bitmap
-                                     */
-                        sv_catpvs(sv, "^");
-                    }
-                    sv_catpvs(sv, "{");
-                }
-                else if (do_sep) {
-                    Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1],
-                                                      PL_colors[0]);
-                }
-                sv_catsv(sv, unresolved);
-                if (inverted) {
-                    sv_catpvs(sv, "}");
-                }
-                do_sep = ! inverted;
-            }
-            else if (     do_sep == 2
-                     && ! nonbitmap_invlist
-                     &&   ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(o))
-            {
-                /* Here, the display shows the class as inverted, and
-                 * everything above the lower display should also match, but
-                 * there is no indication of that.  Add this range so the code
-                 * below will add it to the display */
-                _invlist_union_complement_2nd(nonbitmap_invlist,
-                                              PL_InBitmap,
-                                              &nonbitmap_invlist);
-            }
-        }
-
-        /* And, finally, add the above-the-bitmap stuff */
-        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
-            SV* contents;
-
-            /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len > 256)
-                                    ? PL_dump_re_max_len
-                                    : 256;
-
-            /* This is output in a separate [] */
-            if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
-            }
-
-            /* And, for easy of understanding, it is shown in the
-             * uncomplemented form if possible.  The one exception being if
-             * there are unresolved items, where the inversion has to be
-             * delayed until runtime */
-            if (inverted && ! unresolved) {
-                _invlist_invert(nonbitmap_invlist);
-                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
-            }
-
-            contents = invlist_contents(nonbitmap_invlist,
-                                        FALSE /* output suitable for catsv */
-                                       );
-
-            /* If the output is shorter than the permissible maximum, just do it. */
-            if (SvCUR(contents) <= dump_len) {
-                sv_catsv(sv, contents);
-            }
-            else {
-                const char * contents_string = SvPVX(contents);
-                STRLEN i = dump_len;
-
-                /* Otherwise, start at the permissible max and work back to the
-                 * first break possibility */
-                while (i > 0 && contents_string[i] != ' ') {
-                    i--;
-                }
-                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
-                                       find a legal break */
-                    i = dump_len;
-                }
-
-                sv_catpvn(sv, contents_string, i);
-                sv_catpvs(sv, "...");
-            }
-
-            SvREFCNT_dec_NN(contents);
-            SvREFCNT_dec_NN(nonbitmap_invlist);
-        }
-
-        /* And finally the matching, closing ']' */
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        if (op == ANYOFHs) {
-            Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1));
-        }
-        else if (REGNODE_TYPE(op) != ANYOF) {
-            U8 lowest = (op != ANYOFHr)
-                         ? FLAGS(o)
-                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
-            U8 highest = (op == ANYOFHr)
-                         ? HIGHEST_ANYOF_HRx_BYTE(FLAGS(o))
-                         : (op == ANYOFH || op == ANYOFR)
-                           ? 0xFF
-                           : lowest;
-#ifndef EBCDIC
-            if (op != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
-#endif
-            {
-                Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
-                if (lowest != highest) {
-                    Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
-                }
-                Perl_sv_catpvf(aTHX_ sv, ")");
-            }
-        }
-
-        SvREFCNT_dec(unresolved);
-    }
-    else if (k == ANYOFM) {
-        SV * cp_list = get_ANYOFM_contents(o);
-
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-        if (op == NANYOFM) {
-            _invlist_invert(cp_list);
-        }
-
-        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE);
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        SvREFCNT_dec(cp_list);
-    }
-    else if (k == ANYOFHbbm) {
-        SV * cp_list = get_ANYOFHbbm_contents(o);
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-
-        sv_catsv(sv, invlist_contents(cp_list,
-                                      FALSE /* output suitable for catsv */
-                                     ));
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        SvREFCNT_dec(cp_list);
-    }
-    else if (k == POSIXD || k == NPOSIXD) {
-        U8 index = FLAGS(o) * 2;
-        if (index < C_ARRAY_LENGTH(anyofs)) {
-            if (*anyofs[index] != '[')  {
-                sv_catpvs(sv, "[");
-            }
-            sv_catpv(sv, anyofs[index]);
-            if (*anyofs[index] != '[')  {
-                sv_catpvs(sv, "]");
-            }
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
-        }
-    }
-    else if (k == BOUND || k == NBOUND) {
-        /* Must be synced with order of 'bound_type' in regcomp.h */
-        const char * const bounds[] = {
-            "",      /* Traditional */
-            "{gcb}",
-            "{lb}",
-            "{sb}",
-            "{wb}"
-        };
-        assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
-        sv_catpv(sv, bounds[FLAGS(o)]);
-    }
-    else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
-        Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
-        if (o->next_off) {
-            Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
-        }
-        Perl_sv_catpvf(aTHX_ sv, "]");
-    }
-    else if (op == SBOL)
-        Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
-
-    /* add on the verb argument if there is one */
-    if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) {
-        if ( ARG(o) )
-            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
-                       SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
-        else
-            sv_catpvs(sv, ":NULL");
-    }
-#else
-    PERL_UNUSED_CONTEXT;
-    PERL_UNUSED_ARG(sv);
-    PERL_UNUSED_ARG(o);
-    PERL_UNUSED_ARG(prog);
-    PERL_UNUSED_ARG(reginfo);
-    PERL_UNUSED_ARG(pRExC_state);
-#endif  /* DEBUGGING */
-}
-
 
 
 SV *
 Perl_re_intuit_string(pTHX_ REGEXP * const r)
-{                               /* Assume that RE_INTUIT is set */
+{				/* Assume that RE_INTUIT is set */
     /* Returns an SV containing a string that must appear in the target for it
      * to match, or NULL if nothing is known that must match.
      *
@@ -22686,7 +13185,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                 }
                 break;
             case '%':
-                /* NO-OP a '%' data contains a null pointer, so that add_data
+                /* NO-OP a '%' data contains a null pointer, so that reg_add_data
                  * always returns non-zero, this should only ever happen in the
                  * 0 index */
                 assert(n==0);
@@ -22703,9 +13202,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     Safefree(ri);
 }
 
-#define av_dup_inc(s, t)        MUTABLE_AV(sv_dup_inc((const SV *)s, t))
-#define hv_dup_inc(s, t)        MUTABLE_HV(sv_dup_inc((const SV *)s, t))
-#define SAVEPVN(p, n)   ((p) ? savepvn(p, n) : NULL)
+#define av_dup_inc(s, t)	MUTABLE_AV(sv_dup_inc((const SV *)s, t))
+#define hv_dup_inc(s, t)	MUTABLE_HV(sv_dup_inc((const SV *)s, t))
+#define SAVEPVN(p, n)	((p) ? savepvn(p, n) : NULL)
 
 /*
 =for apidoc re_dup_guts
@@ -22921,7 +13420,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                 break;
             case '%':
                 /* this is a placeholder type, it exists purely so that
-                 * add_data always returns a non-zero value, this type of
+                 * reg_add_data always returns a non-zero value, this type of
                  * entry should ONLY be present in the 0 slot of the array */
                 assert(i == 0);
                 d->data[i]= ri->data->data[i];
@@ -23014,778 +13513,6 @@ Perl_save_re_context(pTHX)
 }
 #endif
 
-#ifdef DEBUGGING
-
-STATIC void
-S_put_code_point(pTHX_ SV *sv, UV c)
-{
-    PERL_ARGS_ASSERT_PUT_CODE_POINT;
-
-    if (c > 255) {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
-    }
-    else if (isPRINT(c)) {
-        const char string = (char) c;
-
-        /* We use {phrase} as metanotation in the class, so also escape literal
-         * braces */
-        if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
-            sv_catpvs(sv, "\\");
-        sv_catpvn(sv, &string, 1);
-    }
-    else if (isMNEMONIC_CNTRL(c)) {
-        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
-    }
-    else {
-        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
-    }
-}
-
-STATIC void
-S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
-{
-    /* Appends to 'sv' a displayable version of the range of code points from
-     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
-     * that have them, when they occur at the beginning or end of the range.
-     * It uses hex to output the remaining code points, unless 'allow_literals'
-     * is true, in which case the printable ASCII ones are output as-is (though
-     * some of these will be escaped by put_code_point()).
-     *
-     * NOTE:  This is designed only for printing ranges of code points that fit
-     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
-     */
-
-    const unsigned int min_range_count = 3;
-
-    assert(start <= end);
-
-    PERL_ARGS_ASSERT_PUT_RANGE;
-
-    while (start <= end) {
-        UV this_end;
-        const char * format;
-
-        if (    end - start < min_range_count
-            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
-        {
-            /* Output a range of 1 or 2 chars individually, or longer ranges
-             * when printable */
-            for (; start <= end; start++) {
-                put_code_point(sv, start);
-            }
-            break;
-        }
-
-        /* If permitted by the input options, and there is a possibility that
-         * this range contains a printable literal, look to see if there is
-         * one. */
-        if (allow_literals && start <= MAX_PRINT_A) {
-
-            /* If the character at the beginning of the range isn't an ASCII
-             * printable, effectively split the range into two parts:
-             *  1) the portion before the first such printable,
-             *  2) the rest
-             * and output them separately. */
-            if (! isPRINT_A(start)) {
-                UV temp_end = start + 1;
-
-                /* There is no point looking beyond the final possible
-                 * printable, in MAX_PRINT_A */
-                UV max = MIN(end, MAX_PRINT_A);
-
-                while (temp_end <= max && ! isPRINT_A(temp_end)) {
-                    temp_end++;
-                }
-
-                /* Here, temp_end points to one beyond the first printable if
-                 * found, or to one beyond 'max' if not.  If none found, make
-                 * sure that we use the entire range */
-                if (temp_end > MAX_PRINT_A) {
-                    temp_end = end + 1;
-                }
-
-                /* Output the first part of the split range: the part that
-                 * doesn't have printables, with the parameter set to not look
-                 * for literals (otherwise we would infinitely recurse) */
-                put_range(sv, start, temp_end - 1, FALSE);
-
-                /* The 2nd part of the range (if any) starts here. */
-                start = temp_end;
-
-                /* We do a continue, instead of dropping down, because even if
-                 * the 2nd part is non-empty, it could be so short that we want
-                 * to output it as individual characters, as tested for at the
-                 * top of this loop.  */
-                continue;
-            }
-
-            /* Here, 'start' is a printable ASCII.  If it is an alphanumeric,
-             * output a sub-range of just the digits or letters, then process
-             * the remaining portion as usual. */
-            if (isALPHANUMERIC_A(start)) {
-                UV mask = (isDIGIT_A(start))
-                           ? CC_DIGIT_
-                             : isUPPER_A(start)
-                               ? CC_UPPER_
-                               : CC_LOWER_;
-                UV temp_end = start + 1;
-
-                /* Find the end of the sub-range that includes just the
-                 * characters in the same class as the first character in it */
-                while (temp_end <= end && generic_isCC_A_(temp_end, mask)) {
-                    temp_end++;
-                }
-                temp_end--;
-
-                /* For short ranges, don't duplicate the code above to output
-                 * them; just call recursively */
-                if (temp_end - start < min_range_count) {
-                    put_range(sv, start, temp_end, FALSE);
-                }
-                else {  /* Output as a range */
-                    put_code_point(sv, start);
-                    sv_catpvs(sv, "-");
-                    put_code_point(sv, temp_end);
-                }
-                start = temp_end + 1;
-                continue;
-            }
-
-            /* We output any other printables as individual characters */
-            if (isPUNCT_A(start) || isSPACE_A(start)) {
-                while (start <= end && (isPUNCT_A(start)
-                                        || isSPACE_A(start)))
-                {
-                    put_code_point(sv, start);
-                    start++;
-                }
-                continue;
-            }
-        } /* End of looking for literals */
-
-        /* Here is not to output as a literal.  Some control characters have
-         * mnemonic names.  Split off any of those at the beginning and end of
-         * the range to print mnemonically.  It isn't possible for many of
-         * these to be in a row, so this won't overwhelm with output */
-        if (   start <= end
-            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
-        {
-            while (isMNEMONIC_CNTRL(start) && start <= end) {
-                put_code_point(sv, start);
-                start++;
-            }
-
-            /* If this didn't take care of the whole range ... */
-            if (start <= end) {
-
-                /* Look backwards from the end to find the final non-mnemonic
-                 * */
-                UV temp_end = end;
-                while (isMNEMONIC_CNTRL(temp_end)) {
-                    temp_end--;
-                }
-
-                /* And separately output the interior range that doesn't start
-                 * or end with mnemonics */
-                put_range(sv, start, temp_end, FALSE);
-
-                /* Then output the mnemonic trailing controls */
-                start = temp_end + 1;
-                while (start <= end) {
-                    put_code_point(sv, start);
-                    start++;
-                }
-                break;
-            }
-        }
-
-        /* As a final resort, output the range or subrange as hex. */
-
-        if (start >= NUM_ANYOF_CODE_POINTS) {
-            this_end = end;
-        }
-        else {  /* Have to split range at the bitmap boundary */
-            this_end = (end < NUM_ANYOF_CODE_POINTS)
-                        ? end
-                        : NUM_ANYOF_CODE_POINTS - 1;
-        }
-#if NUM_ANYOF_CODE_POINTS > 256
-        format = (this_end < 256)
-                 ? "\\x%02" UVXf "-\\x%02" UVXf
-                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
-#else
-        format = "\\x%02" UVXf "-\\x%02" UVXf;
-#endif
-        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
-        Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE_STMT;
-        break;
-    }
-}
-
-STATIC void
-S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
-{
-    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
-     * 'invlist' */
-
-    UV start, end;
-    bool allow_literals = TRUE;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;
-
-    /* Generally, it is more readable if printable characters are output as
-     * literals, but if a range (nearly) spans all of them, it's best to output
-     * it as a single range.  This code will use a single range if all but 2
-     * ASCII printables are in it */
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-
-        /* If the range starts beyond the final printable, it doesn't have any
-         * in it */
-        if (start > MAX_PRINT_A) {
-            break;
-        }
-
-        /* In both ASCII and EBCDIC, a SPACE is the lowest printable.  To span
-         * all but two, the range must start and end no later than 2 from
-         * either end */
-        if (start < ' ' + 2 && end > MAX_PRINT_A - 2) {
-            if (end > MAX_PRINT_A) {
-                end = MAX_PRINT_A;
-            }
-            if (start < ' ') {
-                start = ' ';
-            }
-            if (end - start >= MAX_PRINT_A - ' ' - 2) {
-                allow_literals = FALSE;
-            }
-            break;
-        }
-    }
-    invlist_iterfinish(invlist);
-
-    /* Here we have figured things out.  Output each range */
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (start >= NUM_ANYOF_CODE_POINTS) {
-            break;
-        }
-        put_range(sv, start, end, allow_literals);
-    }
-    invlist_iterfinish(invlist);
-
-    return;
-}
-
-STATIC SV*
-S_put_charclass_bitmap_innards_common(pTHX_
-        SV* invlist,            /* The bitmap */
-        SV* posixes,            /* Under /l, things like [:word:], \S */
-        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
-        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
-        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
-        const bool invert       /* Is the result to be inverted? */
-)
-{
-    /* Create and return an SV containing a displayable version of the bitmap
-     * and associated information determined by the input parameters.  If the
-     * output would have been only the inversion indicator '^', NULL is instead
-     * returned. */
-
-    SV * output;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
-
-    if (invert) {
-        output = newSVpvs("^");
-    }
-    else {
-        output = newSVpvs("");
-    }
-
-    /* First, the code points in the bitmap that are unconditionally there */
-    put_charclass_bitmap_innards_invlist(output, invlist);
-
-    /* Traditionally, these have been placed after the main code points */
-    if (posixes) {
-        sv_catsv(output, posixes);
-    }
-
-    if (only_utf8 && _invlist_len(only_utf8)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, only_utf8);
-    }
-
-    if (not_utf8 && _invlist_len(not_utf8)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, not_utf8);
-    }
-
-    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);
-
-        /* This is the only list in this routine that can legally contain code
-         * points outside the bitmap range.  The call just above to
-         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
-         * output them here.  There's about a half-dozen possible, and none in
-         * contiguous ranges longer than 2 */
-        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
-            UV start, end;
-            SV* above_bitmap = NULL;
-
-            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);
-
-            invlist_iterinit(above_bitmap);
-            while (invlist_iternext(above_bitmap, &start, &end)) {
-                UV i;
-
-                for (i = start; i <= end; i++) {
-                    put_code_point(output, i);
-                }
-            }
-            invlist_iterfinish(above_bitmap);
-            SvREFCNT_dec_NN(above_bitmap);
-        }
-    }
-
-    if (invert && SvCUR(output) == 1) {
-        return NULL;
-    }
-
-    return output;
-}
-
-STATIC U8
-S_put_charclass_bitmap_innards(pTHX_ SV *sv,
-                                     char *bitmap,
-                                     SV *nonbitmap_invlist,
-                                     SV *only_utf8_locale_invlist,
-                                     const regnode * const node,
-                                     const U8 flags,
-                                     const bool force_as_is_display)
-{
-    /* Appends to 'sv' a displayable version of the innards of the bracketed
-     * character class defined by the other arguments:
-     *  'bitmap' points to the bitmap, or NULL if to ignore that.
-     *  'nonbitmap_invlist' is an inversion list of the code points that are in
-     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
-     *      none.  The reasons for this could be that they require some
-     *      condition such as the target string being or not being in UTF-8
-     *      (under /d), or because they came from a user-defined property that
-     *      was not resolved at the time of the regex compilation (under /u)
-     *  'only_utf8_locale_invlist' is an inversion list of the code points that
-     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
-     *  'node' is the regex pattern ANYOF node.  It is needed only when the
-     *      above two parameters are not null, and is passed so that this
-     *      routine can tease apart the various reasons for them.
-     *  'flags' is the flags field of 'node'
-     *  'force_as_is_display' is TRUE if this routine should definitely NOT try
-     *      to invert things to see if that leads to a cleaner display.  If
-     *      FALSE, this routine is free to use its judgment about doing this.
-     *
-     * It returns 0 if nothing was actually output.  (It may be that
-     *              the bitmap, etc is empty.)
-     *            1 if the output wasn't inverted (didn't begin with a '^')
-     *            2 if the output was inverted (did begin with a '^')
-     *
-     * When called for outputting the bitmap of a non-ANYOF node, just pass the
-     * bitmap, with the succeeding parameters set to NULL, and the final one to
-     * FALSE.
-     */
-
-    /* In general, it tries to display the 'cleanest' representation of the
-     * innards, choosing whether to display them inverted or not, regardless of
-     * whether the class itself is to be inverted.  However,  there are some
-     * cases where it can't try inverting, as what actually matches isn't known
-     * until runtime, and hence the inversion isn't either. */
-
-    bool inverting_allowed = ! force_as_is_display;
-
-    int i;
-    STRLEN orig_sv_cur = SvCUR(sv);
-
-    SV* invlist;            /* Inversion list we accumulate of code points that
-                               are unconditionally matched */
-    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
-                               UTF-8 */
-    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
-                             */
-    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
-    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
-                                       is UTF-8 */
-
-    SV* as_is_display;      /* The output string when we take the inputs
-                               literally */
-    SV* inverted_display;   /* The output string when we invert the inputs */
-
-    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
-                                                   to match? */
-    /* We are biased in favor of displaying things without them being inverted,
-     * as that is generally easier to understand */
-    const int bias = 5;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
-
-    /* Start off with whatever code points are passed in.  (We clone, so we
-     * don't change the caller's list) */
-    if (nonbitmap_invlist) {
-        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
-        invlist = invlist_clone(nonbitmap_invlist, NULL);
-    }
-    else {  /* Worst case size is every other code point is matched */
-        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
-    }
-
-    if (flags) {
-        if (OP(node) == ANYOFD) {
-
-            /* This flag indicates that the code points below 0x100 in the
-             * nonbitmap list are precisely the ones that match only when the
-             * target is UTF-8 (they should all be non-ASCII). */
-            if (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES) {
-                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
-                _invlist_subtract(invlist, only_utf8, &invlist);
-            }
-
-            /* And this flag for matching all non-ASCII 0xFF and below */
-            if (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared) {
-                not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
-            }
-        }
-        else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
-
-            /* If either of these flags are set, what matches isn't
-             * determinable except during execution, so don't know enough here
-             * to invert */
-            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
-                inverting_allowed = FALSE;
-            }
-
-            /* What the posix classes match also varies at runtime, so these
-             * will be output symbolically. */
-            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
-                int i;
-
-                posixes = newSVpvs("");
-                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
-                    if (ANYOF_POSIXL_TEST(node, i)) {
-                        sv_catpv(posixes, anyofs[i]);
-                    }
-                }
-            }
-        }
-    }
-
-    /* Accumulate the bit map into the unconditional match list */
-    if (bitmap) {
-        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-            if (BITMAP_TEST(bitmap, i)) {
-                int start = i++;
-                for (;
-                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
-                     i++)
-                { /* empty */ }
-                invlist = _add_range_to_invlist(invlist, start, i-1);
-            }
-        }
-    }
-
-    /* Make sure that the conditional match lists don't have anything in them
-     * that match unconditionally; otherwise the output is quite confusing.
-     * This could happen if the code that populates these misses some
-     * duplication. */
-    if (only_utf8) {
-        _invlist_subtract(only_utf8, invlist, &only_utf8);
-    }
-    if (not_utf8) {
-        _invlist_subtract(not_utf8, invlist, &not_utf8);
-    }
-
-    if (only_utf8_locale_invlist) {
-
-        /* Since this list is passed in, we have to make a copy before
-         * modifying it */
-        only_utf8_locale = invlist_clone(only_utf8_locale_invlist, NULL);
-
-        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
-
-        /* And, it can get really weird for us to try outputting an inverted
-         * form of this list when it has things above the bitmap, so don't even
-         * try */
-        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
-            inverting_allowed = FALSE;
-        }
-    }
-
-    /* Calculate what the output would be if we take the input as-is */
-    as_is_display = put_charclass_bitmap_innards_common(invlist,
-                                                    posixes,
-                                                    only_utf8,
-                                                    not_utf8,
-                                                    only_utf8_locale,
-                                                    invert);
-
-    /* If have to take the output as-is, just do that */
-    if (! inverting_allowed) {
-        if (as_is_display) {
-            sv_catsv(sv, as_is_display);
-            SvREFCNT_dec_NN(as_is_display);
-        }
-    }
-    else { /* But otherwise, create the output again on the inverted input, and
-              use whichever version is shorter */
-
-        int inverted_bias, as_is_bias;
-
-        /* We will apply our bias to whichever of the results doesn't have
-         * the '^' */
-        bool trial_invert;
-        if (invert) {
-            trial_invert = FALSE;
-            as_is_bias = bias;
-            inverted_bias = 0;
-        }
-        else {
-            trial_invert = TRUE;
-            as_is_bias = 0;
-            inverted_bias = bias;
-        }
-
-        /* Now invert each of the lists that contribute to the output,
-         * excluding from the result things outside the possible range */
-
-        /* For the unconditional inversion list, we have to add in all the
-         * conditional code points, so that when inverted, they will be gone
-         * from it */
-        _invlist_union(only_utf8, invlist, &invlist);
-        _invlist_union(not_utf8, invlist, &invlist);
-        _invlist_union(only_utf8_locale, invlist, &invlist);
-        _invlist_invert(invlist);
-        _invlist_intersection(invlist, PL_InBitmap, &invlist);
-
-        if (only_utf8) {
-            _invlist_invert(only_utf8);
-            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
-        }
-        else if (not_utf8) {
-
-            /* If a code point matches iff the target string is not in UTF-8,
-             * then complementing the result has it not match iff not in UTF-8,
-             * which is the same thing as matching iff it is UTF-8. */
-            only_utf8 = not_utf8;
-            not_utf8 = NULL;
-        }
-
-        if (only_utf8_locale) {
-            _invlist_invert(only_utf8_locale);
-            _invlist_intersection(only_utf8_locale,
-                                  PL_InBitmap,
-                                  &only_utf8_locale);
-        }
-
-        inverted_display = put_charclass_bitmap_innards_common(
-                                            invlist,
-                                            posixes,
-                                            only_utf8,
-                                            not_utf8,
-                                            only_utf8_locale, trial_invert);
-
-        /* Use the shortest representation, taking into account our bias
-         * against showing it inverted */
-        if (   inverted_display
-            && (   ! as_is_display
-                || (  SvCUR(inverted_display) + inverted_bias
-                    < SvCUR(as_is_display)    + as_is_bias)))
-        {
-            sv_catsv(sv, inverted_display);
-            invert = ! invert;
-        }
-        else if (as_is_display) {
-            sv_catsv(sv, as_is_display);
-        }
-
-        SvREFCNT_dec(as_is_display);
-        SvREFCNT_dec(inverted_display);
-    }
-
-    SvREFCNT_dec_NN(invlist);
-    SvREFCNT_dec(only_utf8);
-    SvREFCNT_dec(not_utf8);
-    SvREFCNT_dec(posixes);
-    SvREFCNT_dec(only_utf8_locale);
-
-    U8 did_output_something = (bool) (SvCUR(sv) > orig_sv_cur);
-    if (did_output_something) {
-        /* Distinguish between non and inverted cases */
-        did_output_something += invert;
-    }
-
-    return did_output_something;
-}
-
-#define CLEAR_OPTSTART                                                       \
-    if (optstart) STMT_START {                                               \
-        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                           \
-                              " (%" IVdf " nodes)\n", (IV)(node - optstart))); \
-        optstart=NULL;                                                       \
-    } STMT_END
-
-#define DUMPUNTIL(b,e)                                                       \
-                    CLEAR_OPTSTART;                                          \
-                    node=dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
-
-STATIC const regnode *
-S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
-            const regnode *last, const regnode *plast,
-            SV* sv, I32 indent, U32 depth)
-{
-    const regnode *next;
-    const regnode *optstart= NULL;
-
-    RXi_GET_DECL(r, ri);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMPUNTIL;
-
-#ifdef DEBUG_DUMPUNTIL
-    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n", indent, node-start,
-        last ? last-start : 0, plast ? plast-start : 0);
-#endif
-
-    if (plast && plast < last)
-        last= plast;
-
-    while (node && (!last || node < last)) {
-        const U8 op = OP(node);
-
-        if (op == CLOSE || op == SRCLOSE || op == WHILEM)
-            indent--;
-        next = regnext((regnode *)node);
-        const regnode *after = regnode_after((regnode *)node,0);
-
-        /* Where, what. */
-        if (op == OPTIMIZED) {
-            if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
-                optstart = node;
-            else
-                goto after_print;
-        } else
-            CLEAR_OPTSTART;
-
-        regprop(r, sv, node, NULL, NULL);
-        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
-                      (int)(2*indent + 1), "", SvPVX_const(sv));
-
-        if (op != OPTIMIZED) {
-            if (next == NULL)           /* Next ptr. */
-                Perl_re_printf( aTHX_  " (0)");
-            else if (REGNODE_TYPE(op) == BRANCH
-                     && REGNODE_TYPE(OP(next)) != BRANCH )
-                Perl_re_printf( aTHX_  " (FAIL)");
-            else
-                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
-            Perl_re_printf( aTHX_ "\n");
-        }
-
-      after_print:
-        if (REGNODE_TYPE(op) == BRANCHJ) {
-            assert(next);
-            const regnode *nnode = (OP(next) == LONGJMP
-                                   ? regnext((regnode *)next)
-                                   : next);
-            if (last && nnode > last)
-                nnode = last;
-            DUMPUNTIL(after, nnode);
-        }
-        else if (REGNODE_TYPE(op) == BRANCH) {
-            assert(next);
-            DUMPUNTIL(after, next);
-        }
-        else if ( REGNODE_TYPE(op)  == TRIE ) {
-            const regnode *this_trie = node;
-            const U32 n = ARG(node);
-            const reg_ac_data * const ac = op>=AHOCORASICK ?
-               (reg_ac_data *)ri->data->data[n] :
-               NULL;
-            const reg_trie_data * const trie =
-                (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
-#ifdef DEBUGGING
-            AV *const trie_words
-                           = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
-#endif
-            const regnode *nextbranch= NULL;
-            I32 word_idx;
-            SvPVCLEAR(sv);
-            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
-                SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);
-
-                Perl_re_indentf( aTHX_  "%s ",
-                    indent+3,
-                    elem_ptr
-                    ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), PL_dump_re_max_len,
-                                PL_colors[0], PL_colors[1],
-                                (SvUTF8(*elem_ptr)
-                                 ? PERL_PV_ESCAPE_UNI
-                                 : 0)
-                                | PERL_PV_PRETTY_ELLIPSES
-                                | PERL_PV_PRETTY_LTGT
-                            )
-                    : "???"
-                );
-                if (trie->jump) {
-                    U16 dist= trie->jump[word_idx+1];
-                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
-                               (UV)((dist ? this_trie + dist : next) - start));
-                    if (dist) {
-                        if (!nextbranch)
-                            nextbranch= this_trie + trie->jump[0];
-                        DUMPUNTIL(this_trie + dist, nextbranch);
-                    }
-                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
-                        nextbranch= regnext((regnode *)nextbranch);
-                } else {
-                    Perl_re_printf( aTHX_  "\n");
-                }
-            }
-            if (last && next > last)
-                node= last;
-            else
-                node= next;
-        }
-        else if ( op == CURLY ) {   /* "next" might be very big: optimizer */
-            DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
-        }
-        else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
-            assert(next);
-            DUMPUNTIL(after, next);
-        }
-        else if ( op == PLUS || op == STAR) {
-            DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
-        }
-        else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
-            /* Literal string, where present. */
-            node = (const regnode *)REGNODE_AFTER_varies(node);
-        }
-        else {
-            node = REGNODE_AFTER_opcode(node,op);
-        }
-        if (op == CURLYX || op == OPEN || op == SROPEN)
-            indent++;
-        if (REGNODE_TYPE(op) == END)
-            break;
-    }
-    CLEAR_OPTSTART;
-#ifdef DEBUG_DUMPUNTIL
-    Perl_re_printf( aTHX_  "--- %d\n", (int)indent);
-#endif
-    return node;
-}
-
-#endif  /* DEBUGGING */
-
 #ifndef PERL_IN_XSUB_RE
 
 #  include "uni_keywords.h"
@@ -23939,7 +13666,7 @@ warning when none was present before might cause breakage, for little gain.  So
 khw left this code in, but not enabled.  Tests were never added.
 
 embed.fnc entry:
-Ei      |const char *|get_extended_utf8_msg|const UV cp
+Ei	|const char *|get_extended_utf8_msg|const UV cp
 
 PERL_STATIC_INLINE const char *
 S_get_extended_utf8_msg(pTHX_ const UV cp)