26 files changed, 11496 insertions, 11213 deletions
diff --git a/MANIFEST b/MANIFEST
index 5c94dd640f..283449884b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5513,6 +5513,11 @@ regcharclass.h			Generated by regen/regcharclass.pl
 regcomp.c			Regular expression compiler
 regcomp.h			Private declarations for above
 regcomp.sym			Data for regnodes.h
+regcomp_debug.c			Regular expression compiler debug code
+regcomp_internal.h		Internal stuff for regex compiler
+regcomp_invlist.c		Invlist logic for regular expresion engine
+regcomp_study.c			Optimizer for regular expresion compiler
+regcomp_trie.c			Trie logic for regular expresion compiler
 regen.pl			Run all scripts that (re)generate files
 regen/charset_translations.pl	Character set utilities
 regen/ebcdic.pl			Generates ebcdic_tables.h
diff --git a/Makefile.SH b/Makefile.SH
index 20f6efed4e..6b9f48f463 100755
--- a/Makefile.SH
+++ b/Makefile.SH
@@ -530,17 +530,19 @@ unidatadirs = lib/unicore/To lib/unicore/lib
 
 h1 = EXTERN.h INTERN.h XSUB.h av.h $(CONFIGH) cop.h cv.h dosish.h
 h2 = embed.h form.h gv.h handy.h hv.h hv_func.h keywords.h mg.h op.h opcode.h
-h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h
+h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h regcomp_internal.h
 h4 = regexp.h scope.h sv.h unixish.h util.h iperlsys.h thread.h
 h5 = utf8.h warnings.h mydtrace.h op_reg_common.h l1_char_class_tab.h
 h6 = charclass_invlists.h
 h = $(h1) $(h2) $(h3) $(h4) $(h5) $(h6)
 
 c1 = av.c scope.c op.c peep.c doop.c doio.c dump.c gv.c hv.c mg.c reentr.c mro_core.c perl.c
-c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c sv.c
+c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c utf8.c sv.c
 c3 = taint.c toke.c util.c deb.c run.c builtin.c universal.c pad.c globals.c keywords.c
 c4 = perlio.c numeric.c mathoms.c locale.c pp_pack.c pp_sort.c caretx.c dquote.c time64.c
-c5 = $(mallocsrc)
+c5 = regcomp.c regcomp_debug.c regcomp_invlist.c regcomp_study.c regcomp_trie.c regexec.c
+c6 = $(mallocsrc)
+c_base  = $(c1) $(c2) $(c3) $(c4) $(c5) $(c6)
 
 !NO!SUBS!
 
@@ -552,17 +554,24 @@ main_only_objs =$main_only_objs
 
 $spitshell >>$Makefile <<'!NO!SUBS!'
 
-c = $(c1) $(c2) $(c3) $(c4) $(c5) miniperlmain.c $(mini_only_src)
+c = $(c_base) miniperlmain.c $(mini_only_src)
 
-obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) pad$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) reentr$(OBJ_EXT) mro_core$(OBJ_EXT) keywords$(OBJ_EXT) builtin$(OBJ_EXT)
-obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) peep$(OBJ_EXT)
-obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) numeric$(OBJ_EXT) mathoms$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT) time64$(OBJ_EXT)
+obj1  = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) pad$(OBJ_EXT)
+obj2  = regcomp$(OBJ_EXT) regcomp_debug$(OBJ_EXT) regcomp_invlist$(OBJ_EXT) regcomp_study$(OBJ_EXT) regcomp_trie$(OBJ_EXT)
+obj3  = regexec$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) reentr$(OBJ_EXT) mro_core$(OBJ_EXT)
+obj4  = keywords$(OBJ_EXT) builtin$(OBJ_EXT)
+obj5  = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT)
+obj6  = scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) peep$(OBJ_EXT)
+obj7  = doop$(OBJ_EXT) doio$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT)
+obj8  = deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) numeric$(OBJ_EXT) mathoms$(OBJ_EXT)
+obj9  = locale$(OBJ_EXT) pp_pack$(OBJ_EXT) pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT)
+obj10 = time64$(OBJ_EXT)
 
 # split the objects into 3 exclusive sets: those used by both miniperl and
 # perl, and those used by just one or the other. Doesn't include the
 # actual perl(mini)main.o, nor any dtrace objects.
 
-common_objs    = $(obj1) $(obj2) $(obj3) $(ARCHOBJS)
+common_objs    = $(obj1) $(obj2) $(obj3) $(obj4) $(obj5) $(obj6) $(obj7) $(obj8) $(obj9) $(obj10) $(ARCHOBJS)
 
 miniperl_objs_nodt = $(mini_only_objs) $(common_objs) miniperlmain$(OBJ_EXT)
 perllib_objs_nodt  = $(main_only_objs) $(common_objs)
@@ -1722,8 +1731,8 @@ distcheck: FORCE
 
 .PHONY: ctags
 
-TAGS: $(c1) $(c2) $(c3) $(c4) $(c5) $(h)
-	etags $(c1) $(c2) $(c3) $(c4) $(c5) $(h)
+TAGS: $(c_base) $(h)
+	etags $(c_base) $(h)
 !NO!SUBS!
 
 $spitshell >>$Makefile <<!GROK!THIS!
diff --git a/Makefile.micro b/Makefile.micro
index 89b52faf8c..75f9333a2a 100644
--- a/Makefile.micro
+++ b/Makefile.micro
@@ -18,7 +18,8 @@ O = uav$(_O) udeb$(_O) udoio$(_O) udoop$(_O) udquote$(_O) udump$(_O) \
 	umg$(_O) uperlmain$(_O) uop$(_O) ureentr$(_O) \
 	upad$(_O) uperl$(_O) uperlio$(_O) uperly$(_O) upp$(_O) \
 	upp_ctl$(_O) upp_hot$(_O) upp_sys$(_O) upp_pack$(_O) upp_sort$(_O) \
-	uregcomp$(_O) uregexec$(_O) urun$(_O) \
+	uregcomp$(_O) uregcomp_debug$(_O) uregcomp_invlist$(_O) \
+	uregcomp_study$(_O) uregcomp_trie$(_O) uregexec$(_O) urun$(_O) \
 	uscope$(_O) usv$(_O) utaint$(_O) utime64$(_O) utoke$(_O) \
 	unumeric$(_O) ulocale$(_O) umathoms$(_O) \
 	uuniversal$(_O) uutf8$(_O) uutil$(_O) ukeywords$(_O)
@@ -138,7 +139,19 @@ upp_pack$(_O):	$(HE) pp_pack.c
 upp_sort$(_O):	$(HE) pp_sort.c
 	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) pp_sort.c
 
-uregcomp$(_O):	$(HE) regcomp.c regcomp.h regnodes.h INTERN.h
+uregcomp$(_O):	$(HE) regcomp.c regcomp_internal.h regcomp.h regnodes.h INTERN.h
+	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) regcomp.c
+
+uregcomp_debug$(_O):	$(HE) regcomp_debug.c regcomp_internal.h regcomp.h regnodes.h INTERN.h
+	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) regcomp.c
+
+uregcomp_invlist$(_O):	$(HE) regcomp_invlist.c regcomp_internal.h regcomp.h regnodes.h INTERN.h
+	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) regcomp.c
+
+uregcomp_study$(_O):	$(HE) regcomp_study.c regcomp_internal.h regcomp.h regnodes.h INTERN.h
+	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) regcomp.c
+
+uregcomp_trie$(_O):	$(HE) regcomp_trie.c regcomp_internal.h regcomp.h regnodes.h INTERN.h
 	$(CC) $(CCFLAGS) -o $@ $(CFLAGS) regcomp.c
 
 uregexec$(_O):	$(HE) regexec.c regcomp.h regnodes.h
diff --git a/embed.fnc b/embed.fnc
index b64a572904..5ad6b80675 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2040,7 +2040,7 @@ Cp	|I32	|regexec_flags	|NN REGEXP *const rx|NN char *stringarg \
 				|NN char *strend|NN char *strbeg \
 				|SSize_t minend|NN SV *sv \
 				|NULLOK void *data|U32 flags
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_REGEX_ENGINE)
 CipR	|regnode*|regnext	|NULLOK const regnode* p
 CipR	|bool|check_regnode_after	|NULLOK const regnode* p|const STRLEN extra
 CipR	|regnode*|regnode_after	|NULLOK const regnode* p|bool varies
@@ -2069,14 +2069,16 @@ EXpRT	|I16	|do_uniprop_match|NN const char * const key|const U16 key_len
 EXpRT	|const char * const *|get_prop_values|const int table_index
 EXpR	|SV *	|get_prop_definition|const int table_index
 EXpRT	|const char *|get_deprecated_property_msg|const Size_t warning_offset
-#if defined(PERL_IN_REGCOMP_C)
+#if defined(PERL_IN_REGCOMP_ANY)
 EiRT	|bool	|invlist_is_iterating|NN const SV* const invlist
 EiR	|SV*	|invlist_contents|NN SV* const invlist		    \
 				 |const bool traditional_style
 EixRT	|UV	|invlist_lowest|NN SV* const invlist
+#endif
+#if defined(PERL_IN_REGCOMP_ANY)
 EixRT	|UV	|invlist_highest_range_start|NN SV* const invlist
-ERS	|SV*	|make_exactf_invlist	|NN RExC_state_t *pRExC_state \
-					|NN regnode *node
+#endif
+#if defined(PERL_IN_REGCOMP_C)
 ES	|regnode_offset|reg_la_NOTHING	|NN RExC_state_t *pRExC_state \
 					|U32 flags|NN const char *type
 ES	|regnode_offset|reg_la_OPFAIL	|NN RExC_state_t *pRExC_state \
@@ -2103,11 +2105,6 @@ ES	|regnode_offset|regatom	|NN RExC_state_t *pRExC_state \
 				|NN I32 *flagp|U32 depth
 ES	|regnode_offset|regbranch	|NN RExC_state_t *pRExC_state \
 				|NN I32 *flagp|I32 first|U32 depth
-ES	|void	 |set_ANYOF_arg	|NN RExC_state_t* const pRExC_state \
-				|NN regnode* const node                    \
-				|NULLOK SV* const cp_list                  \
-				|NULLOK SV* const runtime_defns		   \
-				|NULLOK SV* const only_utf8_locale_list
 ES	|void	|output_posix_warnings					    \
 				|NN RExC_state_t *pRExC_state		    \
 				|NN AV* posix_warnings
@@ -2162,8 +2159,6 @@ ES	|bool	|handle_names_wildcard					    \
 				|const STRLEN wname_len			    \
 				|NN SV ** prop_definition		    \
 				|NN AV ** strings
-ES	|void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
-				|NN SV** invlist
 ES	|regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state   \
 				|NN I32 *flagp				    \
 				|NN char * backref_parse_start		    \
@@ -2202,25 +2197,83 @@ ESR	|bool	|regtail	|NN RExC_state_t * pRExC_state		    \
 				|const U32 depth
 ES	|SV *	|reg_scan_name	|NN RExC_state_t *pRExC_state \
 				|U32 flags
-ES	|U32	|join_exact	|NN RExC_state_t *pRExC_state \
-				|NN regnode *scan|NN UV *min_subtract  \
-				|NN bool *unfolded_multi_char          \
-				|U32 flags|NULLOK regnode *val|U32 depth
 EST	|U8   |compute_EXACTish|NN RExC_state_t *pRExC_state
 ES	|void	|nextchar	|NN RExC_state_t *pRExC_state
 ES	|void	|skip_to_be_ignored_text|NN RExC_state_t *pRExC_state  \
 				|NN char ** p			    \
 				|const bool force_to_xmod
 EiT	|char *	|reg_skipcomment|NN RExC_state_t *pRExC_state|NN char * p
-ES	|void	|scan_commit	|NN const RExC_state_t *pRExC_state \
+frS	|void	|re_croak	|bool utf8|NN const char* pat|...
+ES	|int	|handle_possible_posix					    \
+				|NN RExC_state_t *pRExC_state		    \
+				|NN const char* const s			    \
+				|NULLOK char ** updated_parse_ptr	    \
+				|NULLOK AV** posix_warnings		    \
+				|const bool check_only
+ETSR	|int	|edit_distance	|NN const UV *src		    \
+				|NN const UV *tgt		    \
+				|const STRLEN x			    \
+				|const STRLEN y			    \
+				|const SSize_t maxDistance
+#  ifdef DEBUGGING
+ESR	|bool	|regtail_study	|NN RExC_state_t *pRExC_state \
+				|NN regnode_offset p|NN const regnode_offset val|U32 depth
+#  endif
+#endif
+#if defined(PERL_IN_REGCOMP_INVLIST_C)
+#  ifndef PERL_EXT_RE_BUILD
+EiRT	|UV*	|_invlist_array_init	|NN SV* const invlist|const bool will_have_0
+EiRT	|UV	|invlist_max	|NN const SV* const invlist
+EiRT	|IV*	|get_invlist_previous_index_addr|NN SV* invlist
+EiT	|void	|invlist_set_previous_index|NN SV* const invlist|const IV index
+EiRT	|IV	|invlist_previous_index|NN SV* const invlist
+EiT	|void	|invlist_trim	|NN SV* invlist
+Ei	|void	|invlist_clear	|NN SV* invlist
+ES	|void	|_append_range_to_invlist   |NN SV* const invlist|const UV start|const UV end
+ES	|void	|invlist_replace_list_destroys_src|NN SV *dest|NN SV *src
+S	|void	|initialize_invlist_guts|NN SV* invlist|const Size_t initial_size
+#  endif
+#endif
+#if defined(PERL_IN_REGCOMP_ANY)
+EpR	|SV *	|get_ANYOFM_contents|NN const regnode * n
+EpR	|SV *	|get_ANYOFHbbm_contents|NN const regnode * n
+Ep	|void	 |set_ANYOF_arg	|NN RExC_state_t* const pRExC_state \
+				|NN regnode* const node                    \
+				|NULLOK SV* const cp_list                  \
+				|NULLOK SV* const runtime_defns		   \
+				|NULLOK SV* const only_utf8_locale_list
+Ep	|void	|populate_anyof_bitmap_from_invlist|NN regnode *node|NN SV** invlist_ptr
+Ep	|void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
+				|NN SV** invlist
+Ep	|SSize_t|study_chunk	|NN RExC_state_t *pRExC_state \
+				|NN regnode **scanp|NN SSize_t *minlenp \
+				|NN SSize_t *deltap|NN regnode *last \
+				|NULLOK struct scan_data_t *data \
+                                |I32 stopparen|U32 recursed_depth \
+				|NULLOK regnode_ssc *and_withp \
+				|U32 flags|U32 depth|bool was_mutate_ok
+Ep	|void	|scan_commit	|NN const RExC_state_t *pRExC_state \
 				|NN struct scan_data_t *data        \
 				|NN SSize_t *minlenp		    \
 				|int is_inf
-ES	|void	|populate_anyof_bitmap_from_invlist|NN regnode *node|NN SV** invlist_ptr
+Ep	|void	|ssc_init	|NN const RExC_state_t *pRExC_state \
+				|NN regnode_ssc *ssc
+ETp	|bool	|is_ssc_worth_it|NN const RExC_state_t * pRExC_state \
+				|NN const regnode_ssc * ssc
+Ep	|void	|ssc_finalize	|NN RExC_state_t *pRExC_state \
+				|NN regnode_ssc *ssc
+Ep	|U32	|join_exact	|NN RExC_state_t *pRExC_state \
+				|NN regnode *scan|NN UV *min_subtract  \
+				|NN bool *unfolded_multi_char          \
+				|U32 flags|NULLOK regnode *val|U32 depth
+#endif
+#if defined(PERL_IN_REGCOMP_STUDY_C)
+ES	|void   |unwind_scan_frames|NN const void *p
+ES	|void	|rck_elide_nothing|NN regnode *node
+ERS	|SV*	|make_exactf_invlist	|NN RExC_state_t *pRExC_state \
+					|NN regnode *node
 ES	|void	|ssc_anything	|NN regnode_ssc *ssc
 ESRT	|int	|ssc_is_anything|NN const regnode_ssc *ssc
-ES	|void	|ssc_init	|NN const RExC_state_t *pRExC_state \
-				|NN regnode_ssc *ssc
 ESRT	|int	|ssc_is_cp_posixl_init|NN const RExC_state_t *pRExC_state \
 				|NN const regnode_ssc *ssc
 ES	|void	|ssc_and	|NN const RExC_state_t *pRExC_state \
@@ -2241,111 +2294,30 @@ ES	|void	|ssc_add_range	|NN regnode_ssc *ssc \
 ES	|void	|ssc_cp_and	|NN regnode_ssc *ssc \
 				|UV const cp
 EST	|void	|ssc_clear_locale|NN regnode_ssc *ssc
-ETS	|bool	|is_ssc_worth_it|NN const RExC_state_t * pRExC_state \
-				|NN const regnode_ssc * ssc
-ES	|void	|ssc_finalize	|NN RExC_state_t *pRExC_state \
-				|NN regnode_ssc *ssc
-ES	|SSize_t|study_chunk	|NN RExC_state_t *pRExC_state \
-				|NN regnode **scanp|NN SSize_t *minlenp \
-				|NN SSize_t *deltap|NN regnode *last \
-				|NULLOK struct scan_data_t *data \
-                                |I32 stopparen|U32 recursed_depth \
-				|NULLOK regnode_ssc *and_withp \
-				|U32 flags|U32 depth|bool was_mutate_ok
-ES	|void	|rck_elide_nothing|NN regnode *node
-ESR	|SV *	|get_ANYOFM_contents|NN const regnode * n
-ESR	|SV *	|get_ANYOFHbbm_contents|NN const regnode * n
-ES	|void	|populate_bitmap_from_invlist				    \
+#endif
+#if defined(PERL_IN_REGCOMP_INVLIST_C) || defined(PERL_IN_REGCOMP_C)
+Ep	|void	|populate_bitmap_from_invlist				    \
 				|NN SV * invlist			    \
 				|const UV offset			    \
 				|NN const U8 * bitmap			    \
 				|const Size_t len
-ES	|void	|populate_invlist_from_bitmap				    \
+Ep	|void	|populate_invlist_from_bitmap				    \
 				|NN const U8 * bitmap			    \
 				|const Size_t bitmap_len		    \
 				|NN SV ** invlist			    \
 				|const UV offset
-ESRT	|U32	|add_data	|NN RExC_state_t* const pRExC_state \
+#endif
+#if defined(PERL_IN_REGCOMP_ANY)
+EpRT	|U32	|reg_add_data	|NN RExC_state_t* const pRExC_state \
 				|NN const char* const s|const U32 n
-frS	|void	|re_croak	|bool utf8|NN const char* pat|...
-ES	|int	|handle_possible_posix					    \
-				|NN RExC_state_t *pRExC_state		    \
-				|NN const char* const s			    \
-				|NULLOK char ** updated_parse_ptr	    \
-				|NULLOK AV** posix_warnings		    \
-				|const bool check_only
-ES	|I32	|make_trie	|NN RExC_state_t *pRExC_state \
-				|NN regnode *startbranch|NN regnode *first \
-				|NN regnode *last|NN regnode *tail \
-				|U32 word_count|U32 flags|U32 depth
-ES	|regnode *|construct_ahocorasick_from_trie|NN RExC_state_t *pRExC_state \
-                                |NN regnode *source|U32 depth
-ETSR	|int	|edit_distance	|NN const UV *src		    \
-				|NN const UV *tgt		    \
-				|const STRLEN x			    \
-				|const STRLEN y			    \
-				|const SSize_t maxDistance
-#  ifdef DEBUGGING
-EFp	|int	|re_indentf	|NN const char *fmt|U32 depth|...
-ES	|void        |regdump_intflags|NULLOK const char *lead| const U32 flags
-ES	|void	|regdump_extflags|NULLOK const char *lead| const U32 flags
-ES	|const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
-				|NN const regnode *node \
-				|NULLOK const regnode *last \
-				|NULLOK const regnode *plast \
-				|NN SV* sv|I32 indent|U32 depth
-ES	|void	|put_code_point	|NN SV* sv|UV c
-ES	|U8	|put_charclass_bitmap_innards|NN SV* sv		    \
-				|NULLOK char* bitmap		    \
-				|NULLOK SV* nonbitmap_invlist	    \
-				|NULLOK SV* only_utf8_locale_invlist\
-				|NULLOK const regnode * const node  \
-				|const U8 flags			    \
-				|const bool force_as_is_display
-ES	|SV*	|put_charclass_bitmap_innards_common		    \
-				|NN SV* invlist			    \
-				|NULLOK SV* posixes		    \
-				|NULLOK SV* only_utf8		    \
-				|NULLOK SV* not_utf8		    \
-				|NULLOK SV* only_utf8_locale	    \
-				|const bool invert
-ES	|void	|put_charclass_bitmap_innards_invlist		    \
-				|NN SV *sv			    \
-				|NN SV* invlist
-ES	|void	|put_range	|NN SV* sv|UV start|const UV end    \
-				|const bool allow_literals
-ES	|void	|dump_trie	|NN const struct _reg_trie_data *trie\
-				|NULLOK HV* widecharmap|NN AV *revcharmap\
-				|U32 depth
-ES	|void	|dump_trie_interim_list|NN const struct _reg_trie_data *trie\
-				|NULLOK HV* widecharmap|NN AV *revcharmap\
-				|U32 next_alloc|U32 depth
-ES	|void	|dump_trie_interim_table|NN const struct _reg_trie_data *trie\
-				|NULLOK HV* widecharmap|NN AV *revcharmap\
-				|U32 next_alloc|U32 depth
-ESR	|bool	|regtail_study	|NN RExC_state_t *pRExC_state \
-				|NN regnode_offset p|NN const regnode_offset val|U32 depth
-#  endif
-#  ifndef PERL_EXT_RE_BUILD
-EiRT	|UV*	|_invlist_array_init	|NN SV* const invlist|const bool will_have_0
-EiRT	|UV	|invlist_max	|NN const SV* const invlist
-EiRT	|IV*	|get_invlist_previous_index_addr|NN SV* invlist
-EiT	|void	|invlist_set_previous_index|NN SV* const invlist|const IV index
-EiRT	|IV	|invlist_previous_index|NN SV* const invlist
-EiT	|void	|invlist_trim	|NN SV* invlist
-Ei	|void	|invlist_clear	|NN SV* invlist
-ES	|void	|_append_range_to_invlist   |NN SV* const invlist|const UV start|const UV end
-ES	|void	|invlist_replace_list_destroys_src|NN SV *dest|NN SV *src
-S	|void	|initialize_invlist_guts|NN SV* invlist|const Size_t initial_size
-#  endif
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
 EiR	|SV*	|add_cp_to_invlist	|NULLOK SV* invlist|const UV cp
 Ei	|void	|invlist_extend    |NN SV* const invlist|const UV len
 Ei	|void	|invlist_set_len|NN SV* const invlist|const UV len|const bool offset
 EiRT	|UV	|invlist_highest|NN SV* const invlist
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
 m	|void	|_invlist_intersection	|NN SV* const a|NN SV* const b|NN SV** i
 EXp	|void	|_invlist_intersection_maybe_complement_2nd \
 		|NULLOK SV* const a|NN SV* const b          \
@@ -2394,25 +2366,25 @@ EpRX	|const char *|form_cp_too_large_msg|const U8 which	\
 				|const Size_t len		\
 				|const UV cp
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
 EXp	|void	|_invlist_dump	|NN PerlIO *file|I32 level   \
 				|NN const char* const indent \
 				|NN SV* const invlist
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
 EiRT	|STRLEN*|get_invlist_iter_addr	|NN SV* invlist
 EiT	|void	|invlist_iterinit|NN SV* invlist
 EiRT	|bool	|invlist_iternext|NN SV* invlist|NN UV* start|NN UV* end
 EiT	|void	|invlist_iterfinish|NN SV* invlist
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
 EXpR	|SV*	|_new_invlist_C_array|NN const UV* const list
 EXp	|bool	|_invlistEQ	|NN SV* const a|NN SV* const b|const bool complement_b
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
 EiT	|const char *|get_regex_charset_name|const U32 flags|NN STRLEN* const lenp
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	\
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_REGEXEC_C)	\
  || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		\
  || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		\
  || defined(PERL_IN_DOOP_C)
@@ -2423,7 +2395,7 @@ EiRT	|UV	|_invlist_len	|NN SV* const invlist
 EiRT	|bool	|_invlist_contains_cp|NN SV* const invlist|const UV cp
 EXpRT	|SSize_t|_invlist_search	|NN SV* const invlist|const UV cp
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_REGEX_ENGINE)
 #  ifndef PERL_EXT_RE_BUILD
 Ep	|SV*	|get_regclass_aux_data					   \
 				|NULLOK const regexp *prog		   \
@@ -2440,10 +2412,69 @@ Ep	|SV*	|get_re_gclass_aux_data					   \
 				|NULLOK SV **listsvp			   \
 				|NULLOK SV **lonly_utf8_locale		   \
 				|NULLOK SV **output_invlist
+#  endif
 #endif
-Ep	|void	|regprop	|NULLOK const regexp *prog|NN SV* sv|NN const regnode* o|NULLOK const regmatch_info *reginfo \
-				|NULLOK const RExC_state_t *pRExC_state
+#if defined(PERL_IN_REGCOMP_ANY)
+Ep	|I32	|make_trie	|NN RExC_state_t *pRExC_state \
+				|NN regnode *startbranch|NN regnode *first \
+				|NN regnode *last|NN regnode *tail \
+				|U32 word_count|U32 flags|U32 depth
+Ep	|regnode *|construct_ahocorasick_from_trie|NN RExC_state_t *pRExC_state \
+                                |NN regnode *source|U32 depth
+#  if defined(PERL_IN_REGCOMP_TRIE_C) && defined(DEBUGGING)
+ES	|void	|dump_trie	|NN const struct _reg_trie_data *trie\
+				|NULLOK HV* widecharmap|NN AV *revcharmap\
+				|U32 depth
+ES	|void	|dump_trie_interim_list|NN const struct _reg_trie_data *trie\
+				|NULLOK HV* widecharmap|NN AV *revcharmap\
+				|U32 next_alloc|U32 depth
+ES	|void	|dump_trie_interim_table|NN const struct _reg_trie_data *trie\
+				|NULLOK HV* widecharmap|NN AV *revcharmap\
+				|U32 next_alloc|U32 depth
+#  endif
+#endif
+#if defined(PERL_IN_REGEX_ENGINE) && defined(DEBUGGING)
+EFp	|int	|re_indentf	|NN const char *fmt|U32 depth|...
 Efp	|int	|re_printf	|NN const char *fmt|...
+Ep      |void   |debug_show_study_flags|U32 flags|NN const char *open_str \
+                                |NN const char *close_str
+Ep	|void	|regprop	|NULLOK const regexp *prog|NN SV* sv|NN const regnode* o \
+                                |NULLOK const regmatch_info *reginfo \
+				|NULLOK const RExC_state_t *pRExC_state
+Ep      |void   |debug_studydata|NN const char *where|NULLOK scan_data_t *data \
+				|U32 depth|int is_inf|SSize_t min \
+				|SSize_t stopmin|SSize_t delta
+Ep	|void	|debug_peep	|NN const char *str|NN const RExC_state_t *pRExC_state \
+				|NULLOK regnode *scan|U32 depth|U32 flags
+Ep	|const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
+				|NN const regnode *node \
+				|NULLOK const regnode *last \
+				|NULLOK const regnode *plast \
+				|NN SV* sv|I32 indent|U32 depth
+#endif
+#if defined(PERL_IN_REGCOMP_DEBUG_C) && defined(DEBUGGING)
+ES	|void   |regdump_intflags|NULLOK const char *lead| const U32 flags
+ES	|void	|regdump_extflags|NULLOK const char *lead| const U32 flags
+ES	|void	|put_code_point	|NN SV* sv|UV c
+ES	|U8	|put_charclass_bitmap_innards|NN SV* sv		    \
+				|NULLOK char* bitmap		    \
+				|NULLOK SV* nonbitmap_invlist	    \
+				|NULLOK SV* only_utf8_locale_invlist\
+				|NULLOK const regnode * const node  \
+				|const U8 flags			    \
+				|const bool force_as_is_display
+ES	|SV*	|put_charclass_bitmap_innards_common		    \
+				|NN SV* invlist			    \
+				|NULLOK SV* posixes		    \
+				|NULLOK SV* only_utf8		    \
+				|NULLOK SV* not_utf8		    \
+				|NULLOK SV* only_utf8_locale	    \
+				|const bool invert
+ES	|void	|put_charclass_bitmap_innards_invlist		    \
+				|NN SV *sv			    \
+				|NN SV* invlist
+ES	|void	|put_range	|NN SV* sv|UV start|const UV end    \
+				|const bool allow_literals
 #endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C)
 ERp	|bool	|is_grapheme	|NN const U8 * strbeg|NN const U8 * s|NN const U8 *strend|const UV cp
@@ -2451,7 +2482,7 @@ ERp	|bool	|is_grapheme	|NN const U8 * strbeg|NN const U8 * s|NN const U8 *strend
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
 EXTp	|UV	|_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const unsigned int flags
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_SV_C)
 EpX	|SV*	|invlist_clone	|NN SV* const invlist|NULLOK SV* newlist
 #endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
diff --git a/embed.h b/embed.h
index 3e469d3d13..694aaf5578 100644
--- a/embed.h
+++ b/embed.h
@@ -838,7 +838,7 @@
 #define dump_mstats(a)		Perl_dump_mstats(aTHX_ a)
 #define get_mstats(a,b,c)	Perl_get_mstats(aTHX_ a,b,c)
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_REGEX_ENGINE)
 #define check_regnode_after(a,b)	Perl_check_regnode_after(aTHX_ a,b)
 #define regnext(a)		Perl_regnext(aTHX_ a)
 #define regnode_after(a,b)	Perl_regnode_after(aTHX_ a,b)
@@ -964,12 +964,12 @@
 #define my_memrchr		S_my_memrchr
 #  endif
 #  if !(!defined(PERL_EXT_RE_BUILD))
-#    if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#    if defined(PERL_IN_REGEX_ENGINE)
 #define get_re_gclass_aux_data(a,b,c,d,e,f)	Perl_get_re_gclass_aux_data(aTHX_ a,b,c,d,e,f)
 #    endif
 #  endif
 #  if !defined(PERL_EXT_RE_BUILD)
-#    if defined(PERL_IN_REGCOMP_C)
+#    if defined(PERL_IN_REGCOMP_INVLIST_C)
 #define _append_range_to_invlist(a,b,c)	S__append_range_to_invlist(aTHX_ a,b,c)
 #define _invlist_array_init	S__invlist_array_init
 #define get_invlist_previous_index_addr	S_get_invlist_previous_index_addr
@@ -980,26 +980,12 @@
 #define invlist_set_previous_index	S_invlist_set_previous_index
 #define invlist_trim		S_invlist_trim
 #    endif
-#    if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#    if defined(PERL_IN_REGEX_ENGINE)
 #define get_regclass_aux_data(a,b,c,d,e,f)	Perl_get_regclass_aux_data(aTHX_ a,b,c,d,e,f)
 #    endif
 #  endif
 #  if defined(DEBUGGING)
 #    if defined(PERL_IN_REGCOMP_C)
-#define dump_trie(a,b,c,d)	S_dump_trie(aTHX_ a,b,c,d)
-#define dump_trie_interim_list(a,b,c,d,e)	S_dump_trie_interim_list(aTHX_ a,b,c,d,e)
-#define dump_trie_interim_table(a,b,c,d,e)	S_dump_trie_interim_table(aTHX_ a,b,c,d,e)
-#define dumpuntil(a,b,c,d,e,f,g,h)	S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
-#define put_charclass_bitmap_innards(a,b,c,d,e,f,g)	S_put_charclass_bitmap_innards(aTHX_ a,b,c,d,e,f,g)
-#define put_charclass_bitmap_innards_common(a,b,c,d,e,f)	S_put_charclass_bitmap_innards_common(aTHX_ a,b,c,d,e,f)
-#define put_charclass_bitmap_innards_invlist(a,b)	S_put_charclass_bitmap_innards_invlist(aTHX_ a,b)
-#define put_code_point(a,b)	S_put_code_point(aTHX_ a,b)
-#define put_range(a,b,c,d)	S_put_range(aTHX_ a,b,c,d)
-#if !defined(MULTIPLICITY) || defined(PERL_CORE)
-#define re_indentf(a,...)	Perl_re_indentf(aTHX_ a,__VA_ARGS__)
-#endif
-#define regdump_extflags(a,b)	S_regdump_extflags(aTHX_ a,b)
-#define regdump_intflags(a,b)	S_regdump_intflags(aTHX_ a,b)
 #define regnode_guts_debug(a,b,c)	S_regnode_guts_debug(aTHX_ a,b,c)
 #define regtail_study(a,b,c,d)	S_regtail_study(aTHX_ a,b,c,d)
 #    endif
@@ -1035,20 +1021,80 @@
 #define mem_collxfrm_(a,b,c,d)	Perl_mem_collxfrm_(aTHX_ a,b,c,d)
 #    endif
 #  endif
+#  if defined(PERL_IN_REGCOMP_ANY)
+#define add_above_Latin1_folds(a,b,c)	Perl_add_above_Latin1_folds(aTHX_ a,b,c)
+#define construct_ahocorasick_from_trie(a,b,c)	Perl_construct_ahocorasick_from_trie(aTHX_ a,b,c)
+#define get_ANYOFHbbm_contents(a)	Perl_get_ANYOFHbbm_contents(aTHX_ a)
+#define get_ANYOFM_contents(a)	Perl_get_ANYOFM_contents(aTHX_ a)
+#define invlist_contents(a,b)	S_invlist_contents(aTHX_ a,b)
+#define invlist_highest_range_start	S_invlist_highest_range_start
+#define invlist_is_iterating	S_invlist_is_iterating
+#define invlist_lowest		S_invlist_lowest
+#define is_ssc_worth_it		Perl_is_ssc_worth_it
+#define join_exact(a,b,c,d,e,f,g)	Perl_join_exact(aTHX_ a,b,c,d,e,f,g)
+#define make_trie(a,b,c,d,e,f,g,h)	Perl_make_trie(aTHX_ a,b,c,d,e,f,g,h)
+#define populate_anyof_bitmap_from_invlist(a,b)	Perl_populate_anyof_bitmap_from_invlist(aTHX_ a,b)
+#define reg_add_data		Perl_reg_add_data
+#define scan_commit(a,b,c,d)	Perl_scan_commit(aTHX_ a,b,c,d)
+#define set_ANYOF_arg(a,b,c,d,e)	Perl_set_ANYOF_arg(aTHX_ a,b,c,d,e)
+#define ssc_finalize(a,b)	Perl_ssc_finalize(aTHX_ a,b)
+#define ssc_init(a,b)		Perl_ssc_init(aTHX_ a,b)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l)	Perl_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
+#    if defined(PERL_IN_REGCOMP_TRIE_C) && defined(DEBUGGING)
+#define dump_trie(a,b,c,d)	S_dump_trie(aTHX_ a,b,c,d)
+#define dump_trie_interim_list(a,b,c,d,e)	S_dump_trie_interim_list(aTHX_ a,b,c,d,e)
+#define dump_trie_interim_table(a,b,c,d,e)	S_dump_trie_interim_table(aTHX_ a,b,c,d,e)
+#    endif
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
+#define _invlist_dump(a,b,c,d)	Perl__invlist_dump(aTHX_ a,b,c,d)
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
+#define add_cp_to_invlist(a,b)	S_add_cp_to_invlist(aTHX_ a,b)
+#define invlist_extend(a,b)	S_invlist_extend(aTHX_ a,b)
+#define invlist_highest		S_invlist_highest
+#define invlist_set_len(a,b,c)	S_invlist_set_len(aTHX_ a,b,c)
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
+#define _add_range_to_invlist(a,b,c)	Perl__add_range_to_invlist(aTHX_ a,b,c)
+#define _invlist_intersection_maybe_complement_2nd(a,b,c,d)	Perl__invlist_intersection_maybe_complement_2nd(aTHX_ a,b,c,d)
+#define _invlist_invert(a)	Perl__invlist_invert(aTHX_ a)
+#define _invlist_union_maybe_complement_2nd(a,b,c,d)	Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
+#define _new_invlist(a)		Perl__new_invlist(aTHX_ a)
+#define _setup_canned_invlist(a,b,c)	Perl__setup_canned_invlist(aTHX_ a,b,c)
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
+#define get_invlist_iter_addr	S_get_invlist_iter_addr
+#define invlist_iterfinish	S_invlist_iterfinish
+#define invlist_iterinit	S_invlist_iterinit
+#define invlist_iternext	S_invlist_iternext
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
+#define _invlistEQ(a,b,c)	Perl__invlistEQ(aTHX_ a,b,c)
+#define _new_invlist_C_array(a)	Perl__new_invlist_C_array(aTHX_ a)
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
+#define get_regex_charset_name	S_get_regex_charset_name
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_DOOP_C)
+#define _invlist_contains_cp	S__invlist_contains_cp
+#define _invlist_len		S__invlist_len
+#define _invlist_search		Perl__invlist_search
+#define get_invlist_offset_addr	S_get_invlist_offset_addr
+#define invlist_array		S_invlist_array
+#define is_invlist		S_is_invlist
+#  endif
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_SV_C)
+#define invlist_clone(a,b)	Perl_invlist_clone(aTHX_ a,b)
+#  endif
 #  if defined(PERL_IN_REGCOMP_C)
-#define add_above_Latin1_folds(a,b,c)	S_add_above_Latin1_folds(aTHX_ a,b,c)
-#define add_data		S_add_data
 #define add_multi_match(a,b,c)	S_add_multi_match(aTHX_ a,b,c)
 #define change_engine_size(a,b)	S_change_engine_size(aTHX_ a,b)
 #define compile_wildcard(a,b,c)	S_compile_wildcard(aTHX_ a,b,c)
 #define compute_EXACTish	S_compute_EXACTish
-#define construct_ahocorasick_from_trie(a,b,c)	S_construct_ahocorasick_from_trie(aTHX_ a,b,c)
 #define edit_distance		S_edit_distance
 #define execute_wildcard(a,b,c,d,e,f,g)	S_execute_wildcard(aTHX_ a,b,c,d,e,f,g)
 #define find_first_differing_byte_pos	S_find_first_differing_byte_pos
-#define get_ANYOFHbbm_contents(a)	S_get_ANYOFHbbm_contents(aTHX_ a)
-#define get_ANYOFM_contents(a)	S_get_ANYOFM_contents(aTHX_ a)
-#define get_ANYOF_cp_list_for_ssc(a,b)	S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
 #define get_quantifier_value(a,b,c)	S_get_quantifier_value(aTHX_ a,b,c)
 #define grok_bslash_N(a,b,c,d,e,f,g)	S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
 #define handle_named_backref(a,b,c,d)	S_handle_named_backref(aTHX_ a,b,c,d)
@@ -1056,23 +1102,11 @@
 #define handle_possible_posix(a,b,c,d,e)	S_handle_possible_posix(aTHX_ a,b,c,d,e)
 #define handle_regex_sets(a,b,c,d)	S_handle_regex_sets(aTHX_ a,b,c,d)
 #define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j)	S_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
-#define invlist_contents(a,b)	S_invlist_contents(aTHX_ a,b)
-#define invlist_highest_range_start	S_invlist_highest_range_start
-#define invlist_is_iterating	S_invlist_is_iterating
-#define invlist_lowest		S_invlist_lowest
-#define is_ssc_worth_it		S_is_ssc_worth_it
-#define join_exact(a,b,c,d,e,f,g)	S_join_exact(aTHX_ a,b,c,d,e,f,g)
-#define make_exactf_invlist(a,b)	S_make_exactf_invlist(aTHX_ a,b)
-#define make_trie(a,b,c,d,e,f,g,h)	S_make_trie(aTHX_ a,b,c,d,e,f,g,h)
 #define nextchar(a)		S_nextchar(aTHX_ a)
 #define optimize_regclass(a,b,c,d,e,f,g,h,i,j)	S_optimize_regclass(aTHX_ a,b,c,d,e,f,g,h,i,j)
 #define output_posix_warnings(a,b)	S_output_posix_warnings(aTHX_ a,b)
 #define parse_lparen_question_flags(a)	S_parse_lparen_question_flags(aTHX_ a)
 #define parse_uniprop_string(a,b,c,d,e,f,g,h,i,j)	S_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i,j)
-#define populate_anyof_bitmap_from_invlist(a,b)	S_populate_anyof_bitmap_from_invlist(aTHX_ a,b)
-#define populate_bitmap_from_invlist(a,b,c,d)	S_populate_bitmap_from_invlist(aTHX_ a,b,c,d)
-#define populate_invlist_from_bitmap(a,b,c,d)	S_populate_invlist_from_bitmap(aTHX_ a,b,c,d)
-#define rck_elide_nothing(a)	S_rck_elide_nothing(aTHX_ a)
 #define reg(a,b,c,d)		S_reg(aTHX_ a,b,c,d)
 #define reg2Lanode(a,b,c,d)	S_reg2Lanode(aTHX_ a,b,c,d)
 #define reg_la_NOTHING(a,b,c)	S_reg_la_NOTHING(aTHX_ a,b,c)
@@ -1090,40 +1124,8 @@
 #define regpiece(a,b,c)		S_regpiece(aTHX_ a,b,c)
 #define regpnode(a,b,c)		S_regpnode(aTHX_ a,b,c)
 #define regtail(a,b,c,d)	S_regtail(aTHX_ a,b,c,d)
-#define scan_commit(a,b,c,d)	S_scan_commit(aTHX_ a,b,c,d)
-#define set_ANYOF_arg(a,b,c,d,e)	S_set_ANYOF_arg(aTHX_ a,b,c,d,e)
 #define set_regex_pv(a,b)	S_set_regex_pv(aTHX_ a,b)
 #define skip_to_be_ignored_text(a,b,c)	S_skip_to_be_ignored_text(aTHX_ a,b,c)
-#define ssc_add_range(a,b,c)	S_ssc_add_range(aTHX_ a,b,c)
-#define ssc_and(a,b,c)		S_ssc_and(aTHX_ a,b,c)
-#define ssc_anything(a)		S_ssc_anything(aTHX_ a)
-#define ssc_clear_locale	S_ssc_clear_locale
-#define ssc_cp_and(a,b)		S_ssc_cp_and(aTHX_ a,b)
-#define ssc_finalize(a,b)	S_ssc_finalize(aTHX_ a,b)
-#define ssc_init(a,b)		S_ssc_init(aTHX_ a,b)
-#define ssc_intersection(a,b,c)	S_ssc_intersection(aTHX_ a,b,c)
-#define ssc_is_anything		S_ssc_is_anything
-#define ssc_is_cp_posixl_init	S_ssc_is_cp_posixl_init
-#define ssc_or(a,b,c)		S_ssc_or(aTHX_ a,b,c)
-#define ssc_union(a,b,c)	S_ssc_union(aTHX_ a,b,c)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l)	S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
-#define _invlist_dump(a,b,c,d)	Perl__invlist_dump(aTHX_ a,b,c,d)
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
-#define add_cp_to_invlist(a,b)	S_add_cp_to_invlist(aTHX_ a,b)
-#define invlist_extend(a,b)	S_invlist_extend(aTHX_ a,b)
-#define invlist_highest		S_invlist_highest
-#define invlist_set_len(a,b,c)	S_invlist_set_len(aTHX_ a,b,c)
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
-#define _add_range_to_invlist(a,b,c)	Perl__add_range_to_invlist(aTHX_ a,b,c)
-#define _invlist_intersection_maybe_complement_2nd(a,b,c,d)	Perl__invlist_intersection_maybe_complement_2nd(aTHX_ a,b,c,d)
-#define _invlist_invert(a)	Perl__invlist_invert(aTHX_ a)
-#define _invlist_union_maybe_complement_2nd(a,b,c,d)	Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
-#define _new_invlist(a)		Perl__new_invlist(aTHX_ a)
-#define _setup_canned_invlist(a,b,c)	Perl__setup_canned_invlist(aTHX_ a,b,c)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C)
 #define form_alien_digit_msg(a,b,c,d,e,f)	Perl_form_alien_digit_msg(aTHX_ a,b,c,d,e,f)
@@ -1134,45 +1136,44 @@
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
 #define form_cp_too_large_msg(a,b,c,d)	Perl_form_cp_too_large_msg(aTHX_ a,b,c,d)
 #  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C)
-#define get_invlist_iter_addr	S_get_invlist_iter_addr
-#define invlist_iterfinish	S_invlist_iterfinish
-#define invlist_iterinit	S_invlist_iterinit
-#define invlist_iternext	S_invlist_iternext
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
-#define _invlistEQ(a,b,c)	Perl__invlistEQ(aTHX_ a,b,c)
-#define _new_invlist_C_array(a)	Perl__new_invlist_C_array(aTHX_ a)
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
-#define get_regex_charset_name	S_get_regex_charset_name
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
-#if !defined(MULTIPLICITY) || defined(PERL_CORE)
-#define re_printf(...)		Perl_re_printf(aTHX_ __VA_ARGS__)
-#endif
-#define regprop(a,b,c,d,e)	Perl_regprop(aTHX_ a,b,c,d,e)
-#  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_DOOP_C)
-#define _invlist_contains_cp	S__invlist_contains_cp
-#define _invlist_len		S__invlist_len
-#define _invlist_search		Perl__invlist_search
-#define get_invlist_offset_addr	S_get_invlist_offset_addr
-#define invlist_array		S_invlist_array
-#define is_invlist		S_is_invlist
-#  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C)
 #define is_grapheme(a,b,c,d)	Perl_is_grapheme(aTHX_ a,b,c,d)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
 #define _to_fold_latin1		Perl__to_fold_latin1
 #  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
-#define invlist_clone(a,b)	Perl_invlist_clone(aTHX_ a,b)
-#  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
 #define regcurly		Perl_regcurly
 #  endif
+#  if defined(PERL_IN_REGCOMP_DEBUG_C) && defined(DEBUGGING)
+#define put_charclass_bitmap_innards(a,b,c,d,e,f,g)	S_put_charclass_bitmap_innards(aTHX_ a,b,c,d,e,f,g)
+#define put_charclass_bitmap_innards_common(a,b,c,d,e,f)	S_put_charclass_bitmap_innards_common(aTHX_ a,b,c,d,e,f)
+#define put_charclass_bitmap_innards_invlist(a,b)	S_put_charclass_bitmap_innards_invlist(aTHX_ a,b)
+#define put_code_point(a,b)	S_put_code_point(aTHX_ a,b)
+#define put_range(a,b,c,d)	S_put_range(aTHX_ a,b,c,d)
+#define regdump_extflags(a,b)	S_regdump_extflags(aTHX_ a,b)
+#define regdump_intflags(a,b)	S_regdump_intflags(aTHX_ a,b)
+#  endif
+#  if defined(PERL_IN_REGCOMP_INVLIST_C) || defined(PERL_IN_REGCOMP_C)
+#define populate_bitmap_from_invlist(a,b,c,d)	Perl_populate_bitmap_from_invlist(aTHX_ a,b,c,d)
+#define populate_invlist_from_bitmap(a,b,c,d)	Perl_populate_invlist_from_bitmap(aTHX_ a,b,c,d)
+#  endif
+#  if defined(PERL_IN_REGCOMP_STUDY_C)
+#define get_ANYOF_cp_list_for_ssc(a,b)	S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
+#define make_exactf_invlist(a,b)	S_make_exactf_invlist(aTHX_ a,b)
+#define rck_elide_nothing(a)	S_rck_elide_nothing(aTHX_ a)
+#define ssc_add_range(a,b,c)	S_ssc_add_range(aTHX_ a,b,c)
+#define ssc_and(a,b,c)		S_ssc_and(aTHX_ a,b,c)
+#define ssc_anything(a)		S_ssc_anything(aTHX_ a)
+#define ssc_clear_locale	S_ssc_clear_locale
+#define ssc_cp_and(a,b)		S_ssc_cp_and(aTHX_ a,b)
+#define ssc_intersection(a,b,c)	S_ssc_intersection(aTHX_ a,b,c)
+#define ssc_is_anything		S_ssc_is_anything
+#define ssc_is_cp_posixl_init	S_ssc_is_cp_posixl_init
+#define ssc_or(a,b,c)		S_ssc_or(aTHX_ a,b,c)
+#define ssc_union(a,b,c)	S_ssc_union(aTHX_ a,b,c)
+#define unwind_scan_frames(a)	S_unwind_scan_frames(aTHX_ a)
+#  endif
 #  if defined(PERL_IN_REGEXEC_C)
 #define advance_one_LB(a,b,c)	S_advance_one_LB(aTHX_ a,b,c)
 #define advance_one_SB(a,b,c)	S_advance_one_SB(aTHX_ a,b,c)
@@ -1206,6 +1207,19 @@
 #define to_byte_substr(a)	S_to_byte_substr(aTHX_ a)
 #define to_utf8_substr(a)	S_to_utf8_substr(aTHX_ a)
 #  endif
+#  if defined(PERL_IN_REGEX_ENGINE) && defined(DEBUGGING)
+#define debug_peep(a,b,c,d,e)	Perl_debug_peep(aTHX_ a,b,c,d,e)
+#define debug_show_study_flags(a,b,c)	Perl_debug_show_study_flags(aTHX_ a,b,c)
+#define debug_studydata(a,b,c,d,e,f,g)	Perl_debug_studydata(aTHX_ a,b,c,d,e,f,g)
+#define dumpuntil(a,b,c,d,e,f,g,h)	Perl_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
+#if !defined(MULTIPLICITY) || defined(PERL_CORE)
+#define re_indentf(a,...)	Perl_re_indentf(aTHX_ a,__VA_ARGS__)
+#endif
+#if !defined(MULTIPLICITY) || defined(PERL_CORE)
+#define re_printf(...)		Perl_re_printf(aTHX_ __VA_ARGS__)
+#endif
+#define regprop(a,b,c,d,e)	Perl_regprop(aTHX_ a,b,c,d,e)
+#  endif
 #endif
 #ifdef PERL_CORE
 #define PerlLIO_dup2_cloexec(a,b)	Perl_PerlLIO_dup2_cloexec(aTHX_ a,b)
@@ -1570,7 +1584,7 @@
 #    endif
 #  endif
 #  if !defined(PERL_EXT_RE_BUILD)
-#    if defined(PERL_IN_REGCOMP_C)
+#    if defined(PERL_IN_REGCOMP_INVLIST_C)
 #define initialize_invlist_guts(a,b)	S_initialize_invlist_guts(aTHX_ a,b)
 #    endif
 #  endif
diff --git a/ext/re/Makefile.PL b/ext/re/Makefile.PL
index f3bdcfdc69..8d4a576e4c 100644
--- a/ext/re/Makefile.PL
+++ b/ext/re/Makefile.PL
@@ -1,8 +1,31 @@
+use strict;
+use warnings;
 use ExtUtils::MakeMaker;
 use File::Spec;
 use Config;
+#   [ src => @deps ]
+our @files = (
+    # compiler files ########################################
+    ['regcomp.c'        => 'dquote.c', 'invlist_inline.h'   ],
+    ['regcomp_invlist.c' => 'invlist_inline.h'              ],
+    ['regcomp_study.c'                                      ],
+    ['regcomp_trie.c'                                       ],
+    ['regcomp_debug.c'                                      ],
+    # execution engine files ################################
+    ['regexec.c'        => 'invlist_inline.h'               ],
+    # misc files ############################################
+    ['dquote.c'                                             ],
+    ['invlist_inline.h'                                     ],
+    #########################################################
+);
 
-my $object = 're_exec$(OBJ_EXT) re_comp$(OBJ_EXT) re$(OBJ_EXT)';
+my @objects = 're$(OBJ_EXT)';
+foreach my $tuple (@files) {
+    my $src_file = $tuple->[0];
+    if ($src_file=~s/reg/re_/ and $src_file=~s/\.c/\$(OBJ_EXT)/) {
+        push @objects, $src_file;
+    }
+}
 
 my $defines = '-DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT';
 
@@ -15,45 +38,41 @@ WriteMakefile(
     @libs ? ( 'LIBS' => [ join(" ", map { "-l$_" } @libs) ] ) : (),
     VERSION_FROM	=> 're.pm',
     XSPROTOARG		=> '-noprototypes',
-    OBJECT		=> $object,
+    OBJECT		=> "@objects",
     DEFINE             => $defines,
     clean		=> { FILES => '*$(OBJ_EXT) invlist_inline.h *.c ../../lib/re.pm' },
 );
 
 package MY;
-
 sub upupfile {
     File::Spec->catfile(File::Spec->updir, File::Spec->updir, $_[0]);
 }
 
 sub postamble {
-    my $regcomp_c = upupfile('regcomp.c');
-    my $regexec_c = upupfile('regexec.c');
-    my $dquote_c = upupfile('dquote.c');
-    my $invlist_inline_h = upupfile('invlist_inline.h');
+    my $postamble = "";
 
+    foreach my $tuple (@::files) {
+        my ($file, @deps) = @$tuple;
+        my $src_file = upupfile($file);
+        my $target = $file;
+        $target =~ s/^reg/re_/;
+        $postamble .=
     <<EOF;
-re_comp.c : $regcomp_c
-	- \$(RM_F) re_comp.c
-	\$(CP) $regcomp_c re_comp.c
-
-re_comp\$(OBJ_EXT) : re_comp.c dquote.c invlist_inline.h
+$target : $src_file
+	- \$(RM_F) $target
+	\$(CP) $src_file $target
 
-re_exec.c : $regexec_c
-	- \$(RM_F) re_exec.c
-	\$(CP) $regexec_c re_exec.c
-
-re_exec\$(OBJ_EXT) : re_exec.c invlist_inline.h
-
-dquote.c : $dquote_c
-	- \$(RM_F) dquote.c
-	\$(CP) $dquote_c dquote.c
-
-invlist_inline.h : $invlist_inline_h
-	- \$(RM_F) invlist_inline.h
-	\$(CP) $invlist_inline_h invlist_inline.h
+EOF
+        next if $target eq $file;
+        my $base_name = $target;
+        if ($base_name=~s/\.c\z//) {
+            $postamble .= <<EOF
+$base_name\$(OBJ_EXT) : $target @deps
 
 EOF
+        }
+    }
+    return $postamble
 }
 
 sub MY::c_o {
diff --git a/handy.h b/handy.h
index eefc00600a..3c308c3f02 100644
--- a/handy.h
+++ b/handy.h
@@ -1685,7 +1685,7 @@ END_EXTERN_C
 #   endif
 
     /* Participates in a single-character fold with a character above 255 */
-#   if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#   if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_REGEXEC_C)
 #     define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(c)                           \
         ((   ! cBOOL(FITS_IN_8_BITS(c)))                                    \
           || (PL_charclass[(U8) (c)] & CC_mask_(CC_NONLATIN1_SIMPLE_FOLD_)))
diff --git a/inline.h b/inline.h
index d7de926263..8c4d10f1d5 100644
--- a/inline.h
+++ b/inline.h
@@ -2639,7 +2639,7 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp)
 
 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
 
-#if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
+#if defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
 
 #define MAX_CHARSET_NAME_LENGTH 2
 
diff --git a/invlist_inline.h b/invlist_inline.h
index ebb37b6543..8b28c21885 100644
--- a/invlist_inline.h
+++ b/invlist_inline.h
@@ -10,7 +10,7 @@
 #define PERL_INVLIST_INLINE_H_
 
 #if defined(PERL_IN_UTF8_C)             \
- || defined(PERL_IN_REGCOMP_C)          \
+ || defined(PERL_IN_REGCOMP_ANY)        \
  || defined(PERL_IN_REGEXEC_C)          \
  || defined(PERL_IN_TOKE_C)             \
  || defined(PERL_IN_PP_C)               \
@@ -93,7 +93,7 @@ S_invlist_array(SV* const invlist)
 }
 
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
 
 PERL_STATIC_INLINE void
 S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -161,7 +161,7 @@ S_invlist_highest(SV* const invlist)
            : array[len - 1] - 1;
 }
 
-#  if defined(PERL_IN_REGCOMP_C)
+#  if defined(PERL_IN_REGCOMP_ANY)
 
 PERL_STATIC_INLINE UV
 S_invlist_highest_range_start(SV* const invlist)
@@ -199,7 +199,7 @@ S_invlist_highest_range_start(SV* const invlist)
 
 #  endif
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
 
 PERL_STATIC_INLINE STRLEN*
 S_get_invlist_iter_addr(SV* invlist)
@@ -275,11 +275,101 @@ S_invlist_iternext(SV* invlist, UV* start, UV* end)
 
 #endif
 
-#ifndef PERL_IN_REGCOMP_C
+#ifndef PERL_IN_REGCOMP_ANY
 
 /* These symbols are only needed later in regcomp.c */
 #       undef TO_INTERNAL_SIZE
 #       undef FROM_INTERNAL_SIZE
 #endif
 
+#ifdef PERL_IN_REGCOMP_ANY
+PERL_STATIC_INLINE
+bool
+S_invlist_is_iterating(const SV* const invlist)
+{
+    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
+
+    /* get_invlist_iter_addr()'s sv is non-const only because it returns a
+     * value that can be used to modify the invlist, it doesn't modify the
+     * invlist itself */
+    return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
+}
+
+PERL_STATIC_INLINE
+SV *
+S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
+{
+    /* Get the contents of an inversion list into a string SV so that they can
+     * be printed out.  If 'traditional_style' is TRUE, it uses the format
+     * traditionally done for debug tracing; otherwise it uses a format
+     * suitable for just copying to the output, with blanks between ranges and
+     * a dash between range components */
+
+    UV start, end;
+    SV* output;
+    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
+    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
+
+    if (traditional_style) {
+        output = newSVpvs("\n");
+    }
+    else {
+        output = newSVpvs("");
+    }
+
+    PERL_ARGS_ASSERT_INVLIST_CONTENTS;
+
+    assert(! invlist_is_iterating(invlist));
+
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+        if (end == UV_MAX) {
+            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
+                                          start, intra_range_delimiter,
+                                                 inter_range_delimiter);
+        }
+        else if (end != start) {
+            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
+                                          start,
+                                                   intra_range_delimiter,
+                                                  end, inter_range_delimiter);
+        }
+        else {
+            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
+                                          start, inter_range_delimiter);
+        }
+    }
+
+    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
+        SvCUR_set(output, SvCUR(output) - 1);
+    }
+
+    return output;
+}
+
+PERL_STATIC_INLINE
+UV
+S_invlist_lowest(SV* const invlist)
+{
+    /* Returns the lowest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 under either the lowest is actually
+     * 0, or if the list is empty.  If this distinction matters to you, check
+     * for emptiness before calling this function */
+
+    UV len = _invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_LOWEST;
+
+    if (len == 0) {
+        return 0;
+    }
+
+    array = invlist_array(invlist);
+
+    return array[0];
+}
+
+#endif
+
 #endif /* PERL_INVLIST_INLINE_H_ */
diff --git a/perl.h b/perl.h
index 94ba49bff9..3f728f865d 100644
--- a/perl.h
+++ b/perl.h
@@ -4506,8 +4506,7 @@ typedef        struct crypt_data {     /* straight from /usr/include/crypt.h */
 
 typedef struct magic_state MGS;	/* struct magic_state defined in mg.c */
 
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) \
- || defined(PERL_EXT_RE_BUILD)
+#if defined(PERL_IN_REGEX_ENGINE) || defined(PERL_EXT_RE_BUILD)
 
 /* These have to be predeclared, as they are used in proto.h which is #included
  * before their definitions in regcomp.h. */
@@ -4523,6 +4522,7 @@ typedef struct regnode_charclass_posixl regnode_charclass_posixl;
 typedef struct regnode_ssc regnode_ssc;
 typedef struct RExC_state_t RExC_state_t;
 struct _reg_trie_data;
+typedef struct scan_data_t scan_data_t;
 
 #endif
 
diff --git a/proto.h b/proto.h
index df65610c6c..ac517eb5d3 100644
--- a/proto.h
+++ b/proto.h
@@ -4705,7 +4705,7 @@ PERL_STATIC_INLINE void *	S_my_memrchr(const char * s, const char c, const STRLE
 #endif
 #endif
 #if !(!defined(PERL_EXT_RE_BUILD))
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#  if defined(PERL_IN_REGEX_ENGINE)
 PERL_CALLCONV SV*	Perl_get_re_gclass_aux_data(pTHX_ const regexp *prog, const struct regnode *node, bool doinit, SV **listsvp, SV **lonly_utf8_locale, SV **output_invlist)
 			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_GET_RE_GCLASS_AUX_DATA	\
@@ -4887,7 +4887,7 @@ STATIC PerlIO *	S_doopen_pm(pTHX_ SV *name)
 #  endif
 #endif
 #if !defined(PERL_EXT_RE_BUILD)
-#  if defined(PERL_IN_REGCOMP_C)
+#  if defined(PERL_IN_REGCOMP_INVLIST_C)
 STATIC void	S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end);
 #define PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST	\
 	assert(invlist)
@@ -4941,7 +4941,7 @@ PERL_STATIC_INLINE void	S_invlist_trim(SV* invlist);
 	assert(invlist)
 #endif
 #  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#  if defined(PERL_IN_REGEX_ENGINE)
 PERL_CALLCONV SV*	Perl_get_regclass_aux_data(pTHX_ const regexp *prog, const struct regnode *node, bool doinit, SV **listsvp, SV **lonly_utf8_locale, SV **output_invlist)
 			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_GET_REGCLASS_AUX_DATA	\
@@ -5163,42 +5163,6 @@ STATIC void	S_cv_dump(pTHX_ const CV *cv, const char *title);
 	assert(cv); assert(title)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C)
-STATIC void	S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 depth);
-#define PERL_ARGS_ASSERT_DUMP_TRIE	\
-	assert(trie); assert(revcharmap)
-STATIC void	S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth);
-#define PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST	\
-	assert(trie); assert(revcharmap)
-STATIC void	S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth);
-#define PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE	\
-	assert(trie); assert(revcharmap)
-STATIC const regnode*	S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const regnode *last, const regnode *plast, SV* sv, I32 indent, U32 depth);
-#define PERL_ARGS_ASSERT_DUMPUNTIL	\
-	assert(r); assert(start); assert(node); assert(sv)
-STATIC U8	S_put_charclass_bitmap_innards(pTHX_ SV* sv, char* bitmap, SV* nonbitmap_invlist, SV* only_utf8_locale_invlist, const regnode * const node, const U8 flags, const bool force_as_is_display);
-#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS	\
-	assert(sv)
-STATIC SV*	S_put_charclass_bitmap_innards_common(pTHX_ SV* invlist, SV* posixes, SV* only_utf8, SV* not_utf8, SV* only_utf8_locale, const bool invert);
-#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON	\
-	assert(invlist)
-STATIC void	S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist);
-#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST	\
-	assert(sv); assert(invlist)
-STATIC void	S_put_code_point(pTHX_ SV* sv, UV c);
-#define PERL_ARGS_ASSERT_PUT_CODE_POINT	\
-	assert(sv)
-STATIC void	S_put_range(pTHX_ SV* sv, UV start, const UV end, const bool allow_literals);
-#define PERL_ARGS_ASSERT_PUT_RANGE	\
-	assert(sv)
-PERL_CALLCONV int	Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
-			__attribute__visibility__("hidden");
-#define PERL_ARGS_ASSERT_RE_INDENTF	\
-	assert(fmt)
-
-STATIC void	S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
-#define PERL_ARGS_ASSERT_REGDUMP_EXTFLAGS
-STATIC void	S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
-#define PERL_ARGS_ASSERT_REGDUMP_INTFLAGS
 STATIC regnode_offset	S_regnode_guts_debug(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len);
 #define PERL_ARGS_ASSERT_REGNODE_GUTS_DEBUG	\
 	assert(pRExC_state)
@@ -6411,79 +6375,29 @@ STATIC OP*	S_doform(pTHX_ CV *cv, GV *gv, OP *retop);
 STATIC SV *	S_space_join_names_mortal(pTHX_ char *const *array);
 #define PERL_ARGS_ASSERT_SPACE_JOIN_NAMES_MORTAL
 #endif
-#if defined(PERL_IN_REGCOMP_C)
-STATIC void	S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist);
+#if defined(PERL_IN_REGCOMP_ANY)
+PERL_CALLCONV void	Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_ADD_ABOVE_LATIN1_FOLDS	\
 	assert(pRExC_state); assert(invlist)
-STATIC U32	S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_ADD_DATA	\
-	assert(pRExC_state); assert(s)
 
-STATIC AV*	S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count);
-#define PERL_ARGS_ASSERT_ADD_MULTI_MATCH	\
-	assert(multi_string)
-STATIC void	S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size);
-#define PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE	\
-	assert(pRExC_state)
-STATIC REGEXP*	S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len, const bool ignore_case)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_COMPILE_WILDCARD	\
-	assert(subpattern)
-
-STATIC U8	S_compute_EXACTish(RExC_state_t *pRExC_state);
-#define PERL_ARGS_ASSERT_COMPUTE_EXACTISH	\
-	assert(pRExC_state)
-STATIC regnode *	S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth);
+PERL_CALLCONV regnode *	Perl_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE	\
 	assert(pRExC_state); assert(source)
-STATIC int	S_edit_distance(const UV *src, const UV *tgt, const STRLEN x, const STRLEN y, const SSize_t maxDistance)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_EDIT_DISTANCE	\
-	assert(src); assert(tgt)
 
-STATIC I32	S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, SSize_t minend, SV* screamer, U32 nosave);
-#define PERL_ARGS_ASSERT_EXECUTE_WILDCARD	\
-	assert(prog); assert(stringarg); assert(strend); assert(strbeg); assert(screamer)
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE Size_t	S_find_first_differing_byte_pos(const U8 * s1, const U8 * s2, const Size_t max);
-#define PERL_ARGS_ASSERT_FIND_FIRST_DIFFERING_BYTE_POS	\
-	assert(s1); assert(s2)
-#endif
-STATIC SV *	S_get_ANYOFHbbm_contents(pTHX_ const regnode * n)
-			__attribute__warn_unused_result__;
+PERL_CALLCONV SV *	Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n)
+			__attribute__warn_unused_result__
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_GET_ANYOFHBBM_CONTENTS	\
 	assert(n)
 
-STATIC SV *	S_get_ANYOFM_contents(pTHX_ const regnode * n)
-			__attribute__warn_unused_result__;
+PERL_CALLCONV SV *	Perl_get_ANYOFM_contents(pTHX_ const regnode * n)
+			__attribute__warn_unused_result__
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_GET_ANYOFM_CONTENTS	\
 	assert(n)
 
-STATIC SV*	S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, const regnode_charclass* const node);
-#define PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC	\
-	assert(pRExC_state); assert(node)
-STATIC U32	S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state, const char * start, const char * end);
-#define PERL_ARGS_ASSERT_GET_QUANTIFIER_VALUE	\
-	assert(pRExC_state); assert(start); assert(end)
-STATIC bool	S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
-#define PERL_ARGS_ASSERT_GROK_BSLASH_N	\
-	assert(pRExC_state); assert(flagp)
-STATIC regnode_offset	S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * backref_parse_start, char ch);
-#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF	\
-	assert(pRExC_state); assert(flagp); assert(backref_parse_start)
-STATIC bool	S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname_len, SV ** prop_definition, AV ** strings);
-#define PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD	\
-	assert(wname); assert(prop_definition); assert(strings)
-STATIC int	S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
-#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX	\
-	assert(pRExC_state); assert(s)
-STATIC regnode_offset	S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth);
-#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS	\
-	assert(pRExC_state); assert(flagp)
-STATIC SV *	S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
-#define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY	\
-	assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE SV*	S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
 			__attribute__warn_unused_result__;
@@ -6512,171 +6426,75 @@ PERL_STATIC_INLINE UV	S_invlist_lowest(SV* const invlist)
 	assert(invlist)
 #endif
 
-STATIC bool	S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc);
+PERL_CALLCONV bool	Perl_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_IS_SSC_WORTH_IT	\
 	assert(pRExC_state); assert(ssc)
-STATIC U32	S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, bool *unfolded_multi_char, U32 flags, regnode *val, U32 depth);
+
+PERL_CALLCONV U32	Perl_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, bool *unfolded_multi_char, U32 flags, regnode *val, U32 depth)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_JOIN_EXACT	\
 	assert(pRExC_state); assert(scan); assert(min_subtract); assert(unfolded_multi_char)
-STATIC SV*	S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST	\
-	assert(pRExC_state); assert(node)
 
-STATIC I32	S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *first, regnode *last, regnode *tail, U32 word_count, U32 flags, U32 depth);
+PERL_CALLCONV I32	Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *first, regnode *last, regnode *tail, U32 word_count, U32 flags, U32 depth)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_MAKE_TRIE	\
 	assert(pRExC_state); assert(startbranch); assert(first); assert(last); assert(tail)
-STATIC void	S_nextchar(pTHX_ RExC_state_t *pRExC_state);
-#define PERL_ARGS_ASSERT_NEXTCHAR	\
-	assert(pRExC_state)
-STATIC U8	S_optimize_regclass(pTHX_ RExC_state_t *pRExC_state, SV* cp_list, SV* only_utf8_locale_list, SV* upper_latin1_only_utf8_matches, const U32 has_runtime_dependency, const U32 posixl, U8 * anyof_flags, bool * invert, regnode_offset * ret, I32 *flagp);
-#define PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS	\
-	assert(pRExC_state); assert(anyof_flags); assert(invert); assert(ret); assert(flagp)
-STATIC void	S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings);
-#define PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS	\
-	assert(pRExC_state); assert(posix_warnings)
-STATIC void	S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state);
-#define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS	\
-	assert(pRExC_state)
-STATIC SV *	S_parse_uniprop_string(pTHX_ const char * const name, Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, AV ** strings, bool * user_defined_ptr, SV * msg, const STRLEN level);
-#define PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING	\
-	assert(name); assert(user_defined_ptr); assert(msg)
-STATIC void	S_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
+
+PERL_CALLCONV void	Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_POPULATE_ANYOF_BITMAP_FROM_INVLIST	\
 	assert(node); assert(invlist_ptr)
-STATIC void	S_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len);
-#define PERL_ARGS_ASSERT_POPULATE_BITMAP_FROM_INVLIST	\
-	assert(invlist); assert(bitmap)
-STATIC void	S_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_len, SV ** invlist, const UV offset);
-#define PERL_ARGS_ASSERT_POPULATE_INVLIST_FROM_BITMAP	\
-	assert(bitmap); assert(invlist)
-STATIC void	S_rck_elide_nothing(pTHX_ regnode *node);
-#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING	\
-	assert(node)
-PERL_STATIC_NO_RET void	S_re_croak(pTHX_ bool utf8, const char* pat, ...)
-			__attribute__noreturn__
-			__attribute__format__(__printf__,pTHX_2,pTHX_3);
-#define PERL_ARGS_ASSERT_RE_CROAK	\
-	assert(pat)
-
-STATIC regnode_offset	S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth);
-#define PERL_ARGS_ASSERT_REG	\
-	assert(pRExC_state); assert(flagp)
-STATIC regnode_offset	S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2);
-#define PERL_ARGS_ASSERT_REG2LANODE	\
-	assert(pRExC_state)
-STATIC regnode_offset	S_reg_la_NOTHING(pTHX_ RExC_state_t *pRExC_state, U32 flags, const char *type);
-#define PERL_ARGS_ASSERT_REG_LA_NOTHING	\
-	assert(pRExC_state); assert(type)
-STATIC regnode_offset	S_reg_la_OPFAIL(pTHX_ RExC_state_t *pRExC_state, U32 flags, const char *type);
-#define PERL_ARGS_ASSERT_REG_LA_OPFAIL	\
-	assert(pRExC_state); assert(type)
-STATIC regnode_offset	S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
-#define PERL_ARGS_ASSERT_REG_NODE	\
-	assert(pRExC_state)
-STATIC SV *	S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags);
-#define PERL_ARGS_ASSERT_REG_SCAN_NAME	\
-	assert(pRExC_state)
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE char *	S_reg_skipcomment(RExC_state_t *pRExC_state, char * p);
-#define PERL_ARGS_ASSERT_REG_SKIPCOMMENT	\
-	assert(pRExC_state); assert(p)
-#endif
-STATIC regnode_offset	S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg);
-#define PERL_ARGS_ASSERT_REGANODE	\
-	assert(pRExC_state)
-STATIC regnode_offset	S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
-#define PERL_ARGS_ASSERT_REGATOM	\
-	assert(pRExC_state); assert(flagp)
-STATIC regnode_offset	S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth);
-#define PERL_ARGS_ASSERT_REGBRANCH	\
-	assert(pRExC_state); assert(flagp)
-STATIC regnode_offset	S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist);
-#define PERL_ARGS_ASSERT_REGCLASS	\
-	assert(pRExC_state); assert(flagp)
-STATIC unsigned int	S_regex_set_precedence(const U8 my_operator)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGEX_SET_PRECEDENCE
 
-STATIC void	S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, const regnode_offset operand, const U32 depth);
-#define PERL_ARGS_ASSERT_REGINSERT	\
-	assert(pRExC_state)
-STATIC regnode_offset	S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_len);
-#define PERL_ARGS_ASSERT_REGNODE_GUTS	\
-	assert(pRExC_state)
-STATIC regnode_offset	S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
-#define PERL_ARGS_ASSERT_REGPIECE	\
-	assert(pRExC_state); assert(flagp)
-STATIC regnode_offset	S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg);
-#define PERL_ARGS_ASSERT_REGPNODE	\
-	assert(pRExC_state); assert(arg)
-STATIC bool	S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGTAIL	\
-	assert(pRExC_state); assert(p); assert(val)
+PERL_CALLCONV U32	Perl_reg_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
+			__attribute__warn_unused_result__
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_REG_ADD_DATA	\
+	assert(pRExC_state); assert(s)
 
-STATIC void	S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
+PERL_CALLCONV void	Perl_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_SCAN_COMMIT	\
 	assert(pRExC_state); assert(data); assert(minlenp)
-STATIC void	S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list);
+
+PERL_CALLCONV void	Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_SET_ANYOF_ARG	\
 	assert(pRExC_state); assert(node)
-STATIC void	S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx);
-#define PERL_ARGS_ASSERT_SET_REGEX_PV	\
-	assert(pRExC_state); assert(Rx)
-STATIC void	S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state, char ** p, const bool force_to_xmod);
-#define PERL_ARGS_ASSERT_SKIP_TO_BE_IGNORED_TEXT	\
-	assert(pRExC_state); assert(p)
-STATIC void	S_ssc_add_range(pTHX_ regnode_ssc *ssc, UV const start, UV const end);
-#define PERL_ARGS_ASSERT_SSC_ADD_RANGE	\
-	assert(ssc)
-STATIC void	S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_charclass *and_with);
-#define PERL_ARGS_ASSERT_SSC_AND	\
-	assert(pRExC_state); assert(ssc); assert(and_with)
-STATIC void	S_ssc_anything(pTHX_ regnode_ssc *ssc);
-#define PERL_ARGS_ASSERT_SSC_ANYTHING	\
-	assert(ssc)
-STATIC void	S_ssc_clear_locale(regnode_ssc *ssc);
-#define PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE	\
-	assert(ssc)
-STATIC void	S_ssc_cp_and(pTHX_ regnode_ssc *ssc, UV const cp);
-#define PERL_ARGS_ASSERT_SSC_CP_AND	\
-	assert(ssc)
-STATIC void	S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc);
+
+PERL_CALLCONV void	Perl_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_SSC_FINALIZE	\
 	assert(pRExC_state); assert(ssc)
-STATIC void	S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc);
-#define PERL_ARGS_ASSERT_SSC_INIT	\
-	assert(pRExC_state); assert(ssc)
-STATIC void	S_ssc_intersection(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
-#define PERL_ARGS_ASSERT_SSC_INTERSECTION	\
-	assert(ssc); assert(invlist)
-STATIC int	S_ssc_is_anything(const regnode_ssc *ssc)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_SSC_IS_ANYTHING	\
-	assert(ssc)
 
-STATIC int	S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state, const regnode_ssc *ssc)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT	\
+PERL_CALLCONV void	Perl_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_SSC_INIT	\
 	assert(pRExC_state); assert(ssc)
 
-STATIC void	S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_charclass *or_with);
-#define PERL_ARGS_ASSERT_SSC_OR	\
-	assert(pRExC_state); assert(ssc); assert(or_with)
-STATIC void	S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
-#define PERL_ARGS_ASSERT_SSC_UNION	\
-	assert(ssc); assert(invlist)
-STATIC SSize_t	S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
+PERL_CALLCONV SSize_t	Perl_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok)
+			__attribute__visibility__("hidden");
 #define PERL_ARGS_ASSERT_STUDY_CHUNK	\
 	assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
+
+#  if defined(PERL_IN_REGCOMP_TRIE_C) && defined(DEBUGGING)
+STATIC void	S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 depth);
+#define PERL_ARGS_ASSERT_DUMP_TRIE	\
+	assert(trie); assert(revcharmap)
+STATIC void	S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth);
+#define PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST	\
+	assert(trie); assert(revcharmap)
+STATIC void	S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth);
+#define PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE	\
+	assert(trie); assert(revcharmap)
+#  endif
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
 PERL_CALLCONV void	Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, const char* const indent, SV* const invlist);
 #define PERL_ARGS_ASSERT__INVLIST_DUMP	\
 	assert(file); assert(indent); assert(invlist)
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE SV*	S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp)
 			__attribute__warn_unused_result__;
@@ -6701,7 +6519,7 @@ PERL_STATIC_INLINE void	S_invlist_set_len(pTHX_ SV* const invlist, const UV len,
 	assert(invlist)
 #endif
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_UTF8_C)
 PERL_CALLCONV SV*	Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT__ADD_RANGE_TO_INVLIST
@@ -6731,35 +6549,7 @@ PERL_CALLCONV SV*	Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV e
 	assert(other_elements_ptr)
 
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C)
-PERL_CALLCONV const char *	Perl_form_alien_digit_msg(pTHX_ const U8 which, const STRLEN valids_len, const char * const first_bad, const char * const send, const bool UTF, const bool braced)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG	\
-	assert(first_bad); assert(send)
-
-PERL_CALLCONV bool	Perl_grok_bslash_c(pTHX_ const char source, U8 * result, const char** message, U32 * packed_warn)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_GROK_BSLASH_C	\
-	assert(result); assert(message)
-
-PERL_CALLCONV bool	Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_GROK_BSLASH_O	\
-	assert(s); assert(send); assert(uv); assert(message)
-
-PERL_CALLCONV bool	Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_GROK_BSLASH_X	\
-	assert(s); assert(send); assert(uv); assert(message)
-
-#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
-PERL_CALLCONV const char *	Perl_form_cp_too_large_msg(pTHX_ const U8 which, const char * string, const Size_t len, const UV cp)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG
-
-#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE STRLEN*	S_get_invlist_iter_addr(SV* invlist)
 			__attribute__warn_unused_result__;
@@ -6785,7 +6575,7 @@ PERL_STATIC_INLINE bool	S_invlist_iternext(SV* invlist, UV* start, UV* end)
 #endif
 
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
 PERL_CALLCONV bool	Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b);
 #define PERL_ARGS_ASSERT__INVLISTEQ	\
 	assert(a); assert(b)
@@ -6795,45 +6585,14 @@ PERL_CALLCONV SV*	Perl__new_invlist_C_array(pTHX_ const UV* const list)
 	assert(list)
 
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_PP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UNIVERSAL_C)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE const char *	S_get_regex_charset_name(const U32 flags, STRLEN* const lenp);
 #define PERL_ARGS_ASSERT_GET_REGEX_CHARSET_NAME	\
 	assert(lenp)
 #endif
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE bool	Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER
-#endif
-
-PERL_CALLCONV int	Perl_re_printf(pTHX_ const char *fmt, ...)
-			__attribute__visibility__("hidden")
-			__attribute__format__(__printf__,pTHX_1,pTHX_2);
-#define PERL_ARGS_ASSERT_RE_PRINTF	\
-	assert(fmt)
-
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE regnode*	Perl_regnext(pTHX_ const regnode* p)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGNEXT
-#endif
-
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE regnode*	Perl_regnode_after(pTHX_ const regnode* p, bool varies)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGNODE_AFTER
-#endif
-
-PERL_CALLCONV void	Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
-			__attribute__visibility__("hidden");
-#define PERL_ARGS_ASSERT_REGPROP	\
-	assert(sv); assert(o)
-
-#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_DOOP_C)
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_DOOP_C)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE bool	S__invlist_contains_cp(SV* const invlist, const UV cp)
 			__attribute__warn_unused_result__;
@@ -6874,6 +6633,172 @@ PERL_STATIC_INLINE bool	S_is_invlist(const SV* const invlist)
 #endif
 
 #endif
+#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_SV_C)
+PERL_CALLCONV SV*	Perl_invlist_clone(pTHX_ SV* const invlist, SV* newlist);
+#define PERL_ARGS_ASSERT_INVLIST_CLONE	\
+	assert(invlist)
+#endif
+#if defined(PERL_IN_REGCOMP_C)
+STATIC AV*	S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count);
+#define PERL_ARGS_ASSERT_ADD_MULTI_MATCH	\
+	assert(multi_string)
+STATIC void	S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size);
+#define PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE	\
+	assert(pRExC_state)
+STATIC REGEXP*	S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len, const bool ignore_case)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_COMPILE_WILDCARD	\
+	assert(subpattern)
+
+STATIC U8	S_compute_EXACTish(RExC_state_t *pRExC_state);
+#define PERL_ARGS_ASSERT_COMPUTE_EXACTISH	\
+	assert(pRExC_state)
+STATIC int	S_edit_distance(const UV *src, const UV *tgt, const STRLEN x, const STRLEN y, const SSize_t maxDistance)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_EDIT_DISTANCE	\
+	assert(src); assert(tgt)
+
+STATIC I32	S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, SSize_t minend, SV* screamer, U32 nosave);
+#define PERL_ARGS_ASSERT_EXECUTE_WILDCARD	\
+	assert(prog); assert(stringarg); assert(strend); assert(strbeg); assert(screamer)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE Size_t	S_find_first_differing_byte_pos(const U8 * s1, const U8 * s2, const Size_t max);
+#define PERL_ARGS_ASSERT_FIND_FIRST_DIFFERING_BYTE_POS	\
+	assert(s1); assert(s2)
+#endif
+STATIC U32	S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state, const char * start, const char * end);
+#define PERL_ARGS_ASSERT_GET_QUANTIFIER_VALUE	\
+	assert(pRExC_state); assert(start); assert(end)
+STATIC bool	S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
+#define PERL_ARGS_ASSERT_GROK_BSLASH_N	\
+	assert(pRExC_state); assert(flagp)
+STATIC regnode_offset	S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * backref_parse_start, char ch);
+#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF	\
+	assert(pRExC_state); assert(flagp); assert(backref_parse_start)
+STATIC bool	S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname_len, SV ** prop_definition, AV ** strings);
+#define PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD	\
+	assert(wname); assert(prop_definition); assert(strings)
+STATIC int	S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
+#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX	\
+	assert(pRExC_state); assert(s)
+STATIC regnode_offset	S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth);
+#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS	\
+	assert(pRExC_state); assert(flagp)
+STATIC SV *	S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
+#define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY	\
+	assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
+STATIC void	S_nextchar(pTHX_ RExC_state_t *pRExC_state);
+#define PERL_ARGS_ASSERT_NEXTCHAR	\
+	assert(pRExC_state)
+STATIC U8	S_optimize_regclass(pTHX_ RExC_state_t *pRExC_state, SV* cp_list, SV* only_utf8_locale_list, SV* upper_latin1_only_utf8_matches, const U32 has_runtime_dependency, const U32 posixl, U8 * anyof_flags, bool * invert, regnode_offset * ret, I32 *flagp);
+#define PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS	\
+	assert(pRExC_state); assert(anyof_flags); assert(invert); assert(ret); assert(flagp)
+STATIC void	S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings);
+#define PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS	\
+	assert(pRExC_state); assert(posix_warnings)
+STATIC void	S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state);
+#define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS	\
+	assert(pRExC_state)
+STATIC SV *	S_parse_uniprop_string(pTHX_ const char * const name, Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, AV ** strings, bool * user_defined_ptr, SV * msg, const STRLEN level);
+#define PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING	\
+	assert(name); assert(user_defined_ptr); assert(msg)
+PERL_STATIC_NO_RET void	S_re_croak(pTHX_ bool utf8, const char* pat, ...)
+			__attribute__noreturn__
+			__attribute__format__(__printf__,pTHX_2,pTHX_3);
+#define PERL_ARGS_ASSERT_RE_CROAK	\
+	assert(pat)
+
+STATIC regnode_offset	S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth);
+#define PERL_ARGS_ASSERT_REG	\
+	assert(pRExC_state); assert(flagp)
+STATIC regnode_offset	S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2);
+#define PERL_ARGS_ASSERT_REG2LANODE	\
+	assert(pRExC_state)
+STATIC regnode_offset	S_reg_la_NOTHING(pTHX_ RExC_state_t *pRExC_state, U32 flags, const char *type);
+#define PERL_ARGS_ASSERT_REG_LA_NOTHING	\
+	assert(pRExC_state); assert(type)
+STATIC regnode_offset	S_reg_la_OPFAIL(pTHX_ RExC_state_t *pRExC_state, U32 flags, const char *type);
+#define PERL_ARGS_ASSERT_REG_LA_OPFAIL	\
+	assert(pRExC_state); assert(type)
+STATIC regnode_offset	S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
+#define PERL_ARGS_ASSERT_REG_NODE	\
+	assert(pRExC_state)
+STATIC SV *	S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags);
+#define PERL_ARGS_ASSERT_REG_SCAN_NAME	\
+	assert(pRExC_state)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE char *	S_reg_skipcomment(RExC_state_t *pRExC_state, char * p);
+#define PERL_ARGS_ASSERT_REG_SKIPCOMMENT	\
+	assert(pRExC_state); assert(p)
+#endif
+STATIC regnode_offset	S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg);
+#define PERL_ARGS_ASSERT_REGANODE	\
+	assert(pRExC_state)
+STATIC regnode_offset	S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
+#define PERL_ARGS_ASSERT_REGATOM	\
+	assert(pRExC_state); assert(flagp)
+STATIC regnode_offset	S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth);
+#define PERL_ARGS_ASSERT_REGBRANCH	\
+	assert(pRExC_state); assert(flagp)
+STATIC regnode_offset	S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist);
+#define PERL_ARGS_ASSERT_REGCLASS	\
+	assert(pRExC_state); assert(flagp)
+STATIC unsigned int	S_regex_set_precedence(const U8 my_operator)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGEX_SET_PRECEDENCE
+
+STATIC void	S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, const regnode_offset operand, const U32 depth);
+#define PERL_ARGS_ASSERT_REGINSERT	\
+	assert(pRExC_state)
+STATIC regnode_offset	S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_len);
+#define PERL_ARGS_ASSERT_REGNODE_GUTS	\
+	assert(pRExC_state)
+STATIC regnode_offset	S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
+#define PERL_ARGS_ASSERT_REGPIECE	\
+	assert(pRExC_state); assert(flagp)
+STATIC regnode_offset	S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg);
+#define PERL_ARGS_ASSERT_REGPNODE	\
+	assert(pRExC_state); assert(arg)
+STATIC bool	S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGTAIL	\
+	assert(pRExC_state); assert(p); assert(val)
+
+STATIC void	S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx);
+#define PERL_ARGS_ASSERT_SET_REGEX_PV	\
+	assert(pRExC_state); assert(Rx)
+STATIC void	S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state, char ** p, const bool force_to_xmod);
+#define PERL_ARGS_ASSERT_SKIP_TO_BE_IGNORED_TEXT	\
+	assert(pRExC_state); assert(p)
+#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C)
+PERL_CALLCONV const char *	Perl_form_alien_digit_msg(pTHX_ const U8 which, const STRLEN valids_len, const char * const first_bad, const char * const send, const bool UTF, const bool braced)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG	\
+	assert(first_bad); assert(send)
+
+PERL_CALLCONV bool	Perl_grok_bslash_c(pTHX_ const char source, U8 * result, const char** message, U32 * packed_warn)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GROK_BSLASH_C	\
+	assert(result); assert(message)
+
+PERL_CALLCONV bool	Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GROK_BSLASH_O	\
+	assert(s); assert(send); assert(uv); assert(message)
+
+PERL_CALLCONV bool	Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** message, U32 * packed_warn, const bool strict, const bool allow_UV_MAX, const bool utf8)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GROK_BSLASH_X	\
+	assert(s); assert(send); assert(uv); assert(message)
+
+#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
+PERL_CALLCONV const char *	Perl_form_cp_too_large_msg(pTHX_ const U8 which, const char * string, const Size_t len, const UV cp)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG
+
+#endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C)
 PERL_CALLCONV bool	Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 *strend, const UV cp)
 			__attribute__warn_unused_result__
@@ -6887,11 +6812,6 @@ PERL_CALLCONV UV	Perl__to_fold_latin1(const U8 c, U8 *p, STRLEN *lenp, const uns
 #define PERL_ARGS_ASSERT__TO_FOLD_LATIN1	\
 	assert(p); assert(lenp)
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
-PERL_CALLCONV SV*	Perl_invlist_clone(pTHX_ SV* const invlist, SV* newlist);
-#define PERL_ARGS_ASSERT_INVLIST_CLONE	\
-	assert(invlist)
-#endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
 PERL_CALLCONV bool	Perl_regcurly(const char *s, const char *e, const char * result[5])
 			__attribute__warn_unused_result__;
@@ -6899,6 +6819,89 @@ PERL_CALLCONV bool	Perl_regcurly(const char *s, const char *e, const char * resu
 	assert(s); assert(e)
 
 #endif
+#if defined(PERL_IN_REGCOMP_DEBUG_C) && defined(DEBUGGING)
+STATIC U8	S_put_charclass_bitmap_innards(pTHX_ SV* sv, char* bitmap, SV* nonbitmap_invlist, SV* only_utf8_locale_invlist, const regnode * const node, const U8 flags, const bool force_as_is_display);
+#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS	\
+	assert(sv)
+STATIC SV*	S_put_charclass_bitmap_innards_common(pTHX_ SV* invlist, SV* posixes, SV* only_utf8, SV* not_utf8, SV* only_utf8_locale, const bool invert);
+#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON	\
+	assert(invlist)
+STATIC void	S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist);
+#define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST	\
+	assert(sv); assert(invlist)
+STATIC void	S_put_code_point(pTHX_ SV* sv, UV c);
+#define PERL_ARGS_ASSERT_PUT_CODE_POINT	\
+	assert(sv)
+STATIC void	S_put_range(pTHX_ SV* sv, UV start, const UV end, const bool allow_literals);
+#define PERL_ARGS_ASSERT_PUT_RANGE	\
+	assert(sv)
+STATIC void	S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
+#define PERL_ARGS_ASSERT_REGDUMP_EXTFLAGS
+STATIC void	S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
+#define PERL_ARGS_ASSERT_REGDUMP_INTFLAGS
+#endif
+#if defined(PERL_IN_REGCOMP_INVLIST_C) || defined(PERL_IN_REGCOMP_C)
+PERL_CALLCONV void	Perl_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_POPULATE_BITMAP_FROM_INVLIST	\
+	assert(invlist); assert(bitmap)
+
+PERL_CALLCONV void	Perl_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_len, SV ** invlist, const UV offset)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_POPULATE_INVLIST_FROM_BITMAP	\
+	assert(bitmap); assert(invlist)
+
+#endif
+#if defined(PERL_IN_REGCOMP_STUDY_C)
+STATIC SV*	S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, const regnode_charclass* const node);
+#define PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC	\
+	assert(pRExC_state); assert(node)
+STATIC SV*	S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST	\
+	assert(pRExC_state); assert(node)
+
+STATIC void	S_rck_elide_nothing(pTHX_ regnode *node);
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING	\
+	assert(node)
+STATIC void	S_ssc_add_range(pTHX_ regnode_ssc *ssc, UV const start, UV const end);
+#define PERL_ARGS_ASSERT_SSC_ADD_RANGE	\
+	assert(ssc)
+STATIC void	S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_charclass *and_with);
+#define PERL_ARGS_ASSERT_SSC_AND	\
+	assert(pRExC_state); assert(ssc); assert(and_with)
+STATIC void	S_ssc_anything(pTHX_ regnode_ssc *ssc);
+#define PERL_ARGS_ASSERT_SSC_ANYTHING	\
+	assert(ssc)
+STATIC void	S_ssc_clear_locale(regnode_ssc *ssc);
+#define PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE	\
+	assert(ssc)
+STATIC void	S_ssc_cp_and(pTHX_ regnode_ssc *ssc, UV const cp);
+#define PERL_ARGS_ASSERT_SSC_CP_AND	\
+	assert(ssc)
+STATIC void	S_ssc_intersection(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
+#define PERL_ARGS_ASSERT_SSC_INTERSECTION	\
+	assert(ssc); assert(invlist)
+STATIC int	S_ssc_is_anything(const regnode_ssc *ssc)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_SSC_IS_ANYTHING	\
+	assert(ssc)
+
+STATIC int	S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state, const regnode_ssc *ssc)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT	\
+	assert(pRExC_state); assert(ssc)
+
+STATIC void	S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_charclass *or_with);
+#define PERL_ARGS_ASSERT_SSC_OR	\
+	assert(pRExC_state); assert(ssc); assert(or_with)
+STATIC void	S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
+#define PERL_ARGS_ASSERT_SSC_UNION	\
+	assert(ssc); assert(invlist)
+STATIC void	S_unwind_scan_frames(pTHX_ const void *p);
+#define PERL_ARGS_ASSERT_UNWIND_SCAN_FRAMES	\
+	assert(p)
+#endif
 #if defined(PERL_IN_REGEXEC_C)
 STATIC LB_enum	S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
 			__attribute__warn_unused_result__;
@@ -7045,6 +7048,64 @@ STATIC void	S_to_utf8_substr(pTHX_ regexp * prog);
 #define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR	\
 	assert(prog)
 #endif
+#if defined(PERL_IN_REGEX_ENGINE)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE bool	Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE regnode*	Perl_regnext(pTHX_ const regnode* p)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGNEXT
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE regnode*	Perl_regnode_after(pTHX_ const regnode* p, bool varies)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGNODE_AFTER
+#endif
+
+#endif
+#if defined(PERL_IN_REGEX_ENGINE) && defined(DEBUGGING)
+PERL_CALLCONV void	Perl_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state, regnode *scan, U32 depth, U32 flags)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_DEBUG_PEEP	\
+	assert(str); assert(pRExC_state)
+
+PERL_CALLCONV void	Perl_debug_show_study_flags(pTHX_ U32 flags, const char *open_str, const char *close_str)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_DEBUG_SHOW_STUDY_FLAGS	\
+	assert(open_str); assert(close_str)
+
+PERL_CALLCONV void	Perl_debug_studydata(pTHX_ const char *where, scan_data_t *data, U32 depth, int is_inf, SSize_t min, SSize_t stopmin, SSize_t delta)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_DEBUG_STUDYDATA	\
+	assert(where)
+
+PERL_CALLCONV const regnode*	Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const regnode *last, const regnode *plast, SV* sv, I32 indent, U32 depth)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_DUMPUNTIL	\
+	assert(r); assert(start); assert(node); assert(sv)
+
+PERL_CALLCONV int	Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_RE_INDENTF	\
+	assert(fmt)
+
+PERL_CALLCONV int	Perl_re_printf(pTHX_ const char *fmt, ...)
+			__attribute__visibility__("hidden")
+			__attribute__format__(__printf__,pTHX_1,pTHX_2);
+#define PERL_ARGS_ASSERT_RE_PRINTF	\
+	assert(fmt)
+
+PERL_CALLCONV void	Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
+			__attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_REGPROP	\
+	assert(sv); assert(o)
+
+#endif
 #if defined(PERL_IN_SCOPE_C)
 STATIC void	S_save_pushptri32ptr(pTHX_ void *const ptr1, const I32 i, void *const ptr2, const int type);
 #define PERL_ARGS_ASSERT_SAVE_PUSHPTRI32PTR
diff --git a/regcomp.c b/regcomp.c
index de3335ae25..d88109c78b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -31,29 +31,25 @@
  * with the POSIX routines of the same names.
 */
 
-#ifdef PERL_EXT_RE_BUILD
-#include "re_top.h"
-#endif
-
 /*
  * pregcomp and pregexec -- regsub and regerror are not used in perl
  *
- *      Copyright (c) 1986 by University of Toronto.
- *      Written by Henry Spencer.  Not derived from licensed software.
+ *	Copyright (c) 1986 by University of Toronto.
+ *	Written by Henry Spencer.  Not derived from licensed software.
  *
- *      Permission is granted to anyone to use this software for any
- *      purpose on any computer system, and to redistribute it freely,
- *      subject to the following restrictions:
+ *	Permission is granted to anyone to use this software for any
+ *	purpose on any computer system, and to redistribute it freely,
+ *	subject to the following restrictions:
  *
- *      1. The author is not responsible for the consequences of use of
- *              this software, no matter how awful, even if they arise
- *              from defects in it.
+ *	1. The author is not responsible for the consequences of use of
+ *		this software, no matter how awful, even if they arise
+ *		from defects in it.
  *
- *      2. The origin of this software must not be misrepresented, either
- *              by explicit claim or by omission.
+ *	2. The origin of this software must not be misrepresented, either
+ *		by explicit claim or by omission.
  *
- *      3. Altered versions must be plainly marked as such, and must not
- *              be misrepresented as being the original software.
+ *	3. Altered versions must be plainly marked as such, and must not
+ *		be misrepresented as being the original software.
  *
  *
  ****    Alterations to Henry's code are...
@@ -127,11 +123,16 @@
  * access data that we don't want to duplicate.
  */
 
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
 #include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
 #define PERL_IN_REGCOMP_C
 #include "perl.h"
 
-#define REG_COMP_C
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
 EXTERN_C const struct regexp_engine my_reg_engine;
@@ -142,1349 +143,7 @@ EXTERN_C const struct regexp_engine wild_reg_engine;
 
 #include "invlist_inline.h"
 #include "unicode_constants.h"
-
-#ifndef STATIC
-#define STATIC  static
-#endif
-
-/* this is a chain of data about sub patterns we are processing that
-   need to be handled separately/specially in study_chunk. Its so
-   we can simulate recursion without losing state.  */
-struct scan_frame;
-typedef struct scan_frame {
-    regnode *last_regnode;      /* last node to process in this frame */
-    regnode *next_regnode;      /* next node to process when last is reached */
-    U32 prev_recursed_depth;
-    I32 stopparen;              /* what stopparen do we use */
-    bool in_gosub;              /* this or an outer frame is for GOSUB */
-
-    struct scan_frame *this_prev_frame; /* this previous frame */
-    struct scan_frame *prev_frame;      /* previous frame */
-    struct scan_frame *next_frame;      /* next frame */
-} scan_frame;
-
-/* Certain characters are output as a sequence with the first being a
- * backslash. */
-#define isBACKSLASHED_PUNCT(c)  memCHRs("-[]\\^", c)
-
-
-struct RExC_state_t {
-    U32         flags;                  /* RXf_* are we folding, multilining? */
-    U32         pm_flags;               /* PMf_* stuff from the calling PMOP */
-    char        *precomp;               /* uncompiled string. */
-    char        *precomp_end;           /* pointer to end of uncompiled string. */
-    REGEXP      *rx_sv;                 /* The SV that is the regexp. */
-    regexp      *rx;                    /* perl core regexp structure */
-    regexp_internal     *rxi;           /* internal data for regexp object
-                                           pprivate field */
-    char        *start;                 /* Start of input for compile */
-    char        *end;                   /* End of input for compile */
-    char        *parse;                 /* Input-scan pointer. */
-    char        *copy_start;            /* start of copy of input within
-                                           constructed parse string */
-    char        *save_copy_start;       /* Provides one level of saving
-                                           and restoring 'copy_start' */
-    char        *copy_start_in_input;   /* Position in input string
-                                           corresponding to copy_start */
-    SSize_t     whilem_seen;            /* number of WHILEM in this expr */
-    regnode     *emit_start;            /* Start of emitted-code area */
-    regnode_offset emit;                /* Code-emit pointer */
-    I32         naughty;                /* How bad is this pattern? */
-    I32         sawback;                /* Did we see \1, ...? */
-    SSize_t     size;                   /* Number of regnode equivalents in
-                                           pattern */
-    Size_t      sets_depth;              /* Counts recursion depth of already-
-                                           compiled regex set patterns */
-    U32         seen;
-
-    I32      parens_buf_size;           /* #slots malloced open/close_parens */
-    regnode_offset *open_parens;        /* offsets to open parens */
-    regnode_offset *close_parens;       /* offsets to close parens */
-    HV          *paren_names;           /* Paren names */
-
-    /* position beyond 'precomp' of the warning message furthest away from
-     * 'precomp'.  During the parse, no warnings are raised for any problems
-     * earlier in the parse than this position.  This works if warnings are
-     * raised the first time a given spot is parsed, and if only one
-     * independent warning is raised for any given spot */
-    Size_t      latest_warn_offset;
-
-    I32         npar;                   /* Capture buffer count so far in the
-                                           parse, (OPEN) plus one. ("par" 0 is
-                                           the whole pattern)*/
-    I32         total_par;              /* During initial parse, is either 0,
-                                           or -1; the latter indicating a
-                                           reparse is needed.  After that pass,
-                                           it is what 'npar' became after the
-                                           pass.  Hence, it being > 0 indicates
-                                           we are in a reparse situation */
-    I32         nestroot;               /* root parens we are in - used by
-                                           accept */
-    I32         seen_zerolen;
-    regnode     *end_op;                /* END node in program */
-    I32         utf8;           /* whether the pattern is utf8 or not */
-    I32         orig_utf8;      /* whether the pattern was originally in utf8 */
-                                /* XXX use this for future optimisation of case
-                                 * where pattern must be upgraded to utf8. */
-    I32         uni_semantics;  /* If a d charset modifier should use unicode
-                                   rules, even if the pattern is not in
-                                   utf8 */
-
-    I32         recurse_count;          /* Number of recurse regops we have generated */
-    regnode     **recurse;              /* Recurse regops */
-    U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
-                                           through */
-    U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
-    I32         in_lookaround;
-    I32         contains_locale;
-    I32         override_recoding;
-    I32         recode_x_to_native;
-    I32         in_multi_char_class;
-    int         code_index;             /* next code_blocks[] slot */
-    struct reg_code_blocks *code_blocks;/* positions of literal (?{})
-                                            within pattern */
-    SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
-    scan_frame *frame_head;
-    scan_frame *frame_last;
-    U32         frame_count;
-    AV         *warn_text;
-    HV         *unlexed_names;
-    SV          *runtime_code_qr;       /* qr with the runtime code blocks */
-#ifdef DEBUGGING
-    const char  *lastparse;
-    I32         lastnum;
-    U32         study_chunk_recursed_count;
-    AV          *paren_name_list;       /* idx -> name */
-    SV          *mysv1;
-    SV          *mysv2;
-
-#define RExC_lastparse  (pRExC_state->lastparse)
-#define RExC_lastnum    (pRExC_state->lastnum)
-#define RExC_paren_name_list    (pRExC_state->paren_name_list)
-#define RExC_study_chunk_recursed_count    (pRExC_state->study_chunk_recursed_count)
-#define RExC_mysv       (pRExC_state->mysv1)
-#define RExC_mysv1      (pRExC_state->mysv1)
-#define RExC_mysv2      (pRExC_state->mysv2)
-
-#endif
-    bool        seen_d_op;
-    bool        strict;
-    bool        study_started;
-    bool        in_script_run;
-    bool        use_BRANCHJ;
-    bool        sWARN_EXPERIMENTAL__VLB;
-    bool        sWARN_EXPERIMENTAL__REGEX_SETS;
-};
-
-#define RExC_flags      (pRExC_state->flags)
-#define RExC_pm_flags   (pRExC_state->pm_flags)
-#define RExC_precomp    (pRExC_state->precomp)
-#define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
-#define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
-#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
-#define RExC_precomp_end (pRExC_state->precomp_end)
-#define RExC_rx_sv      (pRExC_state->rx_sv)
-#define RExC_rx         (pRExC_state->rx)
-#define RExC_rxi        (pRExC_state->rxi)
-#define RExC_start      (pRExC_state->start)
-#define RExC_end        (pRExC_state->end)
-#define RExC_parse      (pRExC_state->parse)
-#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
-#define RExC_whilem_seen        (pRExC_state->whilem_seen)
-#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
-                                                   under /d from /u ? */
-
-#define RExC_emit       (pRExC_state->emit)
-#define RExC_emit_start (pRExC_state->emit_start)
-#define RExC_sawback    (pRExC_state->sawback)
-#define RExC_seen       (pRExC_state->seen)
-#define RExC_size       (pRExC_state->size)
-#define RExC_maxlen        (pRExC_state->maxlen)
-#define RExC_npar       (pRExC_state->npar)
-#define RExC_total_parens       (pRExC_state->total_par)
-#define RExC_parens_buf_size    (pRExC_state->parens_buf_size)
-#define RExC_nestroot   (pRExC_state->nestroot)
-#define RExC_seen_zerolen       (pRExC_state->seen_zerolen)
-#define RExC_utf8       (pRExC_state->utf8)
-#define RExC_uni_semantics      (pRExC_state->uni_semantics)
-#define RExC_orig_utf8  (pRExC_state->orig_utf8)
-#define RExC_open_parens        (pRExC_state->open_parens)
-#define RExC_close_parens       (pRExC_state->close_parens)
-#define RExC_end_op     (pRExC_state->end_op)
-#define RExC_paren_names        (pRExC_state->paren_names)
-#define RExC_recurse    (pRExC_state->recurse)
-#define RExC_recurse_count      (pRExC_state->recurse_count)
-#define RExC_sets_depth         (pRExC_state->sets_depth)
-#define RExC_study_chunk_recursed        (pRExC_state->study_chunk_recursed)
-#define RExC_study_chunk_recursed_bytes  \
-                                   (pRExC_state->study_chunk_recursed_bytes)
-#define RExC_in_lookaround      (pRExC_state->in_lookaround)
-#define RExC_contains_locale    (pRExC_state->contains_locale)
-#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
-
-#ifdef EBCDIC
-#  define SET_recode_x_to_native(x)                                         \
-                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
-#else
-#  define SET_recode_x_to_native(x) NOOP
-#endif
-
-#define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
-#define RExC_frame_head (pRExC_state->frame_head)
-#define RExC_frame_last (pRExC_state->frame_last)
-#define RExC_frame_count (pRExC_state->frame_count)
-#define RExC_strict (pRExC_state->strict)
-#define RExC_study_started      (pRExC_state->study_started)
-#define RExC_warn_text (pRExC_state->warn_text)
-#define RExC_in_script_run      (pRExC_state->in_script_run)
-#define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
-#define RExC_warned_WARN_EXPERIMENTAL__VLB (pRExC_state->sWARN_EXPERIMENTAL__VLB)
-#define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
-#define RExC_unlexed_names (pRExC_state->unlexed_names)
-
-
-/***********************************************************************/
-/* UTILITY MACROS FOR ADVANCING OR SETTING THE PARSE "CURSOR" RExC_parse
- *
- * All of these macros depend on the above RExC_ accessor macros, which
- * in turns depend on a variable pRExC_state being in scope where they
- * are used. This is the standard regexp parser context variable which is
- * passed into every non-trivial parse function in this file.
- *
- * Note that the UTF macro is itself a wrapper around RExC_utf8, so all
- * of the macros which do not take an argument will operate on the
- * pRExC_state structure *only*.
- *
- * Please do NOT modify RExC_parse without using these macros. In the
- * future these macros will be extended for enhanced debugging and trace
- * output during the parse process.
- */
-
-/* RExC_parse_incf(flag)
- *
- * Increment RExC_parse to point at the next codepoint, while doing
- * the right thing depending on whether we are parsing UTF-8 strings
- * or not. The 'flag' argument determines if content is UTF-8 or not,
- * intended for cases where this is NOT governed by the UTF macro.
- *
- * Use RExC_parse_inc() if UTF-8ness is controlled by the UTF macro.
- *
- * WARNING: Does NOT take into account RExC_end; it is the callers
- * responsibility to make sure there are enough octets left in
- * RExC_parse to ensure that when processing UTF-8 we would not read
- * past the end of the string.
- */
-#define RExC_parse_incf(flag) STMT_START {              \
-    RExC_parse += (flag) ? UTF8SKIP(RExC_parse) : 1;    \
-} STMT_END
-
-/* RExC_parse_inc_safef(flag)
- *
- * Safely increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not and NOT reading past the end of the buffer.
- * The 'flag' argument determines if content is UTF-8 or not,
- * intended for cases where this is NOT governed by the UTF macro.
- *
- * Use RExC_parse_safe() if UTF-8ness is controlled by the UTF macro.
- *
- * NOTE: Will NOT read past RExC_end when content is UTF-8.
- */
-#define RExC_parse_inc_safef(flag) STMT_START {                     \
-    RExC_parse += (flag) ? UTF8_SAFE_SKIP(RExC_parse,RExC_end) : 1; \
-} STMT_END
-
-/* RExC_parse_inc()
- *
- * Increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not.
- *
- * WARNING: Does NOT take into account RExC_end, it is the callers
- * responsibility to make sure there are enough octets left in
- * RExC_parse to ensure that when processing UTF-8 we would not read
- * past the end of the string.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc() RExC_parse_incf(UTF)
-
-/* RExC_parse_inc_safe()
- *
- * Safely increment RExC_parse to point at the next codepoint,
- * doing the right thing depending on whether we are parsing
- * UTF-8 strings or not and NOT reading past the end of the buffer.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc_safe() RExC_parse_inc_safef(UTF)
-
-/* RExC_parse_inc_utf8()
- *
- * Increment RExC_parse to point at the next utf8 codepoint,
- * assumes content is UTF-8.
- *
- * WARNING: Does NOT take into account RExC_end; it is the callers
- * responsibility to make sure there are enough octets left in RExC_parse
- * to ensure that when processing UTF-8 we would not read past the end
- * of the string.
- */
-#define RExC_parse_inc_utf8() STMT_START {  \
-    RExC_parse += UTF8SKIP(RExC_parse);     \
-} STMT_END
-
-/* RExC_parse_inc_if_char()
- *
- * Increment RExC_parse to point at the next codepoint, if and only
- * if the current parse point is NOT a NULL, while doing the right thing
- * depending on whether we are parsing UTF-8 strings or not.
- *
- * WARNING: Does NOT take into account RExC_end, it is the callers
- * responsibility to make sure there are enough octets left in RExC_parse
- * to ensure that when processing UTF-8 we would not read past the end
- * of the string.
- *
- * NOTE: whether we are parsing UTF-8 or not is determined by the
- * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
- * macro operates on the pRExC_state structure only.
- */
-#define RExC_parse_inc_if_char() STMT_START {         \
-    RExC_parse += SKIP_IF_CHAR(RExC_parse,RExC_end);  \
-} STMT_END
-
-/* RExC_parse_inc_by(n_octets)
- *
- * Increment the parse cursor by the number of octets specified by
- * the 'n_octets' argument.
- *
- * NOTE: Does NOT check ANY constraints. It is the callers responsibility
- * that this will not move past the end of the string, or leave the
- * pointer in the middle of a UTF-8 sequence.
- *
- * Typically used to advanced past previously analyzed content.
- */
-#define RExC_parse_inc_by(n_octets) STMT_START {  \
-    RExC_parse += (n_octets);                     \
-} STMT_END
-
-/* RExC_parse_set(to_ptr)
- *
- * Sets the RExC_parse pointer to the pointer specified by the 'to'
- * argument. No validation whatsoever is performed on the to pointer.
- */
-#define RExC_parse_set(to_ptr) STMT_START { \
-    RExC_parse = (to_ptr);                  \
-} STMT_END
-
-/**********************************************************************/
-
-/* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
- * a flag to disable back-off on the fixed/floating substrings - if it's
- * a high complexity pattern we assume the benefit of avoiding a full match
- * is worth the cost of checking for the substrings even if they rarely help.
- */
-#define RExC_naughty    (pRExC_state->naughty)
-#define TOO_NAUGHTY (10)
-#define MARK_NAUGHTY(add) \
-    if (RExC_naughty < TOO_NAUGHTY) \
-        RExC_naughty += (add)
-#define MARK_NAUGHTY_EXP(exp, add) \
-    if (RExC_naughty < TOO_NAUGHTY) \
-        RExC_naughty += RExC_naughty / (exp) + (add)
-
-#define isNON_BRACE_QUANTIFIER(c)   ((c) == '*' || (c) == '+' || (c) == '?')
-#define isQUANTIFIER(s,e)  (   isNON_BRACE_QUANTIFIER(*s)                      \
-                            || ((*s) == '{' && regcurly(s, e, NULL)))
-
-/*
- * Flags to be passed up.
- */
-#define HASWIDTH        0x01    /* Known to not match null strings, could match
-                                   non-null ones. */
-#define SIMPLE          0x02    /* Exactly one character wide */
-                                /* (or LNBREAK as a special case) */
-#define POSTPONED       0x08    /* (?1),(?&name), (??{...}) or similar */
-#define TRYAGAIN        0x10    /* Weeded out a declaration. */
-#define RESTART_PARSE   0x20    /* Need to redo the parse */
-#define NEED_UTF8       0x40    /* In conjunction with RESTART_PARSE, need to
-                                   calcuate sizes as UTF-8 */
-
-#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
-
-/* whether trie related optimizations are enabled */
-#if PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION
-#define TRIE_STUDY_OPT
-#define FULL_TRIE_STUDY
-#define TRIE_STCLASS
-#endif
-
-/* About the term "restudy" and the var "restudied" and the defines
- * "SCF_TRIE_RESTUDY" and "SCF_TRIE_DOING_RESTUDY": All of these relate to
- * doing multiple study_chunk() calls over the same set of opcodes for* the
- * purpose of enhanced TRIE optimizations.
- *
- * Specifically, when TRIE_STUDY_OPT is defined, and it is defined in normal
- * builds, (see above), during compilation SCF_TRIE_RESTUDY may be enabled
- * which then causes the Perl_re_op_compile() to then call the optimizer
- * S_study_chunk() a second time to perform additional optimizations,
- * including the aho_corasick startclass optimization.
- * This additional pass will only happen once, which is managed by the
- * 'restudied' variable in Perl_re_op_compile().
- *
- * When this second pass is under way the flags passed into study_chunk() will
- * include SCF_TRIE_DOING_RESTUDY and this flag is and must be cascaded down
- * to any recursive calls to S_study_chunk().
- *
- * IMPORTANT: Any logic in study_chunk() that emits warnings should check that
- * the SCF_TRIE_DOING_RESTUDY flag is NOT set in 'flags', or the warning may
- * be produced twice.
- *
- * See commit 07be1b83a6b2d24b492356181ddf70e1c7917ae3 and
- * 688e03912e3bff2d2419c457d8b0e1bab3eb7112 for more details.
- */
-
-
-#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
-#define PBITVAL(paren) (1 << ((paren) & 7))
-#define PAREN_OFFSET(depth) \
-    (RExC_study_chunk_recursed + (depth) * RExC_study_chunk_recursed_bytes)
-#define PAREN_TEST(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) & PBITVAL(paren))
-#define PAREN_SET(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) |= PBITVAL(paren))
-#define PAREN_UNSET(depth, paren) \
-    (PBYTE(PAREN_OFFSET(depth), paren) &= ~PBITVAL(paren))
-
-#define REQUIRE_UTF8(flagp) STMT_START {                                   \
-                                     if (!UTF) {                           \
-                                         *flagp = RESTART_PARSE|NEED_UTF8; \
-                                         return 0;                         \
-                                     }                                     \
-                             } STMT_END
-
-/* /u is to be chosen if we are supposed to use Unicode rules, or if the
- * pattern is in UTF-8.  This latter condition is in case the outermost rules
- * are locale.  See GH #17278 */
-#define toUSE_UNI_CHARSET_NOT_DEPENDS (RExC_uni_semantics || UTF)
-
-/* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
- * a flag that indicates we need to override /d with /u as a result of
- * something in the pattern.  It should only be used in regards to calling
- * set_regex_charset() or get_regex_charset() */
-#define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
-    STMT_START {                                                            \
-            if (DEPENDS_SEMANTICS) {                                        \
-                set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
-                RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
-                    /* No need to restart the parse if we haven't seen      \
-                     * anything that differs between /u and /d, and no need \
-                     * to restart immediately if we're going to reparse     \
-                     * anyway to count parens */                            \
-                    *flagp |= RESTART_PARSE;                                \
-                    return restart_retval;                                  \
-                }                                                           \
-            }                                                               \
-    } STMT_END
-
-#define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
-    STMT_START {                                                            \
-                RExC_use_BRANCHJ = 1;                                       \
-                *flagp |= RESTART_PARSE;                                    \
-                return restart_retval;                                      \
-    } STMT_END
-
-/* Until we have completed the parse, we leave RExC_total_parens at 0 or
- * less.  After that, it must always be positive, because the whole re is
- * considered to be surrounded by virtual parens.  Setting it to negative
- * indicates there is some construct that needs to know the actual number of
- * parens to be properly handled.  And that means an extra pass will be
- * required after we've counted them all */
-#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
-#define REQUIRE_PARENS_PASS                                                 \
-    STMT_START {  /* No-op if have completed a pass */                      \
-                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
-    } STMT_END
-#define IN_PARENS_PASS (RExC_total_parens < 0)
-
-
-/* This is used to return failure (zero) early from the calling function if
- * various flags in 'flags' are set.  Two flags always cause a return:
- * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
- * additional flags that should cause a return; 0 if none.  If the return will
- * be done, '*flagp' is first set to be all of the flags that caused the
- * return. */
-#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
-    STMT_START {                                                            \
-            if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
-                *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
-                return 0;                                                   \
-            }                                                               \
-    } STMT_END
-
-#define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
-
-#define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
-                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
-#define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
-                                    if (MUST_RESTART(*(flagp))) return 0
-
-/* This converts the named class defined in regcomp.h to its equivalent class
- * number defined in handy.h. */
-#define namedclass_to_classnum(class)  ((int) ((class) / 2))
-#define classnum_to_namedclass(classnum)  ((classnum) * 2)
-
-#define _invlist_union_complement_2nd(a, b, output) \
-                        _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
-#define _invlist_intersection_complement_2nd(a, b, output) \
-                 _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
-
-/* We add a marker if we are deferring expansion of a property that is both
- * 1) potentiallly user-defined; and
- * 2) could also be an official Unicode property.
- *
- * Without this marker, any deferred expansion can only be for a user-defined
- * one.  This marker shouldn't conflict with any that could be in a legal name,
- * and is appended to its name to indicate this.  There is a string and
- * character form */
-#define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
-#define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
-
-/* What is infinity for optimization purposes */
-#define OPTIMIZE_INFTY  SSize_t_MAX
-
-/* About scan_data_t.
-
-  During optimisation we recurse through the regexp program performing
-  various inplace (keyhole style) optimisations. In addition study_chunk
-  and scan_commit populate this data structure with information about
-  what strings MUST appear in the pattern. We look for the longest
-  string that must appear at a fixed location, and we look for the
-  longest string that may appear at a floating location. So for instance
-  in the pattern:
-
-    /FOO[xX]A.*B[xX]BAR/
-
-  Both 'FOO' and 'A' are fixed strings. Both 'B' and 'BAR' are floating
-  strings (because they follow a .* construct). study_chunk will identify
-  both FOO and BAR as being the longest fixed and floating strings respectively.
-
-  The strings can be composites, for instance
-
-     /(f)(o)(o)/
-
-  will result in a composite fixed substring 'foo'.
-
-  For each string some basic information is maintained:
-
-  - min_offset
-    This is the position the string must appear at, or not before.
-    It also implicitly (when combined with minlenp) tells us how many
-    characters must match before the string we are searching for.
-    Likewise when combined with minlenp and the length of the string it
-    tells us how many characters must appear after the string we have
-    found.
-
-  - max_offset
-    Only used for floating strings. This is the rightmost point that
-    the string can appear at. If set to OPTIMIZE_INFTY it indicates that the
-    string can occur infinitely far to the right.
-    For fixed strings, it is equal to min_offset.
-
-  - minlenp
-    A pointer to the minimum number of characters of the pattern that the
-    string was found inside. This is important as in the case of positive
-    lookahead or positive lookbehind we can have multiple patterns
-    involved. Consider
-
-    /(?=FOO).*F/
-
-    The minimum length of the pattern overall is 3, the minimum length
-    of the lookahead part is 3, but the minimum length of the part that
-    will actually match is 1. So 'FOO's minimum length is 3, but the
-    minimum length for the F is 1. This is important as the minimum length
-    is used to determine offsets in front of and behind the string being
-    looked for.  Since strings can be composites this is the length of the
-    pattern at the time it was committed with a scan_commit. Note that
-    the length is calculated by study_chunk, so that the minimum lengths
-    are not known until the full pattern has been compiled, thus the
-    pointer to the value.
-
-  - lookbehind
-
-    In the case of lookbehind the string being searched for can be
-    offset past the start point of the final matching string.
-    If this value was just blithely removed from the min_offset it would
-    invalidate some of the calculations for how many chars must match
-    before or after (as they are derived from min_offset and minlen and
-    the length of the string being searched for).
-    When the final pattern is compiled and the data is moved from the
-    scan_data_t structure into the regexp structure the information
-    about lookbehind is factored in, with the information that would
-    have been lost precalculated in the end_shift field for the
-    associated string.
-
-  The fields pos_min and pos_delta are used to store the minimum offset
-  and the delta to the maximum offset at the current point in the pattern.
-
-*/
-
-struct scan_data_substrs {
-    SV      *str;       /* longest substring found in pattern */
-    SSize_t min_offset; /* earliest point in string it can appear */
-    SSize_t max_offset; /* latest point in string it can appear */
-    SSize_t *minlenp;   /* pointer to the minlen relevant to the string */
-    SSize_t lookbehind; /* is the pos of the string modified by LB */
-    I32 flags;          /* per substring SF_* and SCF_* flags */
-};
-
-typedef struct scan_data_t {
-    /*I32 len_min;      unused */
-    /*I32 len_delta;    unused */
-    SSize_t pos_min;
-    SSize_t pos_delta;
-    SV *last_found;
-    SSize_t last_end;       /* min value, <0 unless valid. */
-    SSize_t last_start_min;
-    SSize_t last_start_max;
-    U8      cur_is_floating; /* whether the last_* values should be set as
-                              * the next fixed (0) or floating (1)
-                              * substring */
-
-    /* [0] is longest fixed substring so far, [1] is longest float so far */
-    struct scan_data_substrs  substrs[2];
-
-    I32 flags;             /* common SF_* and SCF_* flags */
-    I32 whilem_c;
-    SSize_t *last_closep;
-    regnode **last_close_opp; /* pointer to pointer to last CLOSE regop
-                                 seen. DO NOT DEREFERENCE the regnode
-                                 pointer - the op may have been optimized
-                                 away */
-    regnode_ssc *start_class;
-} scan_data_t;
-
-/*
- * Forward declarations for pregcomp()'s friends.
- */
-
-static const scan_data_t zero_scan_data = {
-    0, 0, NULL, 0, 0, 0, 0,
-    {
-        { NULL, 0, 0, 0, 0, 0 },
-        { NULL, 0, 0, 0, 0, 0 },
-    },
-    0, 0, NULL, NULL, NULL
-};
-
-/* study flags */
-
-#define SF_BEFORE_SEOL          0x0001
-#define SF_BEFORE_MEOL          0x0002
-#define SF_BEFORE_EOL           (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
-
-#define SF_IS_INF               0x0040
-#define SF_HAS_PAR              0x0080
-#define SF_IN_PAR               0x0100
-#define SF_HAS_EVAL             0x0200
-
-
-/* SCF_DO_SUBSTR is the flag that tells the regexp analyzer to track the
- * longest substring in the pattern. When it is not set the optimiser keeps
- * track of position, but does not keep track of the actual strings seen,
- *
- * So for instance /foo/ will be parsed with SCF_DO_SUBSTR being true, but
- * /foo/i will not.
- *
- * Similarly, /foo.*(blah|erm|huh).*fnorble/ will have "foo" and "fnorble"
- * parsed with SCF_DO_SUBSTR on, but while processing the (...) it will be
- * turned off because of the alternation (BRANCH). */
-#define SCF_DO_SUBSTR           0x0400
-
-#define SCF_DO_STCLASS_AND      0x0800
-#define SCF_DO_STCLASS_OR       0x1000
-#define SCF_DO_STCLASS          (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
-#define SCF_WHILEM_VISITED_POS  0x2000
-
-#define SCF_TRIE_RESTUDY        0x4000 /* Need to do restudy in study_chunk()?
-                                          Search for "restudy" in this file
-                                          to find a detailed explanation.*/
-#define SCF_SEEN_ACCEPT         0x8000
-#define SCF_TRIE_DOING_RESTUDY 0x10000 /* Are we in restudy right now?
-                                          Search for "restudy" in this file
-                                          to find a detailed explanation. */
-#define SCF_IN_DEFINE          0x20000
-
-
-
-#define UTF cBOOL(RExC_utf8)
-
-/* The enums for all these are ordered so things work out correctly */
-#define LOC (get_regex_charset(RExC_flags) == REGEX_LOCALE_CHARSET)
-#define DEPENDS_SEMANTICS (get_regex_charset(RExC_flags)                    \
-                                                     == REGEX_DEPENDS_CHARSET)
-#define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
-#define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags)                \
-                                                     >= REGEX_UNICODE_CHARSET)
-#define ASCII_RESTRICTED (get_regex_charset(RExC_flags)                      \
-                                            == REGEX_ASCII_RESTRICTED_CHARSET)
-#define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags)             \
-                                            >= REGEX_ASCII_RESTRICTED_CHARSET)
-#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags)                 \
-                                        == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
-
-#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
-
-/* For programs that want to be strictly Unicode compatible by dying if any
- * attempt is made to match a non-Unicode code point against a Unicode
- * property.  */
-#define ALWAYS_WARN_SUPER  ckDEAD(packWARN(WARN_NON_UNICODE))
-
-#define OOB_NAMEDCLASS          -1
-
-/* There is no code point that is out-of-bounds, so this is problematic.  But
- * its only current use is to initialize a variable that is always set before
- * looked at. */
-#define OOB_UNICODE             0xDEADBEEF
-
-#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-
-
-/* length of regex to show in messages that don't mark a position within */
-#define RegexLengthToShowInErrorMessages 127
-
-/*
- * If MARKER[12] are adjusted, be sure to adjust the constants at the top
- * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
- * op/pragma/warn/regcomp.
- */
-#define MARKER1 "<-- HERE"    /* marker as it appears in the description */
-#define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
-
-#define REPORT_LOCATION " in regex; marked by " MARKER1    \
-                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
-
-/* The code in this file in places uses one level of recursion with parsing
- * rebased to an alternate string constructed by us in memory.  This can take
- * the form of something that is completely different from the input, or
- * something that uses the input as part of the alternate.  In the first case,
- * there should be no possibility of an error, as we are in complete control of
- * the alternate string.  But in the second case we don't completely control
- * the input portion, so there may be errors in that.  Here's an example:
- *      /[abc\x{DF}def]/ui
- * is handled specially because \x{df} folds to a sequence of more than one
- * character: 'ss'.  What is done is to create and parse an alternate string,
- * which looks like this:
- *      /(?:\x{DF}|[abc\x{DF}def])/ui
- * where it uses the input unchanged in the middle of something it constructs,
- * which is a branch for the DF outside the character class, and clustering
- * parens around the whole thing. (It knows enough to skip the DF inside the
- * class while in this substitute parse.) 'abc' and 'def' may have errors that
- * need to be reported.  The general situation looks like this:
- *
- *                                       |<------- identical ------>|
- *              sI                       tI               xI       eI
- * Input:       ---------------------------------------------------------------
- * Constructed:         ---------------------------------------------------
- *                      sC               tC               xC       eC     EC
- *                                       |<------- identical ------>|
- *
- * sI..eI   is the portion of the input pattern we are concerned with here.
- * sC..EC   is the constructed substitute parse string.
- *  sC..tC  is constructed by us
- *  tC..eC  is an exact duplicate of the portion of the input pattern tI..eI.
- *          In the diagram, these are vertically aligned.
- *  eC..EC  is also constructed by us.
- * xC       is the position in the substitute parse string where we found a
- *          problem.
- * xI       is the position in the original pattern corresponding to xC.
- *
- * We want to display a message showing the real input string.  Thus we need to
- * translate from xC to xI.  We know that xC >= tC, since the portion of the
- * string sC..tC has been constructed by us, and so shouldn't have errors.  We
- * get:
- *      xI = tI + (xC - tC)
- *
- * When the substitute parse is constructed, the code needs to set:
- *      RExC_start (sC)
- *      RExC_end (eC)
- *      RExC_copy_start_in_input  (tI)
- *      RExC_copy_start_in_constructed (tC)
- * and restore them when done.
- *
- * During normal processing of the input pattern, both
- * 'RExC_copy_start_in_input' and 'RExC_copy_start_in_constructed' are set to
- * sI, so that xC equals xI.
- */
-
-#define sI              RExC_precomp
-#define eI              RExC_precomp_end
-#define sC              RExC_start
-#define eC              RExC_end
-#define tI              RExC_copy_start_in_input
-#define tC              RExC_copy_start_in_constructed
-#define xI(xC)          (tI + (xC - tC))
-#define xI_offset(xC)   (xI(xC) - sI)
-
-#define REPORT_LOCATION_ARGS(xC)                                            \
-    UTF8fARG(UTF,                                                           \
-             (xI(xC) > eI) /* Don't run off end */                          \
-              ? eI - sI   /* Length before the <--HERE */                   \
-              : ((xI_offset(xC) >= 0)                                       \
-                 ? xI_offset(xC)                                            \
-                 : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
-                                    IVdf " trying to output message for "   \
-                                    " pattern %.*s",                        \
-                                    __FILE__, __LINE__, (IV) xI_offset(xC), \
-                                    ((int) (eC - sC)), sC), 0)),            \
-             sI),         /* The input pattern printed up to the <--HERE */ \
-    UTF8fARG(UTF,                                                           \
-             (xI(xC) > eI) ? 0 : eI - xI(xC), /* Length after <--HERE */    \
-             (xI(xC) > eI) ? eI : xI(xC))     /* pattern after <--HERE */
-
-/* Used to point after bad bytes for an error message, but avoid skipping
- * past a nul byte. */
-#define SKIP_IF_CHAR(s, e) (!*(s) ? 0 : UTF ? UTF8_SAFE_SKIP(s, e) : 1)
-
-/* Set up to clean up after our imminent demise */
-#define PREPARE_TO_DIE                                                      \
-    STMT_START {                                                            \
-        if (RExC_rx_sv)                                                     \
-            SAVEFREESV(RExC_rx_sv);                                         \
-        if (RExC_open_parens)                                               \
-            SAVEFREEPV(RExC_open_parens);                                   \
-        if (RExC_close_parens)                                              \
-            SAVEFREEPV(RExC_close_parens);                                  \
-    } STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
- * arg. Show regex, up to a maximum length. If it's too long, chop and add
- * "...".
- */
-#define _FAIL(code) STMT_START {                                        \
-    const char *ellipses = "";                                          \
-    IV len = RExC_precomp_end - RExC_precomp;                           \
-                                                                        \
-    PREPARE_TO_DIE;                                                     \
-    if (len > RegexLengthToShowInErrorMessages) {                       \
-        /* chop 10 shorter than the max, to ensure meaning of "..." */  \
-        len = RegexLengthToShowInErrorMessages - 10;                    \
-        ellipses = "...";                                               \
-    }                                                                   \
-    code;                                                               \
-} STMT_END
-
-#define FAIL(msg) _FAIL(                            \
-    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",         \
-            msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-#define FAIL2(msg,arg) _FAIL(                       \
-    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
-            arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-#define FAIL3(msg,arg1,arg2) _FAIL(                         \
-    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
-     arg1, arg2, UTF8fARG(UTF, len, RExC_precomp), ellipses))
-
-/*
- * Simple_vFAIL -- like FAIL, but marks the current location in the scan
- */
-#define Simple_vFAIL(m) STMT_START {                                    \
-    Perl_croak(aTHX_ "%s" REPORT_LOCATION,                              \
-            m, REPORT_LOCATION_ARGS(RExC_parse));                       \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
- */
-#define vFAIL(m) STMT_START {                           \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL(m);                                    \
-} STMT_END
-
-/*
- * Like Simple_vFAIL(), but accepts two arguments.
- */
-#define Simple_vFAIL2(m,a1) STMT_START {                        \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,                \
-                      REPORT_LOCATION_ARGS(RExC_parse));        \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
- */
-#define vFAIL2(m,a1) STMT_START {                       \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL2(m, a1);                               \
-} STMT_END
-
-
-/*
- * Like Simple_vFAIL(), but accepts three arguments.
- */
-#define Simple_vFAIL3(m, a1, a2) STMT_START {                   \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,            \
-            REPORT_LOCATION_ARGS(RExC_parse));                  \
-} STMT_END
-
-/*
- * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
- */
-#define vFAIL3(m,a1,a2) STMT_START {                    \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL3(m, a1, a2);                           \
-} STMT_END
-
-/*
- * Like Simple_vFAIL(), but accepts four arguments.
- */
-#define Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3,        \
-            REPORT_LOCATION_ARGS(RExC_parse));                  \
-} STMT_END
-
-#define vFAIL4(m,a1,a2,a3) STMT_START {                 \
-    PREPARE_TO_DIE;                                     \
-    Simple_vFAIL4(m, a1, a2, a3);                       \
-} STMT_END
-
-/* A specialized version of vFAIL2 that works with UTF8f */
-#define vFAIL2utf8f(m, a1) STMT_START {             \
-    PREPARE_TO_DIE;                                 \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
-            REPORT_LOCATION_ARGS(RExC_parse));      \
-} STMT_END
-
-#define vFAIL3utf8f(m, a1, a2) STMT_START {             \
-    PREPARE_TO_DIE;                                     \
-    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
-            REPORT_LOCATION_ARGS(RExC_parse));          \
-} STMT_END
-
-/* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
-    STMT_START {                                                            \
-      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
-      RExC_copy_start_in_constructed = NULL;                                \
-    } STMT_END
-#define RESTORE_WARNINGS                                                    \
-    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
-
-/* Since a warning can be generated multiple times as the input is reparsed, we
- * output it the first time we come to that point in the parse, but suppress it
- * otherwise.  'RExC_copy_start_in_constructed' being NULL is a flag to not
- * generate any warnings */
-#define TO_OUTPUT_WARNINGS(loc)                                         \
-  (   RExC_copy_start_in_constructed                                    \
-   && ((xI(loc)) - RExC_precomp) > (Ptrdiff_t) RExC_latest_warn_offset)
-
-/* After we've emitted a warning, we save the position in the input so we don't
- * output it again */
-#define UPDATE_WARNINGS_LOC(loc)                                        \
-    STMT_START {                                                        \
-        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            RExC_latest_warn_offset = MAX(sI, MIN(eI, xI(loc)))         \
-                                                       - RExC_precomp;  \
-        }                                                               \
-    } STMT_END
-
-/* 'warns' is the output of the packWARNx macro used in 'code' */
-#define _WARN_HELPER(loc, warns, code)                                  \
-    STMT_START {                                                        \
-        if (! RExC_copy_start_in_constructed) {                         \
-            Perl_croak( aTHX_ "panic! %s: %d: Tried to warn when none"  \
-                              " expected at '%s'",                      \
-                              __FILE__, __LINE__, loc);                 \
-        }                                                               \
-        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            if (ckDEAD(warns))                                          \
-                PREPARE_TO_DIE;                                         \
-            code;                                                       \
-            UPDATE_WARNINGS_LOC(loc);                                   \
-        }                                                               \
-    } STMT_END
-
-/* m is not necessarily a "literal string", in this macro */
-#define warn_non_literal_string(loc, packed_warn, m)                    \
-    _WARN_HELPER(loc, packed_warn,                                      \
-                      Perl_warner(aTHX_ packed_warn,                    \
-                                       "%s" REPORT_LOCATION,            \
-                                  m, REPORT_LOCATION_ARGS(loc)))
-#define reg_warn_non_literal_string(loc, m)                             \
-                warn_non_literal_string(loc, packWARN(WARN_REGEXP), m)
-
-#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
-    STMT_START {                                                            \
-                char * format;                                              \
-                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
-                Newx(format, format_size, char);                            \
-                my_strlcpy(format, m, format_size);                         \
-                my_strlcat(format, REPORT_LOCATION, format_size);           \
-                SAVEFREEPV(format);                                         \
-                _WARN_HELPER(loc, packwarn,                                 \
-                      Perl_ck_warner(aTHX_ packwarn,                        \
-                                        format,                             \
-                                        a1, REPORT_LOCATION_ARGS(loc)));    \
-    } STMT_END
-
-#define ckWARNreg(loc,m)                                                \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
-                                          m REPORT_LOCATION,            \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN(loc, m)                                                   \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       REPORT_LOCATION_ARGS(loc)))      \
-
-#define vWARN_dep(loc, m)                                               \
-    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
-                      Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),      \
-                                       m REPORT_LOCATION,               \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNdep(loc,m)                                                \
-    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
-                      Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
-                                            m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNregdep(loc,m)                                                 \
-    _WARN_HELPER(loc, packWARN2(WARN_DEPRECATED, WARN_REGEXP),              \
-                      Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED,     \
-                                                      WARN_REGEXP),         \
-                                             m REPORT_LOCATION,             \
-                                             REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN2reg_d(loc,m, a1)                                             \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),         \
-                                            m REPORT_LOCATION,              \
-                                            a1, REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN2reg(loc, m, a1)                                              \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
-                                          m REPORT_LOCATION,                \
-                                          a1, REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN3(loc, m, a1, a2)                                              \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),              \
-                                       m REPORT_LOCATION,                   \
-                                       a1, a2, REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN3reg(loc, m, a1, a2)                                          \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
-                                          m REPORT_LOCATION,                \
-                                          a1, a2,                           \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN4(loc, m, a1, a2, a3)                                      \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       a1, a2, a3,                      \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARN4reg(loc, m, a1, a2, a3)                                  \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
-                                          m REPORT_LOCATION,            \
-                                          a1, a2, a3,                   \
-                                          REPORT_LOCATION_ARGS(loc)))
-
-#define vWARN5(loc, m, a1, a2, a3, a4)                                  \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
-                                       m REPORT_LOCATION,               \
-                                       a1, a2, a3, a4,                  \
-                                       REPORT_LOCATION_ARGS(loc)))
-
-#define ckWARNexperimental(loc, class, m)                               \
-    STMT_START {                                                        \
-        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
-            RExC_warned_ ## class = 1;                                  \
-            _WARN_HELPER(loc, packWARN(class),                          \
-                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
-                                            m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)));\
-        }                                                               \
-    } STMT_END
-
-#define ckWARNexperimental_with_arg(loc, class, m, arg)                 \
-    STMT_START {                                                        \
-        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
-            RExC_warned_ ## class = 1;                                  \
-            _WARN_HELPER(loc, packWARN(class),                          \
-                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
-                                       m REPORT_LOCATION,               \
-                                       arg, REPORT_LOCATION_ARGS(loc)));\
-        }                                                               \
-    } STMT_END
-
-/* Convert between a pointer to a node and its offset from the beginning of the
- * program */
-#define REGNODE_p(offset)    (RExC_emit_start + (offset))
-#define REGNODE_OFFSET(node) (__ASSERT_((node) >= RExC_emit_start)      \
-                              (SSize_t) ((node) - RExC_emit_start))
-
-#define ProgLen(ri) ri->proglen
-#define SetProgLen(ri,x) ri->proglen = x
-
-#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
-#define EXPERIMENTAL_INPLACESCAN
-#endif /*PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS*/
-
-STATIC void
-S_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len)
-{
-    PERL_ARGS_ASSERT_POPULATE_BITMAP_FROM_INVLIST;
-
-    /* As the name says.  The zeroth bit corresponds to the code point given by
-     * 'offset' */
-
-    UV start, end;
-
-    Zero(bitmap, len, U8);
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        assert(start >= offset);
-
-        for (UV i = start; i <= end; i++) {
-            UV adjusted = i - offset;
-
-            BITMAP_BYTE(bitmap, adjusted) |= BITMAP_BIT(adjusted);
-        }
-    }
-    invlist_iterfinish(invlist);
-}
-
-STATIC void
-S_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_len, SV ** invlist, const UV offset)
-{
-    PERL_ARGS_ASSERT_POPULATE_INVLIST_FROM_BITMAP;
-
-    /* As the name says.  The zeroth bit corresponds to the code point given by
-     * 'offset' */
-
-    Size_t i;
-
-    for (i = 0; i < bitmap_len; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            int start = i++;
-
-            /* Save a little work by adding a range all at once instead of bit
-             * by bit */
-            while (i < bitmap_len && BITMAP_TEST(bitmap, i)) {
-                i++;
-            }
-
-            *invlist = _add_range_to_invlist(*invlist,
-                                             start + offset,
-                                             i + offset - 1);
-        }
-    }
-}
-
-#ifdef DEBUGGING
-int
-Perl_re_printf(pTHX_ const char *fmt, ...)
-{
-    va_list ap;
-    int result;
-    PerlIO *f= Perl_debug_log;
-    PERL_ARGS_ASSERT_RE_PRINTF;
-    va_start(ap, fmt);
-    result = PerlIO_vprintf(f, fmt, ap);
-    va_end(ap);
-    return result;
-}
-
-int
-Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
-{
-    va_list ap;
-    int result;
-    PerlIO *f= Perl_debug_log;
-    PERL_ARGS_ASSERT_RE_INDENTF;
-    va_start(ap, depth);
-    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
-    result = PerlIO_vprintf(f, fmt, ap);
-    va_end(ap);
-    return result;
-}
-#endif /* DEBUGGING */
-
-#define DEBUG_RExC_seen()                                                   \
-        DEBUG_OPTIMISE_MORE_r({                                             \
-            Perl_re_printf( aTHX_ "RExC_seen: ");                           \
-                                                                            \
-            if (RExC_seen & REG_ZERO_LEN_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                \
-                                                                            \
-            if (RExC_seen & REG_LOOKBEHIND_SEEN)                            \
-                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");              \
-                                                                            \
-            if (RExC_seen & REG_GPOS_SEEN)                                  \
-                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                    \
-                                                                            \
-            if (RExC_seen & REG_RECURSE_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                 \
-                                                                            \
-            if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                    \
-                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");      \
-                                                                            \
-            if (RExC_seen & REG_VERBARG_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                 \
-                                                                            \
-            if (RExC_seen & REG_CUTGROUP_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                \
-                                                                            \
-            if (RExC_seen & REG_RUN_ON_COMMENT_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");          \
-                                                                            \
-            if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");          \
-                                                                            \
-            if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                  \
-                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");    \
-                                                                            \
-            Perl_re_printf( aTHX_ "\n");                                    \
-        });
-
-#define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
-  if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
-
-
-#ifdef DEBUGGING
-static void
-S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
-                                    const char *close_str)
-{
-    if (!flags)
-        return;
-
-    Perl_re_printf( aTHX_  "%s", open_str);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
-    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
-    Perl_re_printf( aTHX_  "%s", close_str);
-}
-
-
-static void
-S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
-                    U32 depth, int is_inf,
-                    SSize_t min, SSize_t stopmin, SSize_t delta)
-{
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    DEBUG_OPTIMISE_MORE_r({
-        if (!data)
-            return;
-        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
-            depth,
-            where,
-            min, stopmin, delta,
-            (IV)data->pos_min,
-            (IV)data->pos_delta,
-            (UV)data->flags
-        );
-
-        S_debug_show_study_flags(aTHX_ data->flags," [","]");
-
-        Perl_re_printf( aTHX_
-            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
-            (IV)data->whilem_c,
-            (IV)(data->last_closep ? *((data)->last_closep) : -1),
-            is_inf ? "INF " : ""
-        );
-
-        if (data->last_found) {
-            int i;
-            Perl_re_printf(aTHX_
-                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
-                    SvPVX_const(data->last_found),
-                    (IV)data->last_end,
-                    (IV)data->last_start_min,
-                    (IV)data->last_start_max
-            );
-
-            for (i = 0; i < 2; i++) {
-                Perl_re_printf(aTHX_
-                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
-                    data->cur_is_floating == i ? "*" : "",
-                    i ? "Float" : "Fixed",
-                    SvPVX_const(data->substrs[i].str),
-                    (IV)data->substrs[i].min_offset,
-                    (IV)data->substrs[i].max_offset
-                );
-                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
-            }
-        }
-
-        Perl_re_printf( aTHX_ "\n");
-    });
-}
-
-
-static void
-S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
-                regnode *scan, U32 depth, U32 flags)
-{
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    DEBUG_OPTIMISE_r({
-        regnode *Next;
-
-        if (!scan)
-            return;
-        Next = regnext(scan);
-        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
-        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
-            depth,
-            str,
-            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
-            Next ? (REG_NODE_NUM(Next)) : 0 );
-        S_debug_show_study_flags(aTHX_ flags," [ ","]");
-        Perl_re_printf( aTHX_  "\n");
-   });
-}
-
-
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
-                    S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
-
-#  define DEBUG_PEEP(str, scan, depth, flags)   \
-                    S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
-
-#else
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
-#  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
-#endif
-
+#include "regcomp_internal.h"
 
 /* =========================================================
  * BEGIN edit_distance stuff.
@@ -1631,5313 +290,16 @@ S_edit_distance(const UV* src,
 /* END of edit_distance() stuff
  * ========================================================= */
 
-/* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring or the longest found
-   floating substrings if needed. */
-
-STATIC void
-S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
-                    SSize_t *minlenp, int is_inf)
-{
-    const STRLEN l = CHR_SVLEN(data->last_found);
-    SV * const longest_sv = data->substrs[data->cur_is_floating].str;
-    const STRLEN old_l = CHR_SVLEN(longest_sv);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_SCAN_COMMIT;
-
-    if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
-        const U8 i = data->cur_is_floating;
-        SvSetMagicSV(longest_sv, data->last_found);
-        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
-
-        if (!i) /* fixed */
-            data->substrs[0].max_offset = data->substrs[0].min_offset;
-        else { /* float */
-            data->substrs[1].max_offset =
-                      (is_inf)
-                       ? OPTIMIZE_INFTY
-                       : (l
-                          ? data->last_start_max
-                          : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
-                                         ? OPTIMIZE_INFTY
-                                         : data->pos_min + data->pos_delta));
-        }
-
-        data->substrs[i].flags &= ~SF_BEFORE_EOL;
-        data->substrs[i].flags |= data->flags & SF_BEFORE_EOL;
-        data->substrs[i].minlenp = minlenp;
-        data->substrs[i].lookbehind = 0;
-    }
-
-    SvCUR_set(data->last_found, 0);
-    {
-        SV * const sv = data->last_found;
-        if (SvUTF8(sv) && SvMAGICAL(sv)) {
-            MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
-            if (mg)
-                mg->mg_len = 0;
-        }
-    }
-    data->last_end = -1;
-    data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
-}
-
-/* An SSC is just a regnode_charclass_posix with an extra field: the inversion
- * list that describes which code points it matches */
-
-STATIC void
-S_ssc_anything(pTHX_ regnode_ssc *ssc)
-{
-    /* Set the SSC 'ssc' to match an empty string or any code point */
-
-    PERL_ARGS_ASSERT_SSC_ANYTHING;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* mortalize so won't leak */
-    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
-    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
-}
-
-STATIC int
-S_ssc_is_anything(const regnode_ssc *ssc)
-{
-    /* Returns TRUE if the SSC 'ssc' can match the empty string and any code
-     * point; FALSE otherwise.  Thus, this is used to see if using 'ssc' buys
-     * us anything: if the function returns TRUE, 'ssc' hasn't been restricted
-     * in any way, so there's no point in using it */
-
-    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
-    bool ret;
-
-    PERL_ARGS_ASSERT_SSC_IS_ANYTHING;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING)) {
-        return FALSE;
-    }
-
-    /* See if the list consists solely of the range 0 - Infinity */
-    invlist_iterinit(ssc->invlist);
-    ret = invlist_iternext(ssc->invlist, &start, &end)
-          && start == 0
-          && end == UV_MAX;
-
-    invlist_iterfinish(ssc->invlist);
-
-    if (ret) {
-        return TRUE;
-    }
-
-    /* If e.g., both \w and \W are set, matches everything */
-    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        int i;
-        for (i = 0; i < ANYOF_POSIXL_MAX; i += 2) {
-            if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i+1)) {
-                return TRUE;
-            }
-        }
-    }
-
-    return FALSE;
-}
-
-STATIC void
-S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
-{
-    /* Initializes the SSC 'ssc'.  This includes setting it to match an empty
-     * string, any code point, or any posix class under locale */
-
-    PERL_ARGS_ASSERT_SSC_INIT;
-
-    Zero(ssc, 1, regnode_ssc);
-    set_ANYOF_SYNTHETIC(ssc);
-    ARG_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
-    ssc_anything(ssc);
-
-    /* If any portion of the regex is to operate under locale rules that aren't
-     * fully known at compile time, initialization includes it.  The reason
-     * this isn't done for all regexes is that the optimizer was written under
-     * the assumption that locale was all-or-nothing.  Given the complexity and
-     * lack of documentation in the optimizer, and that there are inadequate
-     * test cases for locale, many parts of it may not work properly, it is
-     * safest to avoid locale unless necessary. */
-    if (RExC_contains_locale) {
-        ANYOF_POSIXL_SETALL(ssc);
-    }
-    else {
-        ANYOF_POSIXL_ZERO(ssc);
-    }
-}
-
-STATIC int
-S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
-                        const regnode_ssc *ssc)
-{
-    /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
-     * to the list of code points matched, and locale posix classes; hence does
-     * not check its flags) */
-
-    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
-    bool ret;
-
-    PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    invlist_iterinit(ssc->invlist);
-    ret = invlist_iternext(ssc->invlist, &start, &end)
-          && start == 0
-          && end == UV_MAX;
-
-    invlist_iterfinish(ssc->invlist);
-
-    if (! ret) {
-        return FALSE;
-    }
-
-    if (RExC_contains_locale && ! ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc)) {
-        return FALSE;
-    }
-
-    return TRUE;
-}
-
-#define INVLIST_INDEX 0
-#define ONLY_LOCALE_MATCHES_INDEX 1
-#define DEFERRED_USER_DEFINED_INDEX 2
-
-STATIC SV*
-S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
-                               const regnode_charclass* const node)
-{
-    /* Returns a mortal inversion list defining which code points are matched
-     * by 'node', which is of ANYOF-ish type .  Handles complementing the
-     * result if appropriate.  If some code points aren't knowable at this
-     * time, the returned list must, and will, contain every code point that is
-     * a possibility. */
-
-    SV* invlist = NULL;
-    SV* only_utf8_locale_invlist = NULL;
-    bool new_node_has_latin1 = FALSE;
-    const U8 flags = (REGNODE_TYPE(OP(node)) == ANYOF)
-                      ? ANYOF_FLAGS(node)
-                      : 0;
-
-    PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
-
-    /* Look at the data structure created by S_set_ANYOF_arg() */
-    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
-        invlist = sv_2mortal(_new_invlist(1));
-        invlist = _add_range_to_invlist(invlist, NUM_ANYOF_CODE_POINTS, UV_MAX);
-    }
-    else if (ANYOF_HAS_AUX(node)) {
-        const U32 n = ARG(node);
-        SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
-        AV * const av = MUTABLE_AV(SvRV(rv));
-        SV **const ary = AvARRAY(av);
-
-        if (av_tindex_skip_len_mg(av) >= DEFERRED_USER_DEFINED_INDEX) {
-
-            /* Here there are things that won't be known until runtime -- we
-             * have to assume it could be anything */
-            invlist = sv_2mortal(_new_invlist(1));
-            return _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else if (ary[INVLIST_INDEX]) {
-
-            /* Use the node's inversion list */
-            invlist = sv_2mortal(invlist_clone(ary[INVLIST_INDEX], NULL));
-        }
-
-        /* Get the code points valid only under UTF-8 locales */
-        if (   (flags & ANYOFL_FOLD)
-            &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
-        {
-            only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
-        }
-    }
-
-    if (! invlist) {
-        invlist = sv_2mortal(_new_invlist(0));
-    }
-
-    /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
-     * code points, and an inversion list for the others, but if there are code
-     * points that should match only conditionally on the target string being
-     * UTF-8, those are placed in the inversion list, and not the bitmap.
-     * Since there are circumstances under which they could match, they are
-     * included in the SSC.  But if the ANYOF node is to be inverted, we have
-     * to exclude them here, so that when we invert below, the end result
-     * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
-     * have to do this here before we add the unconditionally matched code
-     * points */
-    if (flags & ANYOF_INVERT) {
-        _invlist_intersection_complement_2nd(invlist,
-                                             PL_UpperLatin1,
-                                             &invlist);
-    }
-
-    /* Add in the points from the bit map */
-    if (REGNODE_TYPE(OP(node)) == ANYOF){
-        for (unsigned i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-            if (ANYOF_BITMAP_TEST(node, i)) {
-                unsigned int start = i++;
-
-                for (;    i < NUM_ANYOF_CODE_POINTS
-                       && ANYOF_BITMAP_TEST(node, i); ++i)
-                {
-                    /* empty */
-                }
-                invlist = _add_range_to_invlist(invlist, start, i-1);
-                new_node_has_latin1 = TRUE;
-            }
-        }
-    }
-
-    /* If this can match all upper Latin1 code points, have to add them
-     * as well.  But don't add them if inverting, as when that gets done below,
-     * it would exclude all these characters, including the ones it shouldn't
-     * that were added just above */
-    if ( ! (flags & ANYOF_INVERT)
-        &&  OP(node) == ANYOFD
-        && (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared))
-    {
-        _invlist_union(invlist, PL_UpperLatin1, &invlist);
-    }
-
-    /* Similarly for these */
-    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
-        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
-    }
-
-    if (flags & ANYOF_INVERT) {
-        _invlist_invert(invlist);
-    }
-    else if (flags & ANYOFL_FOLD) {
-        if (new_node_has_latin1) {
-
-            /* These folds are potential in Turkic locales */
-            if (_invlist_contains_cp(invlist, 'i')) {
-                invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-            }
-            if (_invlist_contains_cp(invlist, 'I')) {
-                invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-            }
-
-            /* Under /li, any 0-255 could fold to any other 0-255, depending on
-             * the locale.  We can skip this if there are no 0-255 at all. */
-            _invlist_union(invlist, PL_Latin1, &invlist);
-        }
-        else {
-            if (_invlist_contains_cp(invlist, LATIN_SMALL_LETTER_DOTLESS_I)) {
-                invlist = add_cp_to_invlist(invlist, 'I');
-            }
-            if (_invlist_contains_cp(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
-            {
-                invlist = add_cp_to_invlist(invlist, 'i');
-            }
-        }
-    }
-
-    /* Similarly add the UTF-8 locale possible matches.  These have to be
-     * deferred until after the non-UTF-8 locale ones are taken care of just
-     * above, or it leads to wrong results under ANYOF_INVERT */
-    if (only_utf8_locale_invlist) {
-        _invlist_union_maybe_complement_2nd(invlist,
-                                            only_utf8_locale_invlist,
-                                            flags & ANYOF_INVERT,
-                                            &invlist);
-    }
-
-    return invlist;
-}
-
-/* These two functions currently do the exact same thing */
-#define ssc_init_zero           ssc_init
-
-#define ssc_add_cp(ssc, cp)   ssc_add_range((ssc), (cp), (cp))
-#define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
-
-/* 'AND' a given class with another one.  Can create false positives.  'ssc'
- * should not be inverted. */
-
-STATIC void
-S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
-                const regnode_charclass *and_with)
-{
-    /* Accumulate into SSC 'ssc' its 'AND' with 'and_with', which is either
-     * another SSC or a regular ANYOF class.  Can create false positives. */
-
-    SV* anded_cp_list;
-    U8  and_with_flags = (REGNODE_TYPE(OP(and_with)) == ANYOF)
-                          ? ANYOF_FLAGS(and_with)
-                          : 0;
-    U8  anded_flags;
-
-    PERL_ARGS_ASSERT_SSC_AND;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* 'and_with' is used as-is if it too is an SSC; otherwise have to extract
-     * the code point inversion list and just the relevant flags */
-    if (is_ANYOF_SYNTHETIC(and_with)) {
-        anded_cp_list = ((regnode_ssc *)and_with)->invlist;
-        anded_flags = and_with_flags;
-
-        /* XXX This is a kludge around what appears to be deficiencies in the
-         * optimizer.  If we make S_ssc_anything() add in the WARN_SUPER flag,
-         * there are paths through the optimizer where it doesn't get weeded
-         * out when it should.  And if we don't make some extra provision for
-         * it like the code just below, it doesn't get added when it should.
-         * This solution is to add it only when AND'ing, which is here, and
-         * only when what is being AND'ed is the pristine, original node
-         * matching anything.  Thus it is like adding it to ssc_anything() but
-         * only when the result is to be AND'ed.  Probably the same solution
-         * could be adopted for the same problem we have with /l matching,
-         * which is solved differently in S_ssc_init(), and that would lead to
-         * fewer false positives than that solution has.  But if this solution
-         * creates bugs, the consequences are only that a warning isn't raised
-         * that should be; while the consequences for having /l bugs is
-         * incorrect matches */
-        if (ssc_is_anything((regnode_ssc *)and_with)) {
-            anded_flags |= ANYOF_WARN_SUPER__shared;
-        }
-    }
-    else {
-        anded_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
-        if (OP(and_with) == ANYOFD) {
-            anded_flags = and_with_flags & ANYOF_COMMON_FLAGS;
-        }
-        else {
-            anded_flags = and_with_flags
-                            & ( ANYOF_COMMON_FLAGS
-                               |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-                               |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
-            if (and_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
-                anded_flags &= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
-            }
-        }
-    }
-
-    ANYOF_FLAGS(ssc) &= anded_flags;
-
-    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
-     * C2 is the list of code points in 'and-with'; P2, its posix classes.
-     * 'and_with' may be inverted.  When not inverted, we have the situation of
-     * computing:
-     *  (C1 | P1) & (C2 | P2)
-     *                     =  (C1 & (C2 | P2)) | (P1 & (C2 | P2))
-     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
-     *                    <=  ((C1 & C2) |       P2)) | ( P1       | (P1 & P2))
-     *                    <=  ((C1 & C2) | P1 | P2)
-     * Alternatively, the last few steps could be:
-     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
-     *                    <=  ((C1 & C2) |  C1      ) | (      C2  | (P1 & P2))
-     *                    <=  (C1 | C2 | (P1 & P2))
-     * We favor the second approach if either P1 or P2 is non-empty.  This is
-     * because these components are a barrier to doing optimizations, as what
-     * they match cannot be known until the moment of matching as they are
-     * dependent on the current locale, 'AND"ing them likely will reduce or
-     * eliminate them.
-     * But we can do better if we know that C1,P1 are in their initial state (a
-     * frequent occurrence), each matching everything:
-     *  (<everything>) & (C2 | P2) =  C2 | P2
-     * Similarly, if C2,P2 are in their initial state (again a frequent
-     * occurrence), the result is a no-op
-     *  (C1 | P1) & (<everything>) =  C1 | P1
-     *
-     * Inverted, we have
-     *  (C1 | P1) & ~(C2 | P2)  =  (C1 | P1) & (~C2 & ~P2)
-     *                          =  (C1 & (~C2 & ~P2)) | (P1 & (~C2 & ~P2))
-     *                         <=  (C1 & ~C2) | (P1 & ~P2)
-     * */
-
-    if ((and_with_flags & ANYOF_INVERT)
-        && ! is_ANYOF_SYNTHETIC(and_with))
-    {
-        unsigned int i;
-
-        ssc_intersection(ssc,
-                         anded_cp_list,
-                         FALSE /* Has already been inverted */
-                         );
-
-        /* If either P1 or P2 is empty, the intersection will be also; can skip
-         * the loop */
-        if (! (and_with_flags & ANYOF_MATCHES_POSIXL)) {
-            ANYOF_POSIXL_ZERO(ssc);
-        }
-        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-
-            /* Note that the Posix class component P from 'and_with' actually
-             * looks like:
-             *      P = Pa | Pb | ... | Pn
-             * where each component is one posix class, such as in [\w\s].
-             * Thus
-             *      ~P = ~(Pa | Pb | ... | Pn)
-             *         = ~Pa & ~Pb & ... & ~Pn
-             *        <= ~Pa | ~Pb | ... | ~Pn
-             * The last is something we can easily calculate, but unfortunately
-             * is likely to have many false positives.  We could do better
-             * in some (but certainly not all) instances if two classes in
-             * P have known relationships.  For example
-             *      :lower: <= :alpha: <= :alnum: <= \w <= :graph: <= :print:
-             * So
-             *      :lower: & :print: = :lower:
-             * And similarly for classes that must be disjoint.  For example,
-             * since \s and \w can have no elements in common based on rules in
-             * the POSIX standard,
-             *      \w & ^\S = nothing
-             * Unfortunately, some vendor locales do not meet the Posix
-             * standard, in particular almost everything by Microsoft.
-             * The loop below just changes e.g., \w into \W and vice versa */
-
-            regnode_charclass_posixl temp;
-            int add = 1;    /* To calculate the index of the complement */
-
-            Zero(&temp, 1, regnode_charclass_posixl);
-            ANYOF_POSIXL_ZERO(&temp);
-            for (i = 0; i < ANYOF_MAX; i++) {
-                assert(i % 2 != 0
-                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)
-                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i + 1));
-
-                if (ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)) {
-                    ANYOF_POSIXL_SET(&temp, i + add);
-                }
-                add = 0 - add; /* 1 goes to -1; -1 goes to 1 */
-            }
-            ANYOF_POSIXL_AND(&temp, ssc);
-
-        } /* else ssc already has no posixes */
-    } /* else: Not inverted.  This routine is a no-op if 'and_with' is an SSC
-         in its initial state */
-    else if (! is_ANYOF_SYNTHETIC(and_with)
-             || ! ssc_is_cp_posixl_init(pRExC_state, (regnode_ssc *)and_with))
-    {
-        /* But if 'ssc' is in its initial state, the result is just 'and_with';
-         * copy it over 'ssc' */
-        if (ssc_is_cp_posixl_init(pRExC_state, ssc)) {
-            if (is_ANYOF_SYNTHETIC(and_with)) {
-                StructCopy(and_with, ssc, regnode_ssc);
-            }
-            else {
-                ssc->invlist = anded_cp_list;
-                ANYOF_POSIXL_ZERO(ssc);
-                if (and_with_flags & ANYOF_MATCHES_POSIXL) {
-                    ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
-                }
-            }
-        }
-        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
-                 || (and_with_flags & ANYOF_MATCHES_POSIXL))
-        {
-            /* One or the other of P1, P2 is non-empty. */
-            if (and_with_flags & ANYOF_MATCHES_POSIXL) {
-                ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
-            }
-            ssc_union(ssc, anded_cp_list, FALSE);
-        }
-        else { /* P1 = P2 = empty */
-            ssc_intersection(ssc, anded_cp_list, FALSE);
-        }
-    }
-}
-
-STATIC void
-S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
-               const regnode_charclass *or_with)
-{
-    /* Accumulate into SSC 'ssc' its 'OR' with 'or_with', which is either
-     * another SSC or a regular ANYOF class.  Can create false positives if
-     * 'or_with' is to be inverted. */
-
-    SV* ored_cp_list;
-    U8 ored_flags;
-    U8  or_with_flags = (REGNODE_TYPE(OP(or_with)) == ANYOF)
-                         ? ANYOF_FLAGS(or_with)
-                         : 0;
-
-    PERL_ARGS_ASSERT_SSC_OR;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* 'or_with' is used as-is if it too is an SSC; otherwise have to extract
-     * the code point inversion list and just the relevant flags */
-    if (is_ANYOF_SYNTHETIC(or_with)) {
-        ored_cp_list = ((regnode_ssc*) or_with)->invlist;
-        ored_flags = or_with_flags;
-    }
-    else {
-        ored_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
-        ored_flags = or_with_flags & ANYOF_COMMON_FLAGS;
-        if (OP(or_with) != ANYOFD) {
-            ored_flags |=
-                or_with_flags & ( ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-                                 |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
-            if (or_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
-                ored_flags |= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
-            }
-        }
-    }
-
-    ANYOF_FLAGS(ssc) |= ored_flags;
-
-    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
-     * C2 is the list of code points in 'or-with'; P2, its posix classes.
-     * 'or_with' may be inverted.  When not inverted, we have the simple
-     * situation of computing:
-     *  (C1 | P1) | (C2 | P2)  =  (C1 | C2) | (P1 | P2)
-     * If P1|P2 yields a situation with both a class and its complement are
-     * set, like having both \w and \W, this matches all code points, and we
-     * can delete these from the P component of the ssc going forward.  XXX We
-     * might be able to delete all the P components, but I (khw) am not certain
-     * about this, and it is better to be safe.
-     *
-     * Inverted, we have
-     *  (C1 | P1) | ~(C2 | P2)  =  (C1 | P1) | (~C2 & ~P2)
-     *                         <=  (C1 | P1) | ~C2
-     *                         <=  (C1 | ~C2) | P1
-     * (which results in actually simpler code than the non-inverted case)
-     * */
-
-    if ((or_with_flags & ANYOF_INVERT)
-        && ! is_ANYOF_SYNTHETIC(or_with))
-    {
-        /* We ignore P2, leaving P1 going forward */
-    }   /* else  Not inverted */
-    else if (or_with_flags & ANYOF_MATCHES_POSIXL) {
-        ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
-        if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-            unsigned int i;
-            for (i = 0; i < ANYOF_MAX; i += 2) {
-                if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i + 1))
-                {
-                    ssc_match_all_cp(ssc);
-                    ANYOF_POSIXL_CLEAR(ssc, i);
-                    ANYOF_POSIXL_CLEAR(ssc, i+1);
-                }
-            }
-        }
-    }
-
-    ssc_union(ssc,
-              ored_cp_list,
-              FALSE /* Already has been inverted */
-              );
-}
-
-STATIC void
-S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
-{
-    PERL_ARGS_ASSERT_SSC_UNION;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    _invlist_union_maybe_complement_2nd(ssc->invlist,
-                                        invlist,
-                                        invert2nd,
-                                        &ssc->invlist);
-}
-
-STATIC void
-S_ssc_intersection(pTHX_ regnode_ssc *ssc,
-                         SV* const invlist,
-                         const bool invert2nd)
-{
-    PERL_ARGS_ASSERT_SSC_INTERSECTION;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    _invlist_intersection_maybe_complement_2nd(ssc->invlist,
-                                               invlist,
-                                               invert2nd,
-                                               &ssc->invlist);
-}
-
-STATIC void
-S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
-{
-    PERL_ARGS_ASSERT_SSC_ADD_RANGE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end);
-}
-
-STATIC void
-S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
-{
-    /* AND just the single code point 'cp' into the SSC 'ssc' */
-
-    SV* cp_list = _new_invlist(2);
-
-    PERL_ARGS_ASSERT_SSC_CP_AND;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    cp_list = add_cp_to_invlist(cp_list, cp);
-    ssc_intersection(ssc, cp_list,
-                     FALSE /* Not inverted */
-                     );
-    SvREFCNT_dec_NN(cp_list);
-}
-
-STATIC void
-S_ssc_clear_locale(regnode_ssc *ssc)
-{
-    /* Set the SSC 'ssc' to not match any locale things */
-    PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    ANYOF_POSIXL_ZERO(ssc);
-    ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
-}
-
-STATIC bool
-S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
-{
-    /* The synthetic start class is used to hopefully quickly winnow down
-     * places where a pattern could start a match in the target string.  If it
-     * doesn't really narrow things down that much, there isn't much point to
-     * having the overhead of using it.  This function uses some very crude
-     * heuristics to decide if to use the ssc or not.
-     *
-     * It returns TRUE if 'ssc' rules out more than half what it considers to
-     * be the "likely" possible matches, but of course it doesn't know what the
-     * actual things being matched are going to be; these are only guesses
-     *
-     * For /l matches, it assumes that the only likely matches are going to be
-     *      in the 0-255 range, uniformly distributed, so half of that is 127
-     * For /a and /d matches, it assumes that the likely matches will be just
-     *      the ASCII range, so half of that is 63
-     * For /u and there isn't anything matching above the Latin1 range, it
-     *      assumes that that is the only range likely to be matched, and uses
-     *      half that as the cut-off: 127.  If anything matches above Latin1,
-     *      it assumes that all of Unicode could match (uniformly), except for
-     *      non-Unicode code points and things in the General Category "Other"
-     *      (unassigned, private use, surrogates, controls and formats).  This
-     *      is a much large number. */
-
-    U32 count = 0;      /* Running total of number of code points matched by
-                           'ssc' */
-    UV start, end;      /* Start and end points of current range in inversion
-                           XXX outdated.  UTF-8 locales are common, what about invert? list */
-    const U32 max_code_points = (LOC)
-                                ?  256
-                                : ((  ! UNI_SEMANTICS
-                                    ||  invlist_highest(ssc->invlist) < 256)
-                                  ? 128
-                                  : NON_OTHER_COUNT);
-    const U32 max_match = max_code_points / 2;
-
-    PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
-
-    invlist_iterinit(ssc->invlist);
-    while (invlist_iternext(ssc->invlist, &start, &end)) {
-        if (start >= max_code_points) {
-            break;
-        }
-        end = MIN(end, max_code_points - 1);
-        count += end - start + 1;
-        if (count >= max_match) {
-            invlist_iterfinish(ssc->invlist);
-            return FALSE;
-        }
-    }
-
-    return TRUE;
-}
-
-
-STATIC void
-S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
-{
-    /* The inversion list in the SSC is marked mortal; now we need a more
-     * permanent copy, which is stored the same way that is done in a regular
-     * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
-     * map */
-
-    SV* invlist = invlist_clone(ssc->invlist, NULL);
-
-    PERL_ARGS_ASSERT_SSC_FINALIZE;
-
-    assert(is_ANYOF_SYNTHETIC(ssc));
-
-    /* The code in this file assumes that all but these flags aren't relevant
-     * to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
-     * by the time we reach here */
-    assert(! (ANYOF_FLAGS(ssc)
-        & ~( ANYOF_COMMON_FLAGS
-            |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
-            |ANYOF_HAS_EXTRA_RUNTIME_MATCHES)));
-
-    populate_anyof_bitmap_from_invlist( (regnode *) ssc, &invlist);
-
-    set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
-    SvREFCNT_dec(invlist);
-
-    /* Make sure is clone-safe */
-    ssc->invlist = NULL;
-
-    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
-        OP(ssc) = ANYOFPOSIXL;
-    }
-    else if (RExC_contains_locale) {
-        OP(ssc) = ANYOFL;
-    }
-
-    assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
-}
-
-#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
-#define TRIE_LIST_CUR(state)  ( TRIE_LIST_ITEM( state, 0 ).forid )
-#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
-#define TRIE_LIST_USED(idx)  ( trie->states[state].trans.list         \
-                               ? (TRIE_LIST_CUR( idx ) - 1)           \
-                               : 0 )
-
-
-#ifdef DEBUGGING
-/*
-   dump_trie(trie,widecharmap,revcharmap)
-   dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
-   dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
-
-   These routines dump out a trie in a somewhat readable format.
-   The _interim_ variants are used for debugging the interim
-   tables that are used to generate the final compressed
-   representation which is what dump_trie expects.
-
-   Part of the reason for their existence is to provide a form
-   of documentation as to how the different representations function.
-
-*/
-
-/*
-  Dumps the final compressed table form of the trie to Perl_debug_log.
-  Used for debugging make_trie().
-*/
-
-STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
-            AV *revcharmap, U32 depth)
-{
-    U32 state;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    U16 word;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE;
-
-    Perl_re_indentf( aTHX_  "Char : %-6s%-6s%-4s ",
-        depth+1, "Match","Base","Ofs" );
-
-    for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
-        SV ** const tmp = av_fetch_simple( revcharmap, state, 0);
-        if ( tmp ) {
-            Perl_re_printf( aTHX_  "%*s",
-                colwidth,
-                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
-                            PL_colors[0], PL_colors[1],
-                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                            PERL_PV_ESCAPE_FIRSTCHAR
-                )
-            );
-        }
-    }
-    Perl_re_printf( aTHX_  "\n");
-    Perl_re_indentf( aTHX_ "State|-----------------------", depth+1);
-
-    for( state = 0 ; state < trie->uniquecharcount ; state++ )
-        Perl_re_printf( aTHX_  "%.*s", colwidth, "--------");
-    Perl_re_printf( aTHX_  "\n");
-
-    for( state = 1 ; state < trie->statecount ; state++ ) {
-        const U32 base = trie->states[ state ].trans.base;
-
-        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
-
-        if ( trie->states[ state ].wordnum ) {
-            Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
-        } else {
-            Perl_re_printf( aTHX_  "%6s", "" );
-        }
-
-        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
-
-        if ( base ) {
-            U32 ofs = 0;
-
-            while( ( base + ofs  < trie->uniquecharcount ) ||
-                   ( base + ofs - trie->uniquecharcount < trie->lasttrans
-                     && trie->trans[ base + ofs - trie->uniquecharcount ].check
-                                                                    != state))
-                    ofs++;
-
-            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
-
-            for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
-                if ( ( base + ofs >= trie->uniquecharcount )
-                        && ( base + ofs - trie->uniquecharcount
-                                                        < trie->lasttrans )
-                        && trie->trans[ base + ofs
-                                    - trie->uniquecharcount ].check == state )
-                {
-                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
-                    (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
-                   );
-                } else {
-                    Perl_re_printf( aTHX_  "%*s", colwidth,"   ." );
-                }
-            }
-
-            Perl_re_printf( aTHX_  "]");
-
-        }
-        Perl_re_printf( aTHX_  "\n" );
-    }
-    Perl_re_indentf( aTHX_  "word_info N:(prev,len)=",
-                                depth);
-    for (word=1; word <= trie->wordcount; word++) {
-        Perl_re_printf( aTHX_  " %d:(%d,%d)",
-            (int)word, (int)(trie->wordinfo[word].prev),
-            (int)(trie->wordinfo[word].len));
-    }
-    Perl_re_printf( aTHX_  "\n" );
-}
-/*
-  Dumps a fully constructed but uncompressed trie in list form.
-  List tries normally only are used for construction when the number of
-  possible chars (trie->uniquecharcount) is very high.
-  Used for debugging make_trie().
-*/
-STATIC void
-S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
-                         HV *widecharmap, AV *revcharmap, U32 next_alloc,
-                         U32 depth)
-{
-    U32 state;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
-
-    /* print out the table precompression.  */
-    Perl_re_indentf( aTHX_  "State :Word | Transition Data\n",
-            depth+1 );
-    Perl_re_indentf( aTHX_  "%s",
-            depth+1, "------:-----+-----------------\n" );
-
-    for( state=1 ; state < next_alloc ; state ++ ) {
-        U16 charid;
-
-        Perl_re_indentf( aTHX_  " %4" UVXf " :",
-            depth+1, (UV)state  );
-        if ( ! trie->states[ state ].wordnum ) {
-            Perl_re_printf( aTHX_  "%5s| ","");
-        } else {
-            Perl_re_printf( aTHX_  "W%4x| ",
-                trie->states[ state ].wordnum
-            );
-        }
-        for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
-            SV ** const tmp = av_fetch_simple( revcharmap,
-                                        TRIE_LIST_ITEM(state, charid).forid, 0);
-            if ( tmp ) {
-                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
-                    colwidth,
-                    pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
-                              colwidth,
-                              PL_colors[0], PL_colors[1],
-                              (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
-                              | PERL_PV_ESCAPE_FIRSTCHAR
-                    ) ,
-                    TRIE_LIST_ITEM(state, charid).forid,
-                    (UV)TRIE_LIST_ITEM(state, charid).newstate
-                );
-                if (!(charid % 10))
-                    Perl_re_printf( aTHX_  "\n%*s| ",
-                        (int)((depth * 2) + 14), "");
-            }
-        }
-        Perl_re_printf( aTHX_  "\n");
-    }
-}
-
-/*
-  Dumps a fully constructed but uncompressed trie in table form.
-  This is the normal DFA style state transition table, with a few
-  twists to facilitate compression later.
-  Used for debugging make_trie().
-*/
-STATIC void
-S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
-                          HV *widecharmap, AV *revcharmap, U32 next_alloc,
-                          U32 depth)
-{
-    U32 state;
-    U16 charid;
-    SV *sv=sv_newmortal();
-    int colwidth= widecharmap ? 6 : 4;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
-
-    /*
-       print out the table precompression so that we can do a visual check
-       that they are identical.
-     */
-
-    Perl_re_indentf( aTHX_  "Char : ", depth+1 );
-
-    for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
-        SV ** const tmp = av_fetch_simple( revcharmap, charid, 0);
-        if ( tmp ) {
-            Perl_re_printf( aTHX_  "%*s",
-                colwidth,
-                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
-                            PL_colors[0], PL_colors[1],
-                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                            PERL_PV_ESCAPE_FIRSTCHAR
-                )
-            );
-        }
-    }
-
-    Perl_re_printf( aTHX_ "\n");
-    Perl_re_indentf( aTHX_  "State+-", depth+1 );
-
-    for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
-        Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
-    }
-
-    Perl_re_printf( aTHX_  "\n" );
-
-    for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
-
-        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
-            depth+1,
-            (UV)TRIE_NODENUM( state ) );
-
-        for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
-            UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
-            if (v)
-                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
-            else
-                Perl_re_printf( aTHX_  "%*s", colwidth, "." );
-        }
-        if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
-            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
-                                            (UV)trie->trans[ state ].check );
-        } else {
-            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
-                                            (UV)trie->trans[ state ].check,
-            trie->states[ TRIE_NODENUM( state ) ].wordnum );
-        }
-    }
-}
-
-#endif
-
-
-/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
-  startbranch: the first branch in the whole branch sequence
-  first      : start branch of sequence of branch-exact nodes.
-               May be the same as startbranch
-  last       : Thing following the last branch.
-               May be the same as tail.
-  tail       : item following the branch sequence
-  count      : words in the sequence
-  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/
-  depth      : indent depth
-
-Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
-
-A trie is an N'ary tree where the branches are determined by digital
-decomposition of the key. IE, at the root node you look up the 1st character and
-follow that branch repeat until you find the end of the branches. Nodes can be
-marked as "accepting" meaning they represent a complete word. Eg:
-
-  /he|she|his|hers/
-
-would convert into the following structure. Numbers represent states, letters
-following numbers represent valid transitions on the letter from that state, if
-the number is in square brackets it represents an accepting state, otherwise it
-will be in parenthesis.
-
-      +-h->+-e->[3]-+-r->(8)-+-s->[9]
-      |    |
-      |   (2)
-      |    |
-     (1)   +-i->(6)-+-s->[7]
-      |
-      +-s->(3)-+-h->(4)-+-e->[5]
-
-      Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
-
-This shows that when matching against the string 'hers' we will begin at state 1
-read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
-then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
-is also accepting. Thus we know that we can match both 'he' and 'hers' with a
-single traverse. We store a mapping from accepting to state to which word was
-matched, and then when we have multiple possibilities we try to complete the
-rest of the regex in the order in which they occurred in the alternation.
-
-The only prior NFA like behaviour that would be changed by the TRIE support is
-the silent ignoring of duplicate alternations which are of the form:
-
- / (DUPE|DUPE) X? (?{ ... }) Y /x
-
-Thus EVAL blocks following a trie may be called a different number of times with
-and without the optimisation. With the optimisations dupes will be silently
-ignored. This inconsistent behaviour of EVAL type nodes is well established as
-the following demonstrates:
-
- 'words'=~/(word|word|word)(?{ print $1 })[xyz]/
-
-which prints out 'word' three times, but
-
- 'words'=~/(word|word|word)(?{ print $1 })S/
-
-which doesnt print it out at all. This is due to other optimisations kicking in.
-
-Example of what happens on a structural level:
-
-The regexp /(ac|ad|ab)+/ will produce the following debug output:
-
-   1: CURLYM[1] {1,32767}(18)
-   5:   BRANCH(8)
-   6:     EXACT <ac>(16)
-   8:   BRANCH(11)
-   9:     EXACT <ad>(16)
-  11:   BRANCH(14)
-  12:     EXACT <ab>(16)
-  16:   SUCCEED(0)
-  17:   NOTHING(18)
-  18: END(0)
-
-This would be optimizable with startbranch=5, first=5, last=16, tail=16
-and should turn into:
-
-   1: CURLYM[1] {1,32767}(18)
-   5:   TRIE(16)
-        [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
-          <ac>
-          <ad>
-          <ab>
-  16:   SUCCEED(0)
-  17:   NOTHING(18)
-  18: END(0)
-
-Cases where tail != last would be like /(?foo|bar)baz/:
-
-   1: BRANCH(4)
-   2:   EXACT <foo>(8)
-   4: BRANCH(7)
-   5:   EXACT <bar>(8)
-   7: TAIL(8)
-   8: EXACT <baz>(10)
-  10: END(0)
-
-which would be optimizable with startbranch=1, first=1, last=7, tail=8
-and would end up looking like:
-
-    1: TRIE(8)
-      [Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
-        <foo>
-        <bar>
-   7: TAIL(8)
-   8: EXACT <baz>(10)
-  10: END(0)
-
-    d = uvchr_to_utf8_flags(d, uv, 0);
-
-is the recommended Unicode-aware way of saying
-
-    *(d++) = uv;
-*/
-
-#define TRIE_STORE_REVCHAR(val)                                            \
-    STMT_START {                                                           \
-        if (UTF) {                                                         \
-            SV *zlopp = newSV(UTF8_MAXBYTES);                              \
-            unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
-            *kapow = '\0';                                                 \
-            SvCUR_set(zlopp, kapow - flrbbbbb);                            \
-            SvPOK_on(zlopp);                                               \
-            SvUTF8_on(zlopp);                                              \
-            av_push_simple(revcharmap, zlopp);                                     \
-        } else {                                                           \
-            char ooooff = (char)val;                                           \
-            av_push_simple(revcharmap, newSVpvn(&ooooff, 1));                      \
-        }                                                                  \
-        } STMT_END
-
-/* This gets the next character from the input, folding it if not already
- * folded. */
-#define TRIE_READ_CHAR STMT_START {                                           \
-    wordlen++;                                                                \
-    if ( UTF ) {                                                              \
-        /* if it is UTF then it is either already folded, or does not need    \
-         * folding */                                                         \
-        uvc = valid_utf8_to_uvchr( (const U8*) uc, &len);                     \
-    }                                                                         \
-    else if (folder == PL_fold_latin1) {                                      \
-        /* This folder implies Unicode rules, which in the range expressible  \
-         *  by not UTF is the lower case, with the two exceptions, one of     \
-         *  which should have been taken care of before calling this */       \
-        assert(*uc != LATIN_SMALL_LETTER_SHARP_S);                            \
-        uvc = toLOWER_L1(*uc);                                                \
-        if (UNLIKELY(uvc == MICRO_SIGN)) uvc = GREEK_SMALL_LETTER_MU;         \
-        len = 1;                                                              \
-    } else {                                                                  \
-        /* raw data, will be folded later if needed */                        \
-        uvc = (U32)*uc;                                                       \
-        len = 1;                                                              \
-    }                                                                         \
-} STMT_END
-
-
-
-#define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
-    if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
-        U32 ging = TRIE_LIST_LEN( state ) * 2;                  \
-        Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
-        TRIE_LIST_LEN( state ) = ging;                          \
-    }                                                           \
-    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
-    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
-    TRIE_LIST_CUR( state )++;                                   \
-} STMT_END
-
-#define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newx( trie->states[ state ].trans.list,                     \
-        4, reg_trie_trans_le );                                 \
-     TRIE_LIST_CUR( state ) = 1;                                \
-     TRIE_LIST_LEN( state ) = 4;                                \
-} STMT_END
-
-#define TRIE_HANDLE_WORD(state) STMT_START {                    \
-    U16 dupe= trie->states[ state ].wordnum;                    \
-    regnode * const noper_next = regnext( noper );              \
-                                                                \
-    DEBUG_r({                                                   \
-        /* store the word for dumping */                        \
-        SV* tmp;                                                \
-        if (OP(noper) != NOTHING)                               \
-            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);    \
-        else                                                    \
-            tmp = newSVpvn_utf8( "", 0, UTF );                  \
-        av_push_simple( trie_words, tmp );                             \
-    });                                                         \
-                                                                \
-    curword++;                                                  \
-    trie->wordinfo[curword].prev   = 0;                         \
-    trie->wordinfo[curword].len    = wordlen;                   \
-    trie->wordinfo[curword].accept = state;                     \
-                                                                \
-    if ( noper_next < tail ) {                                  \
-        if (!trie->jump)                                        \
-            trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, \
-                                                 sizeof(U16) ); \
-        trie->jump[curword] = (U16)(noper_next - convert);      \
-        if (!jumper)                                            \
-            jumper = noper_next;                                \
-        if (!nextbranch)                                        \
-            nextbranch= regnext(cur);                           \
-    }                                                           \
-                                                                \
-    if ( dupe ) {                                               \
-        /* It's a dupe. Pre-insert into the wordinfo[].prev   */\
-        /* chain, so that when the bits of chain are later    */\
-        /* linked together, the dups appear in the chain      */\
-        trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
-        trie->wordinfo[dupe].prev = curword;                    \
-    } else {                                                    \
-        /* we haven't inserted this word yet.                */ \
-        trie->states[ state ].wordnum = curword;                \
-    }                                                           \
-} STMT_END
-
-
-#define TRIE_TRANS_STATE(state,base,ucharcount,charid,special)          \
-     ( ( base + charid >=  ucharcount                                   \
-         && base + charid < ubound                                      \
-         && state == trie->trans[ base - ucharcount + charid ].check    \
-         && trie->trans[ base - ucharcount + charid ].next )            \
-           ? trie->trans[ base - ucharcount + charid ].next             \
-           : ( state==1 ? special : 0 )                                 \
-      )
-
-#define TRIE_BITMAP_SET_FOLDED(trie, uvc, folder)           \
-STMT_START {                                                \
-    TRIE_BITMAP_SET(trie, uvc);                             \
-    /* store the folded codepoint */                        \
-    if ( folder )                                           \
-        TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);           \
-                                                            \
-    if ( !UTF ) {                                           \
-        /* store first byte of utf8 representation of */    \
-        /* variant codepoints */                            \
-        if (! UVCHR_IS_INVARIANT(uvc)) {                    \
-            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));   \
-        }                                                   \
-    }                                                       \
-} STMT_END
-#define MADE_TRIE       1
-#define MADE_JUMP_TRIE  2
-#define MADE_EXACT_TRIE 4
-
-STATIC I32
-S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
-                  regnode *first, regnode *last, regnode *tail,
-                  U32 word_count, U32 flags, U32 depth)
-{
-    /* first pass, loop through and scan words */
-    reg_trie_data *trie;
-    HV *widecharmap = NULL;
-    AV *revcharmap = newAV();
-    regnode *cur;
-    STRLEN len = 0;
-    UV uvc = 0;
-    U16 curword = 0;
-    U32 next_alloc = 0;
-    regnode *jumper = NULL;
-    regnode *nextbranch = NULL;
-    regnode *convert = NULL;
-    U32 *prev_states; /* temp array mapping each state to previous one */
-    /* we just use folder as a flag in utf8 */
-    const U8 * folder = NULL;
-
-    /* in the below add_data call we are storing either 'tu' or 'tuaa'
-     * which stands for one trie structure, one hash, optionally followed
-     * by two arrays */
-#ifdef DEBUGGING
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuaa"));
-    AV *trie_words = NULL;
-    /* along with revcharmap, this only used during construction but both are
-     * useful during debugging so we store them in the struct when debugging.
-     */
-#else
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tu"));
-    STRLEN trie_charcount=0;
-#endif
-    SV *re_trie_maxbuff;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_MAKE_TRIE;
-#ifndef DEBUGGING
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    switch (flags) {
-        case EXACT: case EXACT_REQ8: case EXACTL: break;
-        case EXACTFAA:
-        case EXACTFUP:
-        case EXACTFU:
-        case EXACTFLU8: folder = PL_fold_latin1; break;
-        case EXACTF:  folder = PL_fold; break;
-        default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, REGNODE_NAME(flags) );
-    }
-
-    trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
-    trie->refcount = 1;
-    trie->startstate = 1;
-    trie->wordcount = word_count;
-    RExC_rxi->data->data[ data_slot ] = (void*)trie;
-    trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
-    if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL)
-        trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
-    trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
-                       trie->wordcount+1, sizeof(reg_trie_wordinfo));
-
-    DEBUG_r({
-        trie_words = newAV();
-    });
-
-    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
-    assert(re_trie_maxbuff);
-    if (!SvIOK(re_trie_maxbuff)) {
-        sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
-    }
-    DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_
-          "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
-          depth+1,
-          REG_NODE_NUM(startbranch), REG_NODE_NUM(first),
-          REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth);
-    });
-
-   /* Find the node we are going to overwrite */
-    if ( first == startbranch && OP( last ) != BRANCH ) {
-        /* whole branch chain */
-        convert = first;
-    } else {
-        /* branch sub-chain */
-        convert = REGNODE_AFTER( first );
-    }
-
-    /*  -- First loop and Setup --
-
-       We first traverse the branches and scan each word to determine if it
-       contains widechars, and how many unique chars there are, this is
-       important as we have to build a table with at least as many columns as we
-       have unique chars.
-
-       We use an array of integers to represent the character codes 0..255
-       (trie->charmap) and we use a an HV* to store Unicode characters. We use
-       the native representation of the character value as the key and IV's for
-       the coded index.
-
-       *TODO* If we keep track of how many times each character is used we can
-       remap the columns so that the table compression later on is more
-       efficient in terms of memory by ensuring the most common value is in the
-       middle and the least common are on the outside.  IMO this would be better
-       than a most to least common mapping as theres a decent chance the most
-       common letter will share a node with the least common, meaning the node
-       will not be compressible. With a middle is most common approach the worst
-       case is when we have the least common nodes twice.
-
-     */
-
-    for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-        regnode *noper = REGNODE_AFTER( cur );
-        const U8 *uc;
-        const U8 *e;
-        int foldlen = 0;
-        U32 wordlen      = 0;         /* required init */
-        STRLEN minchars = 0;
-        STRLEN maxchars = 0;
-        bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the
-                                               bitmap?*/
-
-        if (OP(noper) == NOTHING) {
-            /* skip past a NOTHING at the start of an alternation
-             * eg, /(?:)a|(?:b)/ should be the same as /a|b/
-             *
-             * If the next node is not something we are supposed to process
-             * we will just ignore it due to the condition guarding the
-             * next block.
-             */
-
-            regnode *noper_next= regnext(noper);
-            if (noper_next < tail)
-                noper= noper_next;
-        }
-
-        if (    noper < tail
-            && (    OP(noper) == flags
-                || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                         || OP(noper) == EXACTFUP))))
-        {
-            uc= (U8*)STRING(noper);
-            e= uc + STR_LEN(noper);
-        } else {
-            trie->minlen= 0;
-            continue;
-        }
-
-
-        if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
-            TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
-                                          regardless of encoding */
-            if (OP( noper ) == EXACTFUP) {
-                /* false positives are ok, so just set this */
-                TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
-            }
-        }
-
-        for ( ; uc < e ; uc += len ) {  /* Look at each char in the current
-                                           branch */
-            TRIE_CHARCOUNT(trie)++;
-            TRIE_READ_CHAR;
-
-            /* TRIE_READ_CHAR returns the current character, or its fold if /i
-             * is in effect.  Under /i, this character can match itself, or
-             * anything that folds to it.  If not under /i, it can match just
-             * itself.  Most folds are 1-1, for example k, K, and KELVIN SIGN
-             * all fold to k, and all are single characters.   But some folds
-             * expand to more than one character, so for example LATIN SMALL
-             * LIGATURE FFI folds to the three character sequence 'ffi'.  If
-             * the string beginning at 'uc' is 'ffi', it could be matched by
-             * three characters, or just by the one ligature character. (It
-             * could also be matched by two characters: LATIN SMALL LIGATURE FF
-             * followed by 'i', or by 'f' followed by LATIN SMALL LIGATURE FI).
-             * (Of course 'I' and/or 'F' instead of 'i' and 'f' can also
-             * match.)  The trie needs to know the minimum and maximum number
-             * of characters that could match so that it can use size alone to
-             * quickly reject many match attempts.  The max is simple: it is
-             * the number of folded characters in this branch (since a fold is
-             * never shorter than what folds to it. */
-
-            maxchars++;
-
-            /* And the min is equal to the max if not under /i (indicated by
-             * 'folder' being NULL), or there are no multi-character folds.  If
-             * there is a multi-character fold, the min is incremented just
-             * once, for the character that folds to the sequence.  Each
-             * character in the sequence needs to be added to the list below of
-             * characters in the trie, but we count only the first towards the
-             * min number of characters needed.  This is done through the
-             * variable 'foldlen', which is returned by the macros that look
-             * for these sequences as the number of bytes the sequence
-             * occupies.  Each time through the loop, we decrement 'foldlen' by
-             * how many bytes the current char occupies.  Only when it reaches
-             * 0 do we increment 'minchars' or look for another multi-character
-             * sequence. */
-            if (folder == NULL) {
-                minchars++;
-            }
-            else if (foldlen > 0) {
-                foldlen -= (UTF) ? UTF8SKIP(uc) : 1;
-            }
-            else {
-                minchars++;
-
-                /* See if *uc is the beginning of a multi-character fold.  If
-                 * so, we decrement the length remaining to look at, to account
-                 * for the current character this iteration.  (We can use 'uc'
-                 * instead of the fold returned by TRIE_READ_CHAR because the
-                 * macro is smart enough to account for any unfolded
-                 * characters. */
-                if (UTF) {
-                    if ((foldlen = is_MULTI_CHAR_FOLD_utf8_safe(uc, e))) {
-                        foldlen -= UTF8SKIP(uc);
-                    }
-                }
-                else if ((foldlen = is_MULTI_CHAR_FOLD_latin1_safe(uc, e))) {
-                    foldlen--;
-                }
-            }
-
-            /* The current character (and any potential folds) should be added
-             * to the possible matching characters for this position in this
-             * branch */
-            if ( uvc < 256 ) {
-                if ( folder ) {
-                    U8 folded= folder[ (U8) uvc ];
-                    if ( !trie->charmap[ folded ] ) {
-                        trie->charmap[ folded ]=( ++trie->uniquecharcount );
-                        TRIE_STORE_REVCHAR( folded );
-                    }
-                }
-                if ( !trie->charmap[ uvc ] ) {
-                    trie->charmap[ uvc ]=( ++trie->uniquecharcount );
-                    TRIE_STORE_REVCHAR( uvc );
-                }
-                if ( set_bit ) {
-                    /* store the codepoint in the bitmap, and its folded
-                     * equivalent. */
-                    TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
-                    set_bit = 0; /* We've done our bit :-) */
-                }
-            } else {
-
-                /* XXX We could come up with the list of code points that fold
-                 * to this using PL_utf8_foldclosures, except not for
-                 * multi-char folds, as there may be multiple combinations
-                 * there that could work, which needs to wait until runtime to
-                 * resolve (The comment about LIGATURE FFI above is such an
-                 * example */
-
-                SV** svpp;
-                if ( !widecharmap )
-                    widecharmap = newHV();
-
-                svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
-
-                if ( !svpp )
-                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
-
-                if ( !SvTRUE( *svpp ) ) {
-                    sv_setiv( *svpp, ++trie->uniquecharcount );
-                    TRIE_STORE_REVCHAR(uvc);
-                }
-            }
-        } /* end loop through characters in this branch of the trie */
-
-        /* We take the min and max for this branch and combine to find the min
-         * and max for all branches processed so far */
-        if( cur == first ) {
-            trie->minlen = minchars;
-            trie->maxlen = maxchars;
-        } else if (minchars < trie->minlen) {
-            trie->minlen = minchars;
-        } else if (maxchars > trie->maxlen) {
-            trie->maxlen = maxchars;
-        }
-    } /* end first pass */
-    DEBUG_TRIE_COMPILE_r(
-        Perl_re_indentf( aTHX_
-                "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
-                depth+1,
-                ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
-                (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
-                (int)trie->minlen, (int)trie->maxlen )
-    );
-
-    /*
-        We now know what we are dealing with in terms of unique chars and
-        string sizes so we can calculate how much memory a naive
-        representation using a flat table  will take. If it's over a reasonable
-        limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
-        conservative but potentially much slower representation using an array
-        of lists.
-
-        At the end we convert both representations into the same compressed
-        form that will be used in regexec.c for matching with. The latter
-        is a form that cannot be used to construct with but has memory
-        properties similar to the list form and access properties similar
-        to the table form making it both suitable for fast searches and
-        small enough that its feasable to store for the duration of a program.
-
-        See the comment in the code where the compressed table is produced
-        inplace from the flat tabe representation for an explanation of how
-        the compression works.
-
-    */
-
-
-    Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32);
-    prev_states[1] = 0;
-
-    if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1)
-                                                    > SvIV(re_trie_maxbuff) )
-    {
-        /*
-            Second Pass -- Array Of Lists Representation
-
-            Each state will be represented by a list of charid:state records
-            (reg_trie_trans_le) the first such element holds the CUR and LEN
-            points of the allocated array. (See defines above).
-
-            We build the initial structure using the lists, and then convert
-            it into the compressed table form which allows faster lookups
-            (but cant be modified once converted).
-        */
-
-        STRLEN transcount = 1;
-
-        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using list compiler\n",
-            depth+1));
-
-        trie->states = (reg_trie_state *)
-            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
-                                  sizeof(reg_trie_state) );
-        TRIE_LIST_NEW(1);
-        next_alloc = 2;
-
-        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-
-            regnode *noper   = REGNODE_AFTER( cur );
-            U32 state        = 1;         /* required init */
-            U16 charid       = 0;         /* sanity init */
-            U32 wordlen      = 0;         /* required init */
-
-            if (OP(noper) == NOTHING) {
-                regnode *noper_next= regnext(noper);
-                if (noper_next < tail)
-                    noper= noper_next;
-                /* we will undo this assignment if noper does not
-                 * point at a trieable type in the else clause of
-                 * the following statement. */
-            }
-
-            if (    noper < tail
-                && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                             || OP(noper) == EXACTFUP))))
-            {
-                const U8 *uc= (U8*)STRING(noper);
-                const U8 *e= uc + STR_LEN(noper);
-
-                for ( ; uc < e ; uc += len ) {
-
-                    TRIE_READ_CHAR;
-
-                    if ( uvc < 256 ) {
-                        charid = trie->charmap[ uvc ];
-                    } else {
-                        SV** const svpp = hv_fetch( widecharmap,
-                                                    (char*)&uvc,
-                                                    sizeof( UV ),
-                                                    0);
-                        if ( !svpp ) {
-                            charid = 0;
-                        } else {
-                            charid=(U16)SvIV( *svpp );
-                        }
-                    }
-                    /* charid is now 0 if we dont know the char read, or
-                     * nonzero if we do */
-                    if ( charid ) {
-
-                        U16 check;
-                        U32 newstate = 0;
-
-                        charid--;
-                        if ( !trie->states[ state ].trans.list ) {
-                            TRIE_LIST_NEW( state );
-                        }
-                        for ( check = 1;
-                              check <= TRIE_LIST_USED( state );
-                              check++ )
-                        {
-                            if ( TRIE_LIST_ITEM( state, check ).forid
-                                                                    == charid )
-                            {
-                                newstate = TRIE_LIST_ITEM( state, check ).newstate;
-                                break;
-                            }
-                        }
-                        if ( ! newstate ) {
-                            newstate = next_alloc++;
-                            prev_states[newstate] = state;
-                            TRIE_LIST_PUSH( state, charid, newstate );
-                            transcount++;
-                        }
-                        state = newstate;
-                    } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
-                    }
-                }
-            } else {
-                /* If we end up here it is because we skipped past a NOTHING, but did not end up
-                 * on a trieable type. So we need to reset noper back to point at the first regop
-                 * in the branch before we call TRIE_HANDLE_WORD()
-                */
-                noper= REGNODE_AFTER(cur);
-            }
-            TRIE_HANDLE_WORD(state);
-
-        } /* end second pass */
-
-        /* next alloc is the NEXT state to be allocated */
-        trie->statecount = next_alloc;
-        trie->states = (reg_trie_state *)
-            PerlMemShared_realloc( trie->states,
-                                   next_alloc
-                                   * sizeof(reg_trie_state) );
-
-        /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
-                                                         revcharmap, next_alloc,
-                                                         depth+1)
-        );
-
-        trie->trans = (reg_trie_trans *)
-            PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
-        {
-            U32 state;
-            U32 tp = 0;
-            U32 zp = 0;
-
-
-            for( state=1 ; state < next_alloc ; state ++ ) {
-                U32 base=0;
-
-                /*
-                DEBUG_TRIE_COMPILE_MORE_r(
-                    Perl_re_printf( aTHX_  "tp: %d zp: %d ",tp,zp)
-                );
-                */
-
-                if (trie->states[state].trans.list) {
-                    U16 minid=TRIE_LIST_ITEM( state, 1).forid;
-                    U16 maxid=minid;
-                    U16 idx;
-
-                    for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
-                        const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
-                        if ( forid < minid ) {
-                            minid=forid;
-                        } else if ( forid > maxid ) {
-                            maxid=forid;
-                        }
-                    }
-                    if ( transcount < tp + maxid - minid + 1) {
-                        transcount *= 2;
-                        trie->trans = (reg_trie_trans *)
-                            PerlMemShared_realloc( trie->trans,
-                                                     transcount
-                                                     * sizeof(reg_trie_trans) );
-                        Zero( trie->trans + (transcount / 2),
-                              transcount / 2,
-                              reg_trie_trans );
-                    }
-                    base = trie->uniquecharcount + tp - minid;
-                    if ( maxid == minid ) {
-                        U32 set = 0;
-                        for ( ; zp < tp ; zp++ ) {
-                            if ( ! trie->trans[ zp ].next ) {
-                                base = trie->uniquecharcount + zp - minid;
-                                trie->trans[ zp ].next = TRIE_LIST_ITEM( state,
-                                                                   1).newstate;
-                                trie->trans[ zp ].check = state;
-                                set = 1;
-                                break;
-                            }
-                        }
-                        if ( !set ) {
-                            trie->trans[ tp ].next = TRIE_LIST_ITEM( state,
-                                                                   1).newstate;
-                            trie->trans[ tp ].check = state;
-                            tp++;
-                            zp = tp;
-                        }
-                    } else {
-                        for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
-                            const U32 tid = base
-                                           - trie->uniquecharcount
-                                           + TRIE_LIST_ITEM( state, idx ).forid;
-                            trie->trans[ tid ].next = TRIE_LIST_ITEM( state,
-                                                                idx ).newstate;
-                            trie->trans[ tid ].check = state;
-                        }
-                        tp += ( maxid - minid + 1 );
-                    }
-                    Safefree(trie->states[ state ].trans.list);
-                }
-                /*
-                DEBUG_TRIE_COMPILE_MORE_r(
-                    Perl_re_printf( aTHX_  " base: %d\n",base);
-                );
-                */
-                trie->states[ state ].trans.base=base;
-            }
-            trie->lasttrans = tp + 1;
-        }
-    } else {
-        /*
-           Second Pass -- Flat Table Representation.
-
-           we dont use the 0 slot of either trans[] or states[] so we add 1 to
-           each.  We know that we will need Charcount+1 trans at most to store
-           the data (one row per char at worst case) So we preallocate both
-           structures assuming worst case.
-
-           We then construct the trie using only the .next slots of the entry
-           structs.
-
-           We use the .check field of the first entry of the node temporarily
-           to make compression both faster and easier by keeping track of how
-           many non zero fields are in the node.
-
-           Since trans are numbered from 1 any 0 pointer in the table is a FAIL
-           transition.
-
-           There are two terms at use here: state as a TRIE_NODEIDX() which is
-           a number representing the first entry of the node, and state as a
-           TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1)
-           and TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3)
-           if there are 2 entrys per node. eg:
-
-             A B       A B
-          1. 2 4    1. 3 7
-          2. 0 3    3. 0 5
-          3. 0 0    5. 0 0
-          4. 0 0    7. 0 0
-
-           The table is internally in the right hand, idx form. However as we
-           also have to deal with the states array which is indexed by nodenum
-           we have to use TRIE_NODENUM() to convert.
-
-        */
-        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using table compiler\n",
-            depth+1));
-
-        trie->trans = (reg_trie_trans *)
-            PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
-                                  * trie->uniquecharcount + 1,
-                                  sizeof(reg_trie_trans) );
-        trie->states = (reg_trie_state *)
-            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
-                                  sizeof(reg_trie_state) );
-        next_alloc = trie->uniquecharcount + 1;
-
-
-        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-
-            regnode *noper   = REGNODE_AFTER( cur );
-
-            U32 state        = 1;         /* required init */
-
-            U16 charid       = 0;         /* sanity init */
-            U32 accept_state = 0;         /* sanity init */
-
-            U32 wordlen      = 0;         /* required init */
-
-            if (OP(noper) == NOTHING) {
-                regnode *noper_next= regnext(noper);
-                if (noper_next < tail)
-                    noper= noper_next;
-                /* we will undo this assignment if noper does not
-                 * point at a trieable type in the else clause of
-                 * the following statement. */
-            }
-
-            if (    noper < tail
-                && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
-                                             || OP(noper) == EXACTFUP))))
-            {
-                const U8 *uc= (U8*)STRING(noper);
-                const U8 *e= uc + STR_LEN(noper);
-
-                for ( ; uc < e ; uc += len ) {
-
-                    TRIE_READ_CHAR;
-
-                    if ( uvc < 256 ) {
-                        charid = trie->charmap[ uvc ];
-                    } else {
-                        SV* const * const svpp = hv_fetch( widecharmap,
-                                                           (char*)&uvc,
-                                                           sizeof( UV ),
-                                                           0);
-                        charid = svpp ? (U16)SvIV(*svpp) : 0;
-                    }
-                    if ( charid ) {
-                        charid--;
-                        if ( !trie->trans[ state + charid ].next ) {
-                            trie->trans[ state + charid ].next = next_alloc;
-                            trie->trans[ state ].check++;
-                            prev_states[TRIE_NODENUM(next_alloc)]
-                                    = TRIE_NODENUM(state);
-                            next_alloc += trie->uniquecharcount;
-                        }
-                        state = trie->trans[ state + charid ].next;
-                    } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
-                    }
-                    /* charid is now 0 if we dont know the char read, or
-                     * nonzero if we do */
-                }
-            } else {
-                /* If we end up here it is because we skipped past a NOTHING, but did not end up
-                 * on a trieable type. So we need to reset noper back to point at the first regop
-                 * in the branch before we call TRIE_HANDLE_WORD().
-                */
-                noper= REGNODE_AFTER(cur);
-            }
-            accept_state = TRIE_NODENUM( state );
-            TRIE_HANDLE_WORD(accept_state);
-
-        } /* end second pass */
-
-        /* and now dump it out before we compress it */
-        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
-                                                          revcharmap,
-                                                          next_alloc, depth+1));
-
-        {
-        /*
-           * Inplace compress the table.*
-
-           For sparse data sets the table constructed by the trie algorithm will
-           be mostly 0/FAIL transitions or to put it another way mostly empty.
-           (Note that leaf nodes will not contain any transitions.)
-
-           This algorithm compresses the tables by eliminating most such
-           transitions, at the cost of a modest bit of extra work during lookup:
-
-           - Each states[] entry contains a .base field which indicates the
-           index in the state[] array wheres its transition data is stored.
-
-           - If .base is 0 there are no valid transitions from that node.
-
-           - If .base is nonzero then charid is added to it to find an entry in
-           the trans array.
-
-           -If trans[states[state].base+charid].check!=state then the
-           transition is taken to be a 0/Fail transition. Thus if there are fail
-           transitions at the front of the node then the .base offset will point
-           somewhere inside the previous nodes data (or maybe even into a node
-           even earlier), but the .check field determines if the transition is
-           valid.
-
-           XXX - wrong maybe?
-           The following process inplace converts the table to the compressed
-           table: We first do not compress the root node 1,and mark all its
-           .check pointers as 1 and set its .base pointer as 1 as well. This
-           allows us to do a DFA construction from the compressed table later,
-           and ensures that any .base pointers we calculate later are greater
-           than 0.
-
-           - We set 'pos' to indicate the first entry of the second node.
-
-           - We then iterate over the columns of the node, finding the first and
-           last used entry at l and m. We then copy l..m into pos..(pos+m-l),
-           and set the .check pointers accordingly, and advance pos
-           appropriately and repreat for the next node. Note that when we copy
-           the next pointers we have to convert them from the original
-           NODEIDX form to NODENUM form as the former is not valid post
-           compression.
-
-           - If a node has no transitions used we mark its base as 0 and do not
-           advance the pos pointer.
-
-           - If a node only has one transition we use a second pointer into the
-           structure to fill in allocated fail transitions from other states.
-           This pointer is independent of the main pointer and scans forward
-           looking for null transitions that are allocated to a state. When it
-           finds one it writes the single transition into the "hole".  If the
-           pointer doesnt find one the single transition is appended as normal.
-
-           - Once compressed we can Renew/realloc the structures to release the
-           excess space.
-
-           See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
-           specifically Fig 3.47 and the associated pseudocode.
-
-           demq
-        */
-        const U32 laststate = TRIE_NODENUM( next_alloc );
-        U32 state, charid;
-        U32 pos = 0, zp=0;
-        trie->statecount = laststate;
-
-        for ( state = 1 ; state < laststate ; state++ ) {
-            U8 flag = 0;
-            const U32 stateidx = TRIE_NODEIDX( state );
-            const U32 o_used = trie->trans[ stateidx ].check;
-            U32 used = trie->trans[ stateidx ].check;
-            trie->trans[ stateidx ].check = 0;
-
-            for ( charid = 0;
-                  used && charid < trie->uniquecharcount;
-                  charid++ )
-            {
-                if ( flag || trie->trans[ stateidx + charid ].next ) {
-                    if ( trie->trans[ stateidx + charid ].next ) {
-                        if (o_used == 1) {
-                            for ( ; zp < pos ; zp++ ) {
-                                if ( ! trie->trans[ zp ].next ) {
-                                    break;
-                                }
-                            }
-                            trie->states[ state ].trans.base
-                                                    = zp
-                                                      + trie->uniquecharcount
-                                                      - charid ;
-                            trie->trans[ zp ].next
-                                = SAFE_TRIE_NODENUM( trie->trans[ stateidx
-                                                             + charid ].next );
-                            trie->trans[ zp ].check = state;
-                            if ( ++zp > pos ) pos = zp;
-                            break;
-                        }
-                        used--;
-                    }
-                    if ( !flag ) {
-                        flag = 1;
-                        trie->states[ state ].trans.base
-                                       = pos + trie->uniquecharcount - charid ;
-                    }
-                    trie->trans[ pos ].next
-                        = SAFE_TRIE_NODENUM(
-                                       trie->trans[ stateidx + charid ].next );
-                    trie->trans[ pos ].check = state;
-                    pos++;
-                }
-            }
-        }
-        trie->lasttrans = pos + 1;
-        trie->states = (reg_trie_state *)
-            PerlMemShared_realloc( trie->states, laststate
-                                   * sizeof(reg_trie_state) );
-        DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
-                depth+1,
-                (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
-                       + 1 ),
-                (IV)next_alloc,
-                (IV)pos,
-                ( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
-            );
-
-        } /* end table compress */
-    }
-    DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
-                depth+1,
-                (UV)trie->statecount,
-                (UV)trie->lasttrans)
-    );
-    /* resize the trans array to remove unused space */
-    trie->trans = (reg_trie_trans *)
-        PerlMemShared_realloc( trie->trans, trie->lasttrans
-                               * sizeof(reg_trie_trans) );
-
-    {   /* Modify the program and insert the new TRIE node */
-        U8 nodetype =(U8) flags;
-        char *str=NULL;
-
-#ifdef DEBUGGING
-        regnode *optimize = NULL;
-#endif /* DEBUGGING */
-        /*
-           This means we convert either the first branch or the first Exact,
-           depending on whether the thing following (in 'last') is a branch
-           or not and whther first is the startbranch (ie is it a sub part of
-           the alternation or is it the whole thing.)
-           Assuming its a sub part we convert the EXACT otherwise we convert
-           the whole branch sequence, including the first.
-         */
-        /* Find the node we are going to overwrite */
-        if ( first != startbranch || OP( last ) == BRANCH ) {
-            /* branch sub-chain */
-            NEXT_OFF( first ) = (U16)(last - first);
-            /* whole branch chain */
-        }
-        /* But first we check to see if there is a common prefix we can
-           split out as an EXACT and put in front of the TRIE node.  */
-        trie->startstate= 1;
-        if ( trie->bitmap && !widecharmap && !trie->jump  ) {
-            /* we want to find the first state that has more than
-             * one transition, if that state is not the first state
-             * then we have a common prefix which we can remove.
-             */
-            U32 state;
-            for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
-                U32 ofs = 0;
-                I32 first_ofs = -1; /* keeps track of the ofs of the first
-                                       transition, -1 means none */
-                U32 count = 0;
-                const U32 base = trie->states[ state ].trans.base;
-
-                /* does this state terminate an alternation? */
-                if ( trie->states[state].wordnum )
-                        count = 1;
-
-                for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
-                    if ( ( base + ofs >= trie->uniquecharcount ) &&
-                         ( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
-                         trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
-                    {
-                        if ( ++count > 1 ) {
-                            /* we have more than one transition */
-                            SV **tmp;
-                            U8 *ch;
-                            /* if this is the first state there is no common prefix
-                             * to extract, so we can exit */
-                            if ( state == 1 ) break;
-                            tmp = av_fetch_simple( revcharmap, ofs, 0);
-                            ch = (U8*)SvPV_nolen_const( *tmp );
-
-                            /* if we are on count 2 then we need to initialize the
-                             * bitmap, and store the previous char if there was one
-                             * in it*/
-                            if ( count == 2 ) {
-                                /* clear the bitmap */
-                                Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
-                                DEBUG_OPTIMISE_r(
-                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
-                                        depth+1,
-                                        (UV)state));
-                                if (first_ofs >= 0) {
-                                    SV ** const tmp = av_fetch_simple( revcharmap, first_ofs, 0);
-                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
-
-                                    TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
-                                    DEBUG_OPTIMISE_r(
-                                        Perl_re_printf( aTHX_  "%s", (char*)ch)
-                                    );
-                                }
-                            }
-                            /* store the current firstchar in the bitmap */
-                            TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
-                            DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
-                        }
-                        first_ofs = ofs;
-                    }
-                }
-                if ( count == 1 ) {
-                    /* This state has only one transition, its transition is part
-                     * of a common prefix - we need to concatenate the char it
-                     * represents to what we have so far. */
-                    SV **tmp = av_fetch_simple( revcharmap, first_ofs, 0);
-                    STRLEN len;
-                    char *ch = SvPV( *tmp, len );
-                    DEBUG_OPTIMISE_r({
-                        SV *sv=sv_newmortal();
-                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
-                            depth+1,
-                            (UV)state, (UV)first_ofs,
-                            pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
-                                PL_colors[0], PL_colors[1],
-                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
-                                PERL_PV_ESCAPE_FIRSTCHAR
-                            )
-                        );
-                    });
-                    if ( state==1 ) {
-                        OP( convert ) = nodetype;
-                        str=STRING(convert);
-                        setSTR_LEN(convert, 0);
-                    }
-                    assert( ( STR_LEN(convert) + len ) < 256 );
-                    setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
-                    while (len--)
-                        *str++ = *ch++;
-                } else {
-#ifdef DEBUGGING
-                    if (state>1)
-                        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n"));
-#endif
-                    break;
-                }
-            }
-            trie->prefixlen = (state-1);
-            if (str) {
-                regnode *n = REGNODE_AFTER(convert);
-                assert( n - convert <= U16_MAX );
-                NEXT_OFF(convert) = n - convert;
-                trie->startstate = state;
-                trie->minlen -= (state - 1);
-                trie->maxlen -= (state - 1);
-#ifdef DEBUGGING
-               /* At least the UNICOS C compiler choked on this
-                * being argument to DEBUG_r(), so let's just have
-                * it right here. */
-               if (
-#ifdef PERL_EXT_RE_BUILD
-                   1
-#else
-                   DEBUG_r_TEST
-#endif
-                   ) {
-                   U32 word = trie->wordcount;
-                   while (word--) {
-                       SV ** const tmp = av_fetch_simple( trie_words, word, 0 );
-                       if (tmp) {
-                           if ( STR_LEN(convert) <= SvCUR(*tmp) )
-                               sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
-                           else
-                               sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
-                       }
-                   }
-               }
-#endif
-                if (trie->maxlen) {
-                    convert = n;
-                } else {
-                    NEXT_OFF(convert) = (U16)(tail - convert);
-                    DEBUG_r(optimize= n);
-                }
-            }
-        }
-        if (!jumper)
-            jumper = last;
-        if ( trie->maxlen ) {
-            NEXT_OFF( convert ) = (U16)(tail - convert);
-            ARG_SET( convert, data_slot );
-            /* Store the offset to the first unabsorbed branch in
-               jump[0], which is otherwise unused by the jump logic.
-               We use this when dumping a trie and during optimisation. */
-            if (trie->jump)
-                trie->jump[0] = (U16)(nextbranch - convert);
-
-            /* If the start state is not accepting (meaning there is no empty string/NOTHING)
-             *   and there is a bitmap
-             *   and the first "jump target" node we found leaves enough room
-             * then convert the TRIE node into a TRIEC node, with the bitmap
-             * embedded inline in the opcode - this is hypothetically faster.
-             */
-            if ( !trie->states[trie->startstate].wordnum
-                 && trie->bitmap
-                 && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
-            {
-                OP( convert ) = TRIEC;
-                Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
-                PerlMemShared_free(trie->bitmap);
-                trie->bitmap= NULL;
-            } else
-                OP( convert ) = TRIE;
-
-            /* store the type in the flags */
-            convert->flags = nodetype;
-            DEBUG_r({
-            optimize = convert
-                      + NODE_STEP_REGNODE
-                      + REGNODE_ARG_LEN( OP( convert ) );
-            });
-            /* XXX We really should free up the resource in trie now,
-                   as we won't use them - (which resources?) dmq */
-        }
-        /* needed for dumping*/
-        DEBUG_r(if (optimize) {
-            /*
-                Try to clean up some of the debris left after the
-                optimisation.
-             */
-            while( optimize < jumper ) {
-                OP( optimize ) = OPTIMIZED;
-                optimize++;
-            }
-        });
-    } /* end node insert */
-
-    /*  Finish populating the prev field of the wordinfo array.  Walk back
-     *  from each accept state until we find another accept state, and if
-     *  so, point the first word's .prev field at the second word. If the
-     *  second already has a .prev field set, stop now. This will be the
-     *  case either if we've already processed that word's accept state,
-     *  or that state had multiple words, and the overspill words were
-     *  already linked up earlier.
-     */
-    {
-        U16 word;
-        U32 state;
-        U16 prev;
-
-        for (word=1; word <= trie->wordcount; word++) {
-            prev = 0;
-            if (trie->wordinfo[word].prev)
-                continue;
-            state = trie->wordinfo[word].accept;
-            while (state) {
-                state = prev_states[state];
-                if (!state)
-                    break;
-                prev = trie->states[state].wordnum;
-                if (prev)
-                    break;
-            }
-            trie->wordinfo[word].prev = prev;
-        }
-        Safefree(prev_states);
-    }
-
-
-    /* and now dump out the compressed format */
-    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
-
-    RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
-#ifdef DEBUGGING
-    RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
-    RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
-#else
-    SvREFCNT_dec_NN(revcharmap);
-#endif
-    return trie->jump
-           ? MADE_JUMP_TRIE
-           : trie->startstate>1
-             ? MADE_EXACT_TRIE
-             : MADE_TRIE;
-}
-
-STATIC regnode *
-S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
-{
-/* The Trie is constructed and compressed now so we can build a fail array if
- * it's needed
-
-   This is basically the Aho-Corasick algorithm. Its from exercise 3.31 and
-   3.32 in the
-   "Red Dragon" -- Compilers, principles, techniques, and tools. Aho, Sethi,
-   Ullman 1985/88
-   ISBN 0-201-10088-6
-
-   We find the fail state for each state in the trie, this state is the longest
-   proper suffix of the current state's 'word' that is also a proper prefix of
-   another word in our trie. State 1 represents the word '' and is thus the
-   default fail state. This allows the DFA not to have to restart after its
-   tried and failed a word at a given point, it simply continues as though it
-   had been matching the other word in the first place.
-   Consider
-      'abcdgu'=~/abcdefg|cdgu/
-   When we get to 'd' we are still matching the first word, we would encounter
-   'g' which would fail, which would bring us to the state representing 'd' in
-   the second word where we would try 'g' and succeed, proceeding to match
-   'cdgu'.
- */
- /* add a fail transition */
-    const U32 trie_offset = ARG(source);
-    reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset];
-    U32 *q;
-    const U32 ucharcount = trie->uniquecharcount;
-    const U32 numstates = trie->statecount;
-    const U32 ubound = trie->lasttrans + ucharcount;
-    U32 q_read = 0;
-    U32 q_write = 0;
-    U32 charid;
-    U32 base = trie->states[ 1 ].trans.base;
-    U32 *fail;
-    reg_ac_data *aho;
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
-    regnode *stclass;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
-    PERL_UNUSED_CONTEXT;
-#ifndef DEBUGGING
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    if ( OP(source) == TRIE ) {
-        struct regnode_1 *op = (struct regnode_1 *)
-            PerlMemShared_calloc(1, sizeof(struct regnode_1));
-        StructCopy(source, op, struct regnode_1);
-        stclass = (regnode *)op;
-    } else {
-        struct regnode_charclass *op = (struct regnode_charclass *)
-            PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
-        StructCopy(source, op, struct regnode_charclass);
-        stclass = (regnode *)op;
-    }
-    OP(stclass)+=2; /* convert the TRIE type to its AHO-CORASICK equivalent */
-
-    ARG_SET( stclass, data_slot );
-    aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
-    RExC_rxi->data->data[ data_slot ] = (void*)aho;
-    aho->trie=trie_offset;
-    aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
-    Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newx( q, numstates, U32);
-    aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
-    aho->refcount = 1;
-    fail = aho->fail;
-    /* initialize fail[0..1] to be 1 so that we always have
-       a valid final fail state */
-    fail[ 0 ] = fail[ 1 ] = 1;
-
-    for ( charid = 0; charid < ucharcount ; charid++ ) {
-        const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
-        if ( newstate ) {
-            q[ q_write ] = newstate;
-            /* set to point at the root */
-            fail[ q[ q_write++ ] ]=1;
-        }
-    }
-    while ( q_read < q_write) {
-        const U32 cur = q[ q_read++ % numstates ];
-        base = trie->states[ cur ].trans.base;
-
-        for ( charid = 0 ; charid < ucharcount ; charid++ ) {
-            const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
-            if (ch_state) {
-                U32 fail_state = cur;
-                U32 fail_base;
-                do {
-                    fail_state = fail[ fail_state ];
-                    fail_base = aho->states[ fail_state ].trans.base;
-                } while ( !TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 ) );
-
-                fail_state = TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 );
-                fail[ ch_state ] = fail_state;
-                if ( !aho->states[ ch_state ].wordnum && aho->states[ fail_state ].wordnum )
-                {
-                        aho->states[ ch_state ].wordnum =  aho->states[ fail_state ].wordnum;
-                }
-                q[ q_write++ % numstates] = ch_state;
-            }
-        }
-    }
-    /* restore fail[0..1] to 0 so that we "fall out" of the AC loop
-       when we fail in state 1, this allows us to use the
-       charclass scan to find a valid start char. This is based on the principle
-       that theres a good chance the string being searched contains lots of stuff
-       that cant be a start char.
-     */
-    fail[ 0 ] = fail[ 1 ] = 0;
-    DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
-                      depth, (UV)numstates
-        );
-        for( q_read=1; q_read<numstates; q_read++ ) {
-            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
-        }
-        Perl_re_printf( aTHX_  "\n");
-    });
-    Safefree(q);
-    /*RExC_seen |= REG_TRIEDFA_SEEN;*/
-    return stclass;
-}
-
-
-/* The below joins as many adjacent EXACTish nodes as possible into a single
- * one.  The regop may be changed if the node(s) contain certain sequences that
- * require special handling.  The joining is only done if:
- * 1) there is room in the current conglomerated node to entirely contain the
- *    next one.
- * 2) they are compatible node types
- *
- * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
- * these get optimized out
- *
- * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
- * as possible, even if that means splitting an existing node so that its first
- * part is moved to the preceding node.  This would maximise the efficiency of
- * memEQ during matching.
- *
- * If a node is to match under /i (folded), the number of characters it matches
- * can be different than its character length if it contains a multi-character
- * fold.  *min_subtract is set to the total delta number of characters of the
- * input nodes.
- *
- * And *unfolded_multi_char is set to indicate whether or not the node contains
- * an unfolded multi-char fold.  This happens when it won't be known until
- * runtime whether the fold is valid or not; namely
- *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
- *      target string being matched against turns out to be UTF-8 is that fold
- *      valid; or
- *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
- *      runtime.
- * (Multi-char folds whose components are all above the Latin1 range are not
- * run-time locale dependent, and have already been folded by the time this
- * function is called.)
- *
- * This is as good a place as any to discuss the design of handling these
- * multi-character fold sequences.  It's been wrong in Perl for a very long
- * time.  There are three code points in Unicode whose multi-character folds
- * were long ago discovered to mess things up.  The previous designs for
- * dealing with these involved assigning a special node for them.  This
- * approach doesn't always work, as evidenced by this example:
- *      "\xDFs" =~ /s\xDF/ui    # Used to fail before these patches
- * Both sides fold to "sss", but if the pattern is parsed to create a node that
- * would match just the \xDF, it won't be able to handle the case where a
- * successful match would have to cross the node's boundary.  The new approach
- * that hopefully generally solves the problem generates an EXACTFUP node
- * that is "sss" in this case.
- *
- * It turns out that there are problems with all multi-character folds, and not
- * just these three.  Now the code is general, for all such cases.  The
- * approach taken is:
- * 1)   This routine examines each EXACTFish node that could contain multi-
- *      character folded sequences.  Since a single character can fold into
- *      such a sequence, the minimum match length for this node is less than
- *      the number of characters in the node.  This routine returns in
- *      *min_subtract how many characters to subtract from the actual
- *      length of the string to get a real minimum match length; it is 0 if
- *      there are no multi-char foldeds.  This delta is used by the caller to
- *      adjust the min length of the match, and the delta between min and max,
- *      so that the optimizer doesn't reject these possibilities based on size
- *      constraints.
- *
- * 2)   For the sequence involving the LATIN SMALL LETTER SHARP S (U+00DF)
- *      under /u, we fold it to 'ss' in regatom(), and in this routine, after
- *      joining, we scan for occurrences of the sequence 'ss' in non-UTF-8
- *      EXACTFU nodes.  The node type of such nodes is then changed to
- *      EXACTFUP, indicating it is problematic, and needs careful handling.
- *      (The procedures in step 1) above are sufficient to handle this case in
- *      UTF-8 encoded nodes.)  The reason this is problematic is that this is
- *      the only case where there is a possible fold length change in non-UTF-8
- *      patterns.  By reserving a special node type for problematic cases, the
- *      far more common regular EXACTFU nodes can be processed faster.
- *      regexec.c takes advantage of this.
- *
- *      EXACTFUP has been created as a grab-bag for (hopefully uncommon)
- *      problematic cases.   These all only occur when the pattern is not
- *      UTF-8.  In addition to the 'ss' sequence where there is a possible fold
- *      length change, it handles the situation where the string cannot be
- *      entirely folded.  The strings in an EXACTFish node are folded as much
- *      as possible during compilation in regcomp.c.  This saves effort in
- *      regex matching.  By using an EXACTFUP node when it is not possible to
- *      fully fold at compile time, regexec.c can know that everything in an
- *      EXACTFU node is folded, so folding can be skipped at runtime.  The only
- *      case where folding in EXACTFU nodes can't be done at compile time is
- *      the presumably uncommon MICRO SIGN, when the pattern isn't UTF-8.  This
- *      is because its fold requires UTF-8 to represent.  Thus EXACTFUP nodes
- *      handle two very different cases.  Alternatively, there could have been
- *      a node type where there are length changes, one for unfolded, and one
- *      for both.  If yet another special case needed to be created, the number
- *      of required node types would have to go to 7.  khw figures that even
- *      though there are plenty of node types to spare, that the maintenance
- *      cost wasn't worth the small speedup of doing it that way, especially
- *      since he thinks the MICRO SIGN is rarely encountered in practice.
- *
- *      There are other cases where folding isn't done at compile time, but
- *      none of them are under /u, and hence not for EXACTFU nodes.  The folds
- *      in EXACTFL nodes aren't known until runtime, and vary as the locale
- *      changes.  Some folds in EXACTF depend on if the runtime target string
- *      is UTF-8 or not.  (regatom() will create an EXACTFU node even under /di
- *      when no fold in it depends on the UTF-8ness of the target string.)
- *
- * 3)   A problem remains for unfolded multi-char folds. (These occur when the
- *      validity of the fold won't be known until runtime, and so must remain
- *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
- *      nodes when the pattern isn't in UTF-8.  (Note, BTW, that there cannot
- *      be an EXACTF node with a UTF-8 pattern.)  They also occur for various
- *      folds in EXACTFL nodes, regardless of the UTF-ness of the pattern.)
- *      The reason this is a problem is that the optimizer part of regexec.c
- *      (probably unwittingly, in Perl_regexec_flags()) makes an assumption
- *      that a character in the pattern corresponds to at most a single
- *      character in the target string.  (And I do mean character, and not byte
- *      here, unlike other parts of the documentation that have never been
- *      updated to account for multibyte Unicode.)  Sharp s in EXACTF and
- *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
- *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
- *      EXACTFL nodes, violate the assumption, and they are the only instances
- *      where it is violated.  I'm reluctant to try to change the assumption,
- *      as the code involved is impenetrable to me (khw), so instead the code
- *      here punts.  This routine examines EXACTFL nodes, and (when the pattern
- *      isn't UTF-8) EXACTF and EXACTFAA for such unfolded folds, and returns a
- *      boolean indicating whether or not the node contains such a fold.  When
- *      it is true, the caller sets a flag that later causes the optimizer in
- *      this file to not set values for the floating and fixed string lengths,
- *      and thus avoids the optimizer code in regexec.c that makes the invalid
- *      assumption.  Thus, there is no optimization based on string lengths for
- *      EXACTFL nodes that contain these few folds, nor for non-UTF8-pattern
- *      EXACTF and EXACTFAA nodes that contain the sharp s.  (The reason the
- *      assumption is wrong only in these cases is that all other non-UTF-8
- *      folds are 1-1; and, for UTF-8 patterns, we pre-fold all other folds to
- *      their expanded versions.  (Again, we can't prefold sharp s to 'ss' in
- *      EXACTF nodes because we don't know at compile time if it actually
- *      matches 'ss' or not.  For EXACTF nodes it will match iff the target
- *      string is in UTF-8.  This is in contrast to EXACTFU nodes, where it
- *      always matches; and EXACTFAA where it never does.  In an EXACTFAA node
- *      in a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}, avoiding the
- *      problem; but in a non-UTF8 pattern, folding it to that above-Latin1
- *      string would require the pattern to be forced into UTF-8, the overhead
- *      of which we want to avoid.  Similarly the unfolded multi-char folds in
- *      EXACTFL nodes will match iff the locale at the time of match is a UTF-8
- *      locale.)
- *
- *      Similarly, the code that generates tries doesn't currently handle
- *      not-already-folded multi-char folds, and it looks like a pain to change
- *      that.  Therefore, trie generation of EXACTFAA nodes with the sharp s
- *      doesn't work.  Instead, such an EXACTFAA is turned into a new regnode,
- *      EXACTFAA_NO_TRIE, which the trie code knows not to handle.  Most people
- *      using /iaa matching will be doing so almost entirely with ASCII
- *      strings, so this should rarely be encountered in practice */
-
-STATIC U32
-S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
-                   UV *min_subtract, bool *unfolded_multi_char,
-                   U32 flags, regnode *val, U32 depth)
-{
-    /* Merge several consecutive EXACTish nodes into one. */
-
-    regnode *n = regnext(scan);
-    U32 stringok = 1;
-    regnode *next = REGNODE_AFTER_varies(scan);
-    U32 merged = 0;
-    U32 stopnow = 0;
-#ifdef DEBUGGING
-    regnode *stop = scan;
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-#else
-    PERL_UNUSED_ARG(depth);
-#endif
-
-    PERL_ARGS_ASSERT_JOIN_EXACT;
-#ifndef EXPERIMENTAL_INPLACESCAN
-    PERL_UNUSED_ARG(flags);
-    PERL_UNUSED_ARG(val);
-#endif
-    DEBUG_PEEP("join", scan, depth, 0);
-
-    assert(REGNODE_TYPE(OP(scan)) == EXACT);
-
-    /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
-     * EXACT ones that are mergeable to the current one. */
-    while (    n
-           && (    REGNODE_TYPE(OP(n)) == NOTHING
-               || (stringok && REGNODE_TYPE(OP(n)) == EXACT))
-           && NEXT_OFF(n)
-           && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX)
-    {
-
-        if (OP(n) == TAIL || n > next)
-            stringok = 0;
-        if (REGNODE_TYPE(OP(n)) == NOTHING) {
-            DEBUG_PEEP("skip:", n, depth, 0);
-            NEXT_OFF(scan) += NEXT_OFF(n);
-            next = n + NODE_STEP_REGNODE;
-#ifdef DEBUGGING
-            if (stringok)
-                stop = n;
-#endif
-            n = regnext(n);
-        }
-        else if (stringok) {
-            const unsigned int oldl = STR_LEN(scan);
-            regnode * const nnext = regnext(n);
-
-            /* XXX I (khw) kind of doubt that this works on platforms (should
-             * Perl ever run on one) where U8_MAX is above 255 because of lots
-             * of other assumptions */
-            /* Don't join if the sum can't fit into a single node */
-            if (oldl + STR_LEN(n) > U8_MAX)
-                break;
-
-            /* Joining something that requires UTF-8 with something that
-             * doesn't, means the result requires UTF-8. */
-            if (OP(scan) == EXACT && (OP(n) == EXACT_REQ8)) {
-                OP(scan) = EXACT_REQ8;
-            }
-            else if (OP(scan) == EXACT_REQ8 && (OP(n) == EXACT)) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_REQ8)) {
-                OP(scan) = EXACTFU_REQ8;
-            }
-            else if ((OP(scan) == EXACTFU_REQ8) && (OP(n) == EXACTFU)) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
-
-                 /* Under /di, temporary EXACTFU_S_EDGE nodes are generated,
-                  * which can join with EXACTFU ones.  We check for this case
-                  * here.  These need to be resolved to either EXACTFU or
-                  * EXACTF at joining time.  They have nothing in them that
-                  * would forbid them from being the more desirable EXACTFU
-                  * nodes except that they begin and/or end with a single [Ss].
-                  * The reason this is problematic is because they could be
-                  * joined in this loop with an adjacent node that ends and/or
-                  * begins with [Ss] which would then form the sequence 'ss',
-                  * which matches differently under /di than /ui, in which case
-                  * EXACTFU can't be used.  If the 'ss' sequence doesn't get
-                  * formed, the nodes get absorbed into any adjacent EXACTFU
-                  * node.  And if the only adjacent node is EXACTF, they get
-                  * absorbed into that, under the theory that a longer node is
-                  * better than two shorter ones, even if one is EXACTFU.  Note
-                  * that EXACTFU_REQ8 is generated only for UTF-8 patterns,
-                  * and the EXACTFU_S_EDGE ones only for non-UTF-8.  */
-
-                if (STRING(n)[STR_LEN(n)-1] == 's') {
-
-                    /* Here the joined node would end with 's'.  If the node
-                     * following the combination is an EXACTF one, it's better to
-                     * join this trailing edge 's' node with that one, leaving the
-                     * current one in 'scan' be the more desirable EXACTFU */
-                    if (OP(nnext) == EXACTF) {
-                        break;
-                    }
-
-                    OP(scan) = EXACTFU_S_EDGE;
-
-                }   /* Otherwise, the beginning 's' of the 2nd node just
-                       becomes an interior 's' in 'scan' */
-            }
-            else if (OP(scan) == EXACTF && OP(n) == EXACTF) {
-                ;   /* join is compatible, no need to change OP */
-            }
-            else if (OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
-
-                /* EXACTF nodes are compatible for joining with EXACTFU_S_EDGE
-                 * nodes.  But the latter nodes can be also joined with EXACTFU
-                 * ones, and that is a better outcome, so if the node following
-                 * 'n' is EXACTFU, quit now so that those two can be joined
-                 * later */
-                if (OP(nnext) == EXACTFU) {
-                    break;
-                }
-
-                /* The join is compatible, and the combined node will be
-                 * EXACTF.  (These don't care if they begin or end with 's' */
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
-                if (   STRING(scan)[STR_LEN(scan)-1] == 's'
-                    && STRING(n)[0] == 's')
-                {
-                    /* When combined, we have the sequence 'ss', which means we
-                     * have to remain /di */
-                    OP(scan) = EXACTF;
-                }
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
-                if (STRING(n)[0] == 's') {
-                    ;   /* Here the join is compatible and the combined node
-                           starts with 's', no need to change OP */
-                }
-                else {  /* Now the trailing 's' is in the interior */
-                    OP(scan) = EXACTFU;
-                }
-            }
-            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
-
-                /* The join is compatible, and the combined node will be
-                 * EXACTF.  (These don't care if they begin or end with 's' */
-                OP(scan) = EXACTF;
-            }
-            else if (OP(scan) != OP(n)) {
-
-                /* The only other compatible joinings are the same node type */
-                break;
-            }
-
-            DEBUG_PEEP("merg", n, depth, 0);
-            merged++;
-
-            next = REGNODE_AFTER_varies(n);
-            NEXT_OFF(scan) += NEXT_OFF(n);
-            assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
-            setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
-            /* Now we can overwrite *n : */
-            Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
-#ifdef DEBUGGING
-            stop = next - 1;
-#endif
-            n = nnext;
-            if (stopnow) break;
-        }
-
-#ifdef EXPERIMENTAL_INPLACESCAN
-        if (flags && !NEXT_OFF(n)) {
-            DEBUG_PEEP("atch", val, depth, 0);
-            if (REGNODE_OFF_BY_ARG(OP(n))) {
-                ARG_SET(n, val - n);
-            }
-            else {
-                NEXT_OFF(n) = val - n;
-            }
-            stopnow = 1;
-        }
-#endif
-    }
-
-    /* This temporary node can now be turned into EXACTFU, and must, as
-     * regexec.c doesn't handle it */
-    if (OP(scan) == EXACTFU_S_EDGE) {
-        OP(scan) = EXACTFU;
-    }
-
-    *min_subtract = 0;
-    *unfolded_multi_char = FALSE;
-
-    /* Here, all the adjacent mergeable EXACTish nodes have been merged.  We
-     * can now analyze for sequences of problematic code points.  (Prior to
-     * this final joining, sequences could have been split over boundaries, and
-     * hence missed).  The sequences only happen in folding, hence for any
-     * non-EXACT EXACTish node */
-    if (OP(scan) != EXACT && OP(scan) != EXACT_REQ8 && OP(scan) != EXACTL) {
-        U8* s0 = (U8*) STRING(scan);
-        U8* s = s0;
-        U8* s_end = s0 + STR_LEN(scan);
-
-        int total_count_delta = 0;  /* Total delta number of characters that
-                                       multi-char folds expand to */
-
-        /* One pass is made over the node's string looking for all the
-         * possibilities.  To avoid some tests in the loop, there are two main
-         * cases, for UTF-8 patterns (which can't have EXACTF nodes) and
-         * non-UTF-8 */
-        if (UTF) {
-            U8* folded = NULL;
-
-            if (OP(scan) == EXACTFL) {
-                U8 *d;
-
-                /* An EXACTFL node would already have been changed to another
-                 * node type unless there is at least one character in it that
-                 * is problematic; likely a character whose fold definition
-                 * won't be known until runtime, and so has yet to be folded.
-                 * For all but the UTF-8 locale, folds are 1-1 in length, but
-                 * to handle the UTF-8 case, we need to create a temporary
-                 * folded copy using UTF-8 locale rules in order to analyze it.
-                 * This is because our macros that look to see if a sequence is
-                 * a multi-char fold assume everything is folded (otherwise the
-                 * tests in those macros would be too complicated and slow).
-                 * Note that here, the non-problematic folds will have already
-                 * been done, so we can just copy such characters.  We actually
-                 * don't completely fold the EXACTFL string.  We skip the
-                 * unfolded multi-char folds, as that would just create work
-                 * below to figure out the size they already are */
-
-                Newx(folded, UTF8_MAX_FOLD_CHAR_EXPAND * STR_LEN(scan) + 1, U8);
-                d = folded;
-                while (s < s_end) {
-                    STRLEN s_len = UTF8SKIP(s);
-                    if (! is_PROBLEMATIC_LOCALE_FOLD_utf8(s)) {
-                        Copy(s, d, s_len, U8);
-                        d += s_len;
-                    }
-                    else if (is_FOLDS_TO_MULTI_utf8(s)) {
-                        *unfolded_multi_char = TRUE;
-                        Copy(s, d, s_len, U8);
-                        d += s_len;
-                    }
-                    else if (isASCII(*s)) {
-                        *(d++) = toFOLD(*s);
-                    }
-                    else {
-                        STRLEN len;
-                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
-                        d += len;
-                    }
-                    s += s_len;
-                }
-
-                /* Point the remainder of the routine to look at our temporary
-                 * folded copy */
-                s = folded;
-                s_end = d;
-            } /* End of creating folded copy of EXACTFL string */
-
-            /* Examine the string for a multi-character fold sequence.  UTF-8
-             * patterns have all characters pre-folded by the time this code is
-             * executed */
-            while (s < s_end - 1) /* Can stop 1 before the end, as minimum
-                                     length sequence we are looking for is 2 */
-            {
-                int count = 0;  /* How many characters in a multi-char fold */
-                int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
-                if (! len) {    /* Not a multi-char fold: get next char */
-                    s += UTF8SKIP(s);
-                    continue;
-                }
-
-                { /* Here is a generic multi-char fold. */
-                    U8* multi_end  = s + len;
-
-                    /* Count how many characters are in it.  In the case of
-                     * /aa, no folds which contain ASCII code points are
-                     * allowed, so check for those, and skip if found. */
-                    if (OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
-                        count = utf8_length(s, multi_end);
-                        s = multi_end;
-                    }
-                    else {
-                        while (s < multi_end) {
-                            if (isASCII(*s)) {
-                                s++;
-                                goto next_iteration;
-                            }
-                            else {
-                                s += UTF8SKIP(s);
-                            }
-                            count++;
-                        }
-                    }
-                }
-
-                /* The delta is how long the sequence is minus 1 (1 is how long
-                 * the character that folds to the sequence is) */
-                total_count_delta += count - 1;
-              next_iteration: ;
-            }
-
-            /* We created a temporary folded copy of the string in EXACTFL
-             * nodes.  Therefore we need to be sure it doesn't go below zero,
-             * as the real string could be shorter */
-            if (OP(scan) == EXACTFL) {
-                int total_chars = utf8_length((U8*) STRING(scan),
-                                           (U8*) STRING(scan) + STR_LEN(scan));
-                if (total_count_delta > total_chars) {
-                    total_count_delta = total_chars;
-                }
-            }
-
-            *min_subtract += total_count_delta;
-            Safefree(folded);
-        }
-        else if (OP(scan) == EXACTFAA) {
-
-            /* Non-UTF-8 pattern, EXACTFAA node.  There can't be a multi-char
-             * fold to the ASCII range (and there are no existing ones in the
-             * upper latin1 range).  But, as outlined in the comments preceding
-             * this function, we need to flag any occurrences of the sharp s.
-             * This character forbids trie formation (because of added
-             * complexity) */
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-            while (s < s_end) {
-                if (*s == LATIN_SMALL_LETTER_SHARP_S) {
-                    OP(scan) = EXACTFAA_NO_TRIE;
-                    *unfolded_multi_char = TRUE;
-                    break;
-                }
-                s++;
-            }
-        }
-        else if (OP(scan) != EXACTFAA_NO_TRIE) {
-
-            /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
-             * folds that are all Latin1.  As explained in the comments
-             * preceding this function, we look also for the sharp s in EXACTF
-             * and EXACTFL nodes; it can be in the final position.  Otherwise
-             * we can stop looking 1 byte earlier because have to find at least
-             * two characters for a multi-fold */
-            const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
-                              ? s_end
-                              : s_end -1;
-
-            while (s < upper) {
-                int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
-                if (! len) {    /* Not a multi-char fold. */
-                    if (*s == LATIN_SMALL_LETTER_SHARP_S
-                        && (OP(scan) == EXACTF || OP(scan) == EXACTFL))
-                    {
-                        *unfolded_multi_char = TRUE;
-                    }
-                    s++;
-                    continue;
-                }
-
-                if (len == 2
-                    && isALPHA_FOLD_EQ(*s, 's')
-                    && isALPHA_FOLD_EQ(*(s+1), 's'))
-                {
-
-                    /* EXACTF nodes need to know that the minimum length
-                     * changed so that a sharp s in the string can match this
-                     * ss in the pattern, but they remain EXACTF nodes, as they
-                     * won't match this unless the target string is in UTF-8,
-                     * which we don't know until runtime.  EXACTFL nodes can't
-                     * transform into EXACTFU nodes */
-                    if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
-                        OP(scan) = EXACTFUP;
-                    }
-                }
-
-                *min_subtract += len - 1;
-                s += len;
-            }
-#endif
-        }
-    }
-
-#ifdef DEBUGGING
-    /* Allow dumping but overwriting the collection of skipped
-     * ops and/or strings with fake optimized ops */
-    n = REGNODE_AFTER_varies(scan);
-    while (n <= stop) {
-        OP(n) = OPTIMIZED;
-        FLAGS(n) = 0;
-        NEXT_OFF(n) = 0;
-        n++;
-    }
-#endif
-    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl", scan, depth, 0);});
-    return stopnow;
-}
-
-/* REx optimizer.  Converts nodes into quicker variants "in place".
-   Finds fixed substrings.  */
-
-/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
-   to the position after last scanned or to NULL. */
-
-#define INIT_AND_WITHP \
-    assert(!and_withp); \
-    Newx(and_withp, 1, regnode_ssc); \
-    SAVEFREEPV(and_withp)
-
-
-static void
-S_unwind_scan_frames(pTHX_ const void *p)
-{
-    scan_frame *f= (scan_frame *)p;
-    do {
-        scan_frame *n= f->next_frame;
-        Safefree(f);
-        f= n;
-    } while (f);
-}
-
-/* Follow the next-chain of the current node and optimize away
-   all the NOTHINGs from it.
- */
-STATIC void
-S_rck_elide_nothing(pTHX_ regnode *node)
-{
-    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
-
-    if (OP(node) != CURLYX) {
-        const int max = (REGNODE_OFF_BY_ARG(OP(node))
-                        ? I32_MAX
-                          /* I32 may be smaller than U16 on CRAYs! */
-                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-        int off = (REGNODE_OFF_BY_ARG(OP(node)) ? ARG(node) : NEXT_OFF(node));
-        int noff;
-        regnode *n = node;
-
-        /* Skip NOTHING and LONGJMP. */
-        while (
-            (n = regnext(n))
-            && (
-                (REGNODE_TYPE(OP(n)) == NOTHING && (noff = NEXT_OFF(n)))
-                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
-            )
-            && off + noff < max
-        ) {
-            off += noff;
-        }
-        if (REGNODE_OFF_BY_ARG(OP(node)))
-            ARG(node) = off;
-        else
-            NEXT_OFF(node) = off;
-    }
-    return;
-}
-
-/* the return from this sub is the minimum length that could possibly match */
-STATIC SSize_t
-S_study_chunk(pTHX_
-    RExC_state_t *pRExC_state,
-    regnode **scanp,        /* Start here (read-write). */
-    SSize_t *minlenp,       /* used for the minlen of substrings? */
-    SSize_t *deltap,        /* Write maxlen-minlen here. */
-    regnode *last,          /* Stop before this one. */
-    scan_data_t *data,      /* string data about the pattern */
-    I32 stopparen,          /* treat CLOSE-N as END, see GOSUB */
-    U32 recursed_depth,     /* how deep have we recursed via GOSUB */
-    regnode_ssc *and_withp, /* Valid if flags & SCF_DO_STCLASS_OR */
-    U32 flags,              /* flags controlling this call, see SCF_ flags */
-    U32 depth,              /* how deep have we recursed period */
-    bool was_mutate_ok      /* TRUE if in-place optimizations are allowed.
-                               FALSE only if the caller (recursively) was
-                               prohibited from modifying the regops, because
-                               a higher caller is holding a ptr to them. */
-)
-{
-    /* vars about the regnodes we are working with */
-    regnode *scan = *scanp; /* the current opcode we are inspecting */
-    regnode *next = NULL;   /* the next opcode beyond scan, tmp var */
-    regnode *first_non_open = scan; /* FIXME: should this init to NULL?
-                                       the first non open regop, if the init
-                                       val IS an OPEN then we will skip past
-                                       it just after the var decls section */
-    I32 code = 0;           /* temp var used to hold the optype of a regop */
-
-    /* vars about the min and max length of the pattern */
-    SSize_t min = 0;    /* min length of this part of the pattern */
-    SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
-                                         this is adjusted down if we find
-                                         an ACCEPT */
-    SSize_t delta = 0;  /* difference between min and max length
-                           (not accounting for stopmin) */
-
-    /* vars about capture buffers in the pattern */
-    I32 pars = 0;       /* count of OPEN opcodes */
-    I32 is_par = OP(scan) == OPEN ? PARNO(scan) : 0; /* is this op an OPEN? */
-
-    /* vars about whether this pattern contains something that can match
-     * infinitely long strings, eg, X* or X+ */
-    int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
-    int is_inf_internal = 0;            /* The studied chunk is infinite */
-
-    /* scan_data_t (struct) is used to hold information about the substrings
-     * and start class we have extracted from the string */
-    scan_data_t data_fake; /* temp var used for recursing in some cases */
-
-    SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
-                                   trie optimizations */
-
-    scan_frame *frame = NULL;  /* used as part of fake recursion */
-
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_STUDY_CHUNK;
-    RExC_study_started= 1;
-
-    Zero(&data_fake, 1, scan_data_t);
-
-    if ( depth == 0 ) {
-        while (first_non_open && OP(first_non_open) == OPEN)
-            first_non_open=regnext(first_non_open);
-    }
-
-  fake_study_recurse:
-    DEBUG_r(
-        RExC_study_chunk_recursed_count++;
-    );
-    DEBUG_OPTIMISE_MORE_r(
-    {
-        Perl_re_indentf( aTHX_  "study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
-            depth, (long)stopparen,
-            (unsigned long)RExC_study_chunk_recursed_count,
-            (unsigned long)depth, (unsigned long)recursed_depth,
-            scan,
-            last);
-        if (recursed_depth) {
-            U32 i;
-            U32 j;
-            for ( j = 0 ; j < recursed_depth ; j++ ) {
-                for ( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
-                    if (PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
-                        Perl_re_printf( aTHX_ " %d",(int)i);
-                        break;
-                    }
-                }
-                if ( j + 1 < recursed_depth ) {
-                    Perl_re_printf( aTHX_  ",");
-                }
-            }
-        }
-        Perl_re_printf( aTHX_ "\n");
-    }
-    );
-    while ( scan && OP(scan) != END && scan < last ){
-        UV min_subtract = 0;    /* How mmany chars to subtract from the minimum
-                                   node length to get a real minimum (because
-                                   the folded version may be shorter) */
-        bool unfolded_multi_char = FALSE;
-        /* avoid mutating ops if we are anywhere within the recursed or
-         * enframed handling for a GOSUB: the outermost level will handle it.
-         */
-        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
-        /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
-        DEBUG_PEEP("Peep", scan, depth, flags);
-
-
-        /* The reason we do this here is that we need to deal with things like
-         * /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT
-         * parsing code, as each (?:..) is handled by a different invocation of
-         * reg() -- Yves
-         */
-        if (REGNODE_TYPE(OP(scan)) == EXACT
-            && OP(scan) != LEXACT
-            && OP(scan) != LEXACT_REQ8
-            && mutate_ok
-        ) {
-            join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
-                    0, NULL, depth + 1);
-        }
-
-        /* Follow the next-chain of the current node and optimize
-           away all the NOTHINGs from it.
-         */
-        rck_elide_nothing(scan);
-
-        /* The principal pseudo-switch.  Cannot be a switch, since we look into
-         * several different things.  */
-        if ( OP(scan) == DEFINEP ) {
-            SSize_t minlen = 0;
-            SSize_t deltanext = 0;
-            SSize_t fake_last_close = 0;
-            regnode *fake_last_close_op = NULL;
-            U32 f = SCF_IN_DEFINE | (flags & SCF_TRIE_DOING_RESTUDY);
-
-            StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-            scan = regnext(scan);
-            assert( OP(scan) == IFTHEN );
-            DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
-
-            data_fake.last_closep= &fake_last_close;
-            data_fake.last_close_opp= &fake_last_close_op;
-            minlen = *minlenp;
-            next = regnext(scan);
-            scan = REGNODE_AFTER_type(scan,tregnode_IFTHEN);
-            DEBUG_PEEP("scan", scan, depth, flags);
-            DEBUG_PEEP("next", next, depth, flags);
-
-            /* we suppose the run is continuous, last=next...
-             * NOTE we dont use the return here! */
-            /* DEFINEP study_chunk() recursion */
-            (void)study_chunk(pRExC_state, &scan, &minlen,
-                              &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1, mutate_ok);
-
-            scan = next;
-        } else
-        if (
-            OP(scan) == BRANCH  ||
-            OP(scan) == BRANCHJ ||
-            OP(scan) == IFTHEN
-        ) {
-            next = regnext(scan);
-            code = OP(scan);
-
-            /* The op(next)==code check below is to see if we
-             * have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN"
-             * IFTHEN is special as it might not appear in pairs.
-             * Not sure whether BRANCH-BRANCHJ is possible, regardless
-             * we dont handle it cleanly. */
-            if (OP(next) == code || code == IFTHEN) {
-                /* NOTE - There is similar code to this block below for
-                 * handling TRIE nodes on a re-study.  If you change stuff here
-                 * check there too. */
-                SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
-                regnode_ssc accum;
-                regnode * const startbranch=scan;
-
-                if (flags & SCF_DO_SUBSTR) {
-                    /* Cannot merge strings after this. */
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                }
-
-                if (flags & SCF_DO_STCLASS)
-                    ssc_init_zero(pRExC_state, &accum);
-
-                while (OP(scan) == code) {
-                    SSize_t deltanext, minnext, fake_last_close = 0;
-                    regnode *fake_last_close_op = NULL;
-                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                    regnode_ssc this_class;
-
-                    DEBUG_PEEP("Branch", scan, depth, flags);
-
-                    num++;
-                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                    if (data) {
-                        data_fake.whilem_c = data->whilem_c;
-                        data_fake.last_closep = data->last_closep;
-                        data_fake.last_close_opp = data->last_close_opp;
-                    }
-                    else {
-                        data_fake.last_closep = &fake_last_close;
-                        data_fake.last_close_opp = &fake_last_close_op;
-                    }
-
-                    data_fake.pos_delta = delta;
-                    next = regnext(scan);
-
-                    scan = REGNODE_AFTER_opcode(scan, code);
-
-                    if (flags & SCF_DO_STCLASS) {
-                        ssc_init(pRExC_state, &this_class);
-                        data_fake.start_class = &this_class;
-                        f |= SCF_DO_STCLASS_AND;
-                    }
-                    if (flags & SCF_WHILEM_VISITED_POS)
-                        f |= SCF_WHILEM_VISITED_POS;
-
-                    /* we suppose the run is continuous, last=next...*/
-                    /* recurse study_chunk() for each BRANCH in an alternation */
-                    minnext = study_chunk(pRExC_state, &scan, minlenp,
-                                      &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1,
-                                      mutate_ok);
-
-                    if (min1 > minnext)
-                        min1 = minnext;
-                    if (deltanext == OPTIMIZE_INFTY) {
-                        is_inf = is_inf_internal = 1;
-                        max1 = OPTIMIZE_INFTY;
-                    } else if (max1 < minnext + deltanext)
-                        max1 = minnext + deltanext;
-                    scan = next;
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
-                        if ( stopmin > minnext)
-                            stopmin = min + min1;
-                        flags &= ~SCF_DO_SUBSTR;
-                        if (data)
-                            data->flags |= SCF_SEEN_ACCEPT;
-                    }
-                    if (data) {
-                        if (data_fake.flags & SF_HAS_EVAL)
-                            data->flags |= SF_HAS_EVAL;
-                        data->whilem_c = data_fake.whilem_c;
-                    }
-                    if (flags & SCF_DO_STCLASS)
-                        ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
-                    DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
-                }
-                if (code == IFTHEN && num < 2) /* Empty ELSE branch */
-                    min1 = 0;
-                if (flags & SCF_DO_SUBSTR) {
-                    data->pos_min += min1;
-                    if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
-                        data->pos_delta = OPTIMIZE_INFTY;
-                    else
-                        data->pos_delta += max1 - min1;
-                    if (max1 != min1 || is_inf)
-                        data->cur_is_floating = 1;
-                }
-                min += min1;
-                if (delta == OPTIMIZE_INFTY
-                 || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
-                    delta = OPTIMIZE_INFTY;
-                else
-                    delta += max1 - min1;
-                if (flags & SCF_DO_STCLASS_OR) {
-                    ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
-                    if (min1) {
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                        flags &= ~SCF_DO_STCLASS;
-                    }
-                }
-                else if (flags & SCF_DO_STCLASS_AND) {
-                    if (min1) {
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                        flags &= ~SCF_DO_STCLASS;
-                    }
-                    else {
-                        /* Switch to OR mode: cache the old value of
-                         * data->start_class */
-                        INIT_AND_WITHP;
-                        StructCopy(data->start_class, and_withp, regnode_ssc);
-                        flags &= ~SCF_DO_STCLASS_AND;
-                        StructCopy(&accum, data->start_class, regnode_ssc);
-                        flags |= SCF_DO_STCLASS_OR;
-                    }
-                }
-                DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
-
-                if (PERL_ENABLE_TRIE_OPTIMISATION
-                    && OP(startbranch) == BRANCH
-                    && mutate_ok
-                ) {
-                /* demq.
-
-                   Assuming this was/is a branch we are dealing with: 'scan'
-                   now points at the item that follows the branch sequence,
-                   whatever it is. We now start at the beginning of the
-                   sequence and look for subsequences of
-
-                   BRANCH->EXACT=>x1
-                   BRANCH->EXACT=>x2
-                   tail
-
-                   which would be constructed from a pattern like
-                   /A|LIST|OF|WORDS/
-
-                   If we can find such a subsequence we need to turn the first
-                   element into a trie and then add the subsequent branch exact
-                   strings to the trie.
-
-                   We have two cases
-
-                     1. patterns where the whole set of branches can be
-                        converted.
-
-                     2. patterns where only a subset can be converted.
-
-                   In case 1 we can replace the whole set with a single regop
-                   for the trie. In case 2 we need to keep the start and end
-                   branches so
-
-                     'BRANCH EXACT; BRANCH EXACT; BRANCH X'
-                     becomes BRANCH TRIE; BRANCH X;
-
-                  There is an additional case, that being where there is a
-                  common prefix, which gets split out into an EXACT like node
-                  preceding the TRIE node.
-
-                  If X(1..n)==tail then we can do a simple trie, if not we make
-                  a "jump" trie, such that when we match the appropriate word
-                  we "jump" to the appropriate tail node. Essentially we turn
-                  a nested if into a case structure of sorts.
-
-                */
-
-                    int made=0;
-                    if (!re_trie_maxbuff) {
-                        re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
-                        if (!SvIOK(re_trie_maxbuff))
-                            sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
-                    }
-                    if ( SvIV(re_trie_maxbuff)>=0  ) {
-                        regnode *cur;
-                        regnode *first = (regnode *)NULL;
-                        regnode *prev = (regnode *)NULL;
-                        regnode *tail = scan;
-                        U8 trietype = 0;
-                        U32 count=0;
-
-                        /* var tail is used because there may be a TAIL
-                           regop in the way. Ie, the exacts will point to the
-                           thing following the TAIL, but the last branch will
-                           point at the TAIL. So we advance tail. If we
-                           have nested (?:) we may have to move through several
-                           tails.
-                         */
-
-                        while ( OP( tail ) == TAIL ) {
-                            /* this is the TAIL generated by (?:) */
-                            tail = regnext( tail );
-                        }
-
-
-                        DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
-                              depth+1,
-                              "Looking for TRIE'able sequences. Tail node is ",
-                              (UV) REGNODE_OFFSET(tail),
-                              SvPV_nolen_const( RExC_mysv )
-                            );
-                        });
-
-                        /*
-
-                            Step through the branches
-                                cur represents each branch,
-                                noper is the first thing to be matched as part
-                                      of that branch
-                                noper_next is the regnext() of that node.
-
-                            We normally handle a case like this
-                            /FOO[xyz]|BAR[pqr]/ via a "jump trie" but we also
-                            support building with NOJUMPTRIE, which restricts
-                            the trie logic to structures like /FOO|BAR/.
-
-                            If noper is a trieable nodetype then the branch is
-                            a possible optimization target. If we are building
-                            under NOJUMPTRIE then we require that noper_next is
-                            the same as scan (our current position in the regex
-                            program).
-
-                            Once we have two or more consecutive such branches
-                            we can create a trie of the EXACT's contents and
-                            stitch it in place into the program.
-
-                            If the sequence represents all of the branches in
-                            the alternation we replace the entire thing with a
-                            single TRIE node.
-
-                            Otherwise when it is a subsequence we need to
-                            stitch it in place and replace only the relevant
-                            branches. This means the first branch has to remain
-                            as it is used by the alternation logic, and its
-                            next pointer, and needs to be repointed at the item
-                            on the branch chain following the last branch we
-                            have optimized away.
-
-                            This could be either a BRANCH, in which case the
-                            subsequence is internal, or it could be the item
-                            following the branch sequence in which case the
-                            subsequence is at the end (which does not
-                            necessarily mean the first node is the start of the
-                            alternation).
-
-                            TRIE_TYPE(X) is a define which maps the optype to a
-                            trietype.
-
-                                optype          |  trietype
-                                ----------------+-----------
-                                NOTHING         | NOTHING
-                                EXACT           | EXACT
-                                EXACT_REQ8      | EXACT
-                                EXACTFU         | EXACTFU
-                                EXACTFU_REQ8    | EXACTFU
-                                EXACTFUP        | EXACTFU
-                                EXACTFAA        | EXACTFAA
-                                EXACTL          | EXACTL
-                                EXACTFLU8       | EXACTFLU8
-
-
-                        */
-#define TRIE_TYPE(X) ( ( NOTHING == (X) )                                   \
-                       ? NOTHING                                            \
-                       : ( EXACT == (X) || EXACT_REQ8 == (X) )             \
-                         ? EXACT                                            \
-                         : (     EXACTFU == (X)                             \
-                              || EXACTFU_REQ8 == (X)                       \
-                              || EXACTFUP == (X) )                          \
-                           ? EXACTFU                                        \
-                           : ( EXACTFAA == (X) )                            \
-                             ? EXACTFAA                                     \
-                             : ( EXACTL == (X) )                            \
-                               ? EXACTL                                     \
-                               : ( EXACTFLU8 == (X) )                       \
-                                 ? EXACTFLU8                                \
-                                 : 0 )
-
-                        /* dont use tail as the end marker for this traverse */
-                        for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
-                            regnode * const noper = REGNODE_AFTER( cur );
-                            U8 noper_type = OP( noper );
-                            U8 noper_trietype = TRIE_TYPE( noper_type );
-#if defined(DEBUGGING) || defined(NOJUMPTRIE)
-                            regnode * const noper_next = regnext( noper );
-                            U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
-                            U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
-#endif
-
-                            DEBUG_TRIE_COMPILE_r({
-                                regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                Perl_re_indentf( aTHX_  "- %d:%s (%d)",
-                                   depth+1,
-                                   REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
-
-                                regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
-                                Perl_re_printf( aTHX_  " -> %d:%s",
-                                    REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
-
-                                if ( noper_next ) {
-                                  regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
-                                  Perl_re_printf( aTHX_ "\t=> %d:%s\t",
-                                    REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
-                                }
-                                Perl_re_printf( aTHX_  "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
-                                   REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
-                                   REGNODE_NAME(trietype), REGNODE_NAME(noper_trietype), REGNODE_NAME(noper_next_trietype)
-                                );
-                            });
-
-                            /* Is noper a trieable nodetype that can be merged
-                             * with the current trie (if there is one)? */
-                            if ( noper_trietype
-                                  &&
-                                  (
-                                        ( noper_trietype == NOTHING )
-                                        || ( trietype == NOTHING )
-                                        || ( trietype == noper_trietype )
-                                  )
-#ifdef NOJUMPTRIE
-                                  && noper_next >= tail
-#endif
-                                  && count < U16_MAX)
-                            {
-                                /* Handle mergable triable node Either we are
-                                 * the first node in a new trieable sequence,
-                                 * in which case we do some bookkeeping,
-                                 * otherwise we update the end pointer. */
-                                if ( !first ) {
-                                    first = cur;
-                                    if ( noper_trietype == NOTHING ) {
-#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
-                                        regnode * const noper_next = regnext( noper );
-                                        U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
-                                        U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
-#endif
-
-                                        if ( noper_next_trietype ) {
-                                            trietype = noper_next_trietype;
-                                        } else if (noper_next_type)  {
-                                            /* a NOTHING regop is 1 regop wide.
-                                             * We need at least two for a trie
-                                             * so we can't merge this in */
-                                            first = NULL;
-                                        }
-                                    } else {
-                                        trietype = noper_trietype;
-                                    }
-                                } else {
-                                    if ( trietype == NOTHING )
-                                        trietype = noper_trietype;
-                                    prev = cur;
-                                }
-                                if (first)
-                                    count++;
-                            } /* end handle mergable triable node */
-                            else {
-                                /* handle unmergable node -
-                                 * noper may either be a triable node which can
-                                 * not be tried together with the current trie,
-                                 * or a non triable node */
-                                if ( prev ) {
-                                    /* If last is set and trietype is not
-                                     * NOTHING then we have found at least two
-                                     * triable branch sequences in a row of a
-                                     * similar trietype so we can turn them
-                                     * into a trie. If/when we allow NOTHING to
-                                     * start a trie sequence this condition
-                                     * will be required, and it isn't expensive
-                                     * so we leave it in for now. */
-                                    if ( trietype && trietype != NOTHING )
-                                        make_trie( pRExC_state,
-                                                startbranch, first, cur, tail,
-                                                count, trietype, depth+1 );
-                                    prev = NULL; /* note: we clear/update
-                                                    first, trietype etc below,
-                                                    so we dont do it here */
-                                }
-                                if ( noper_trietype
-#ifdef NOJUMPTRIE
-                                     && noper_next >= tail
-#endif
-                                ){
-                                    /* noper is triable, so we can start a new
-                                     * trie sequence */
-                                    count = 1;
-                                    first = cur;
-                                    trietype = noper_trietype;
-                                } else if (first) {
-                                    /* if we already saw a first but the
-                                     * current node is not triable then we have
-                                     * to reset the first information. */
-                                    count = 0;
-                                    first = NULL;
-                                    trietype = 0;
-                                }
-                            } /* end handle unmergable node */
-                        } /* loop over branches */
-                        DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
-                              depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
-                            Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
-                               REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
-                               REGNODE_NAME(trietype)
-                            );
-
-                        });
-                        if ( prev && trietype ) {
-                            if ( trietype != NOTHING ) {
-                                /* the last branch of the sequence was part of
-                                 * a trie, so we have to construct it here
-                                 * outside of the loop */
-                                made= make_trie( pRExC_state, startbranch,
-                                                 first, scan, tail, count,
-                                                 trietype, depth+1 );
-#ifdef TRIE_STUDY_OPT
-                                if ( ((made == MADE_EXACT_TRIE &&
-                                     startbranch == first)
-                                     || ( first_non_open == first )) &&
-                                     depth==0 ) {
-                                    flags |= SCF_TRIE_RESTUDY;
-                                    if ( startbranch == first
-                                         && scan >= tail )
-                                    {
-                                        RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN;
-                                    }
-                                }
-#endif
-                            } else {
-                                /* at this point we know whatever we have is a
-                                 * NOTHING sequence/branch AND if 'startbranch'
-                                 * is 'first' then we can turn the whole thing
-                                 * into a NOTHING
-                                 */
-                                if ( startbranch == first ) {
-                                    regnode *opt;
-                                    /* the entire thing is a NOTHING sequence,
-                                     * something like this: (?:|) So we can
-                                     * turn it into a plain NOTHING op. */
-                                    DEBUG_TRIE_COMPILE_r({
-                                        regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                        Perl_re_indentf( aTHX_  "- %s (%d) <NOTHING BRANCH SEQUENCE>\n",
-                                          depth+1,
-                                          SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
-
-                                    });
-                                    OP(startbranch)= NOTHING;
-                                    NEXT_OFF(startbranch)= tail - startbranch;
-                                    for ( opt= startbranch + 1; opt < tail ; opt++ )
-                                        OP(opt)= OPTIMIZED;
-                                }
-                            }
-                        } /* end if ( prev) */
-                    } /* TRIE_MAXBUF is non zero */
-                } /* do trie */
-                DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
-            }
-            else
-                scan = REGNODE_AFTER_opcode(scan,code);
-            continue;
-        } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
-            I32 paren = 0;
-            regnode *start = NULL;
-            regnode *end = NULL;
-            U32 my_recursed_depth= recursed_depth;
-
-            if (OP(scan) != SUSPEND) { /* GOSUB */
-                /* Do setup, note this code has side effects beyond
-                 * the rest of this block. Specifically setting
-                 * RExC_recurse[] must happen at least once during
-                 * study_chunk(). */
-                paren = ARG(scan);
-                RExC_recurse[ARG2L(scan)] = scan;
-                start = REGNODE_p(RExC_open_parens[paren]);
-                end   = REGNODE_p(RExC_close_parens[paren]);
-
-                /* NOTE we MUST always execute the above code, even
-                 * if we do nothing with a GOSUB */
-                if (
-                    ( flags & SCF_IN_DEFINE )
-                    ||
-                    (
-                        (is_inf_internal || is_inf || (data && data->flags & SF_IS_INF))
-                        &&
-                        ( (flags & (SCF_DO_STCLASS | SCF_DO_SUBSTR)) == 0 )
-                    )
-                ) {
-                    /* no need to do anything here if we are in a define. */
-                    /* or we are after some kind of infinite construct
-                     * so we can skip recursing into this item.
-                     * Since it is infinite we will not change the maxlen
-                     * or delta, and if we miss something that might raise
-                     * the minlen it will merely pessimise a little.
-                     *
-                     * Iow /(?(DEFINE)(?<foo>foo|food))a+(?&foo)/
-                     * might result in a minlen of 1 and not of 4,
-                     * but this doesn't make us mismatch, just try a bit
-                     * harder than we should.
-                     *
-                     * However we must assume this GOSUB is infinite, to
-                     * avoid wrongly applying other optimizations in the
-                     * enclosing scope - see GH 18096, for example.
-                     */
-                    is_inf = is_inf_internal = 1;
-                    scan= regnext(scan);
-                    continue;
-                }
-
-                if (
-                    !recursed_depth
-                    || !PAREN_TEST(recursed_depth - 1, paren)
-                ) {
-                    /* it is quite possible that there are more efficient ways
-                     * to do this. We maintain a bitmap per level of recursion
-                     * of which patterns we have entered so we can detect if a
-                     * pattern creates a possible infinite loop. When we
-                     * recurse down a level we copy the previous levels bitmap
-                     * down. When we are at recursion level 0 we zero the top
-                     * level bitmap. It would be nice to implement a different
-                     * more efficient way of doing this. In particular the top
-                     * level bitmap may be unnecessary.
-                     */
-                    if (!recursed_depth) {
-                        Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
-                    } else {
-                        Copy(PAREN_OFFSET(recursed_depth - 1),
-                             PAREN_OFFSET(recursed_depth),
-                             RExC_study_chunk_recursed_bytes, U8);
-                    }
-                    /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
-                    PAREN_SET(recursed_depth, paren);
-                    my_recursed_depth= recursed_depth + 1;
-                } else {
-                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
-                    /* some form of infinite recursion, assume infinite length
-                     * */
-                    if (flags & SCF_DO_SUBSTR) {
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        data->cur_is_floating = 1;
-                    }
-                    is_inf = is_inf_internal = 1;
-                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                        ssc_anything(data->start_class);
-                    flags &= ~SCF_DO_STCLASS;
-
-                    start= NULL; /* reset start so we dont recurse later on. */
-                }
-            } else {
-                paren = stopparen;
-                start = scan + 2;
-                end = regnext(scan);
-            }
-            if (start) {
-                scan_frame *newframe;
-                assert(end);
-                if (!RExC_frame_last) {
-                    Newxz(newframe, 1, scan_frame);
-                    SAVEDESTRUCTOR_X(S_unwind_scan_frames, newframe);
-                    RExC_frame_head= newframe;
-                    RExC_frame_count++;
-                } else if (!RExC_frame_last->next_frame) {
-                    Newxz(newframe, 1, scan_frame);
-                    RExC_frame_last->next_frame= newframe;
-                    newframe->prev_frame= RExC_frame_last;
-                    RExC_frame_count++;
-                } else {
-                    newframe= RExC_frame_last->next_frame;
-                }
-                RExC_frame_last= newframe;
-
-                newframe->next_regnode = regnext(scan);
-                newframe->last_regnode = last;
-                newframe->stopparen = stopparen;
-                newframe->prev_recursed_depth = recursed_depth;
-                newframe->this_prev_frame= frame;
-                newframe->in_gosub = (
-                    (frame && frame->in_gosub) || OP(scan) == GOSUB
-                );
-
-                DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
-                DEBUG_PEEP("fnew", scan, depth, flags);
-
-                frame = newframe;
-                scan =  start;
-                stopparen = paren;
-                last = end;
-                depth = depth + 1;
-                recursed_depth= my_recursed_depth;
-
-                continue;
-            }
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EXACT && ! isEXACTFish(OP(scan))) {
-            SSize_t bytelen = STR_LEN(scan), charlen;
-            UV uc;
-            assert(bytelen);
-            if (UTF) {
-                const U8 * const s = (U8*)STRING(scan);
-                uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
-                charlen = utf8_length(s, s + bytelen);
-            } else {
-                uc = *((U8*)STRING(scan));
-                charlen = bytelen;
-            }
-            min += charlen;
-            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
-                /* The code below prefers earlier match for fixed
-                   offset, later match for variable offset.  */
-                if (data->last_end == -1) { /* Update the start info. */
-                    data->last_start_min = data->pos_min;
-                    data->last_start_max =
-                        is_inf ? OPTIMIZE_INFTY
-                        : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
-                            ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
-                }
-                sv_catpvn(data->last_found, STRING(scan), bytelen);
-                if (UTF)
-                    SvUTF8_on(data->last_found);
-                {
-                    SV * const sv = data->last_found;
-                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
-                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
-                    if (mg && mg->mg_len >= 0)
-                        mg->mg_len += charlen;
-                }
-                data->last_end = data->pos_min + charlen;
-                data->pos_min += charlen; /* As in the first entry. */
-                data->flags &= ~SF_BEFORE_EOL;
-            }
-
-            /* ANDing the code point leaves at most it, and not in locale, and
-             * can't match null string */
-            if (flags & SCF_DO_STCLASS_AND) {
-                ssc_cp_and(data->start_class, uc);
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                ssc_clear_locale(data->start_class);
-            }
-            else if (flags & SCF_DO_STCLASS_OR) {
-                ssc_add_cp(data->start_class, uc);
-                ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-            }
-            flags &= ~SCF_DO_STCLASS;
-            DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EXACT) {
-            /* But OP != EXACT!, so is EXACTFish */
-            SSize_t bytelen = STR_LEN(scan), charlen;
-            const U8 * s = (U8*)STRING(scan);
-
-            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
-             * with the mask set to the complement of the bit that differs
-             * between upper and lower case, and the lowest code point of the
-             * pair (which the '&' forces) */
-            if (     bytelen == 1
-                &&   isALPHA_A(*s)
-                &&  (         OP(scan) == EXACTFAA
-                     || (     OP(scan) == EXACTFU
-                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
-                &&   mutate_ok
-            ) {
-                U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
-
-                OP(scan) = ANYOFM;
-                ARG_SET(scan, *s & mask);
-                FLAGS(scan) = mask;
-                /* We're not EXACTFish any more, so restudy.
-                 * Search for "restudy" in this file to find
-                 * a comment with details. */
-                continue;
-            }
-
-            /* Search for fixed substrings supports EXACT only. */
-            if (flags & SCF_DO_SUBSTR) {
-                assert(data);
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-            }
-            charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
-            if (unfolded_multi_char) {
-                RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
-            }
-            min += charlen - min_subtract;
-            assert (min >= 0);
-            if ((SSize_t)min_subtract < OPTIMIZE_INFTY
-                && delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
-            ) {
-                delta += min_subtract;
-            } else {
-                delta = OPTIMIZE_INFTY;
-            }
-            if (flags & SCF_DO_SUBSTR) {
-                data->pos_min += charlen - min_subtract;
-                if (data->pos_min < 0) {
-                    data->pos_min = 0;
-                }
-                if ((SSize_t)min_subtract < OPTIMIZE_INFTY
-                    && data->pos_delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
-                ) {
-                    data->pos_delta += min_subtract;
-                } else {
-                    data->pos_delta = OPTIMIZE_INFTY;
-                }
-                if (min_subtract) {
-                    data->cur_is_floating = 1; /* float */
-                }
-            }
-
-            if (flags & SCF_DO_STCLASS) {
-                SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
-
-                assert(EXACTF_invlist);
-                if (flags & SCF_DO_STCLASS_AND) {
-                    if (OP(scan) != EXACTFL)
-                        ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                    ANYOF_POSIXL_ZERO(data->start_class);
-                    ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
-                }
-                else {  /* SCF_DO_STCLASS_OR */
-                    ssc_union(data->start_class, EXACTF_invlist, FALSE);
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                    /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                flags &= ~SCF_DO_STCLASS;
-                SvREFCNT_dec(EXACTF_invlist);
-            }
-            DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
-        }
-        else if (REGNODE_VARIES(OP(scan))) {
-            SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
-            I32 fl = 0;
-            U32 f = flags;
-            regnode * const oscan = scan;
-            regnode_ssc this_class;
-            regnode_ssc *oclass = NULL;
-            I32 next_is_eval = 0;
-
-            switch (REGNODE_TYPE(OP(scan))) {
-            case WHILEM:                /* End of (?:...)* . */
-                scan = REGNODE_AFTER(scan);
-                goto finish;
-            case PLUS:
-                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
-                    next = REGNODE_AFTER(scan);
-                    if (   (     REGNODE_TYPE(OP(next)) == EXACT
-                            && ! isEXACTFish(OP(next)))
-                        || (flags & SCF_DO_STCLASS))
-                    {
-                        mincount = 1;
-                        maxcount = REG_INFTY;
-                        next = regnext(scan);
-                        scan = REGNODE_AFTER(scan);
-                        goto do_curly;
-                    }
-                }
-                if (flags & SCF_DO_SUBSTR)
-                    data->pos_min++;
-                /* This will bypass the formal 'min += minnext * mincount'
-                 * calculation in the do_curly path, so assumes min width
-                 * of the PLUS payload is exactly one. */
-                min++;
-                /* FALLTHROUGH */
-            case STAR:
-                next = REGNODE_AFTER(scan);
-
-                /* This temporary node can now be turned into EXACTFU, and
-                 * must, as regexec.c doesn't handle it */
-                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
-                    OP(next) = EXACTFU;
-                }
-
-                if (     STR_LEN(next) == 1
-                    &&   isALPHA_A(* STRING(next))
-                    && (         OP(next) == EXACTFAA
-                        || (     OP(next) == EXACTFU
-                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
-                    &&   mutate_ok
-                ) {
-                    /* These differ in just one bit */
-                    U8 mask = ~ ('A' ^ 'a');
-
-                    assert(isALPHA_A(* STRING(next)));
-
-                    /* Then replace it by an ANYOFM node, with
-                    * the mask set to the complement of the
-                    * bit that differs between upper and lower
-                    * case, and the lowest code point of the
-                    * pair (which the '&' forces) */
-                    OP(next) = ANYOFM;
-                    ARG_SET(next, *STRING(next) & mask);
-                    FLAGS(next) = mask;
-                }
-
-                if (flags & SCF_DO_STCLASS) {
-                    mincount = 0;
-                    maxcount = REG_INFTY;
-                    next = regnext(scan);
-                    scan = REGNODE_AFTER(scan);
-                    goto do_curly;
-                }
-                if (flags & SCF_DO_SUBSTR) {
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    /* Cannot extend fixed substrings */
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                scan = regnext(scan);
-                goto optimize_curly_tail;
-            case CURLY:
-                if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
-                    && (scan->flags == stopparen))
-                {
-                    mincount = 1;
-                    maxcount = 1;
-                } else {
-                    mincount = ARG1(scan);
-                    maxcount = ARG2(scan);
-                }
-                next = regnext(scan);
-                if (OP(scan) == CURLYX) {
-                    I32 lp = (data ? *(data->last_closep) : 0);
-                    scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
-                }
-                scan = REGNODE_AFTER(scan);
-                next_is_eval = (OP(scan) == EVAL);
-              do_curly:
-                if (flags & SCF_DO_SUBSTR) {
-                    if (mincount == 0)
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                    /* Cannot extend fixed substrings */
-                    pos_before = data->pos_min;
-                }
-                if (data) {
-                    fl = data->flags;
-                    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
-                    if (is_inf)
-                        data->flags |= SF_IS_INF;
-                }
-                if (flags & SCF_DO_STCLASS) {
-                    ssc_init(pRExC_state, &this_class);
-                    oclass = data->start_class;
-                    data->start_class = &this_class;
-                    f |= SCF_DO_STCLASS_AND;
-                    f &= ~SCF_DO_STCLASS_OR;
-                }
-                /* Exclude from super-linear cache processing any {n,m}
-                   regops for which the combination of input pos and regex
-                   pos is not enough information to determine if a match
-                   will be possible.
-
-                   For example, in the regex /foo(bar\s*){4,8}baz/ with the
-                   regex pos at the \s*, the prospects for a match depend not
-                   only on the input position but also on how many (bar\s*)
-                   repeats into the {4,8} we are. */
-               if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY))
-                    f &= ~SCF_WHILEM_VISITED_POS;
-
-                /* This will finish on WHILEM, setting scan, or on NULL: */
-                /* recurse study_chunk() on loop bodies */
-                minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
-                                  last, data, stopparen, recursed_depth, NULL,
-                                  (mincount == 0
-                                   ? (f & ~SCF_DO_SUBSTR)
-                                   : f)
-                                  , depth+1, mutate_ok);
-
-                if (data && data->flags & SCF_SEEN_ACCEPT) {
-                    if (mincount > 1)
-                        mincount = 1;
-                }
-
-                if (flags & SCF_DO_STCLASS)
-                    data->start_class = oclass;
-                if (mincount == 0 || minnext == 0) {
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        /* Switch to OR mode: cache the old value of
-                         * data->start_class */
-                        INIT_AND_WITHP;
-                        StructCopy(data->start_class, and_withp, regnode_ssc);
-                        flags &= ~SCF_DO_STCLASS_AND;
-                        StructCopy(&this_class, data->start_class, regnode_ssc);
-                        flags |= SCF_DO_STCLASS_OR;
-                        ANYOF_FLAGS(data->start_class)
-                                                |= SSC_MATCHES_EMPTY_STRING;
-                    }
-                } else {                /* Non-zero len */
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND)
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-                if (!scan)              /* It was not CURLYX, but CURLY. */
-                    scan = next;
-                if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
-                    /* ? quantifier ok, except for (?{ ... }) */
-                    && (next_is_eval || !(mincount == 0 && maxcount == 1))
-                    && (minnext == 0) && (deltanext == 0)
-                    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                    && maxcount <= REG_INFTY/3) /* Complement check for big
-                                                   count */
-                {
-                    _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
-                        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
-                            "Quantifier unexpected on zero-length expression "
-                            "in regex m/%" UTF8f "/",
-                             UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
-                                  RExC_precomp)));
-                }
-
-                if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
-                    || min >= SSize_t_MAX - minnext * mincount )
-                {
-                    FAIL("Regexp out of space");
-                }
-
-                min += minnext * mincount;
-                is_inf_internal |= deltanext == OPTIMIZE_INFTY
-                         || (maxcount == REG_INFTY && minnext + deltanext > 0);
-                is_inf |= is_inf_internal;
-                if (is_inf) {
-                    delta = OPTIMIZE_INFTY;
-                } else {
-                    delta += (minnext + deltanext) * maxcount
-                             - minnext * mincount;
-                }
-
-                if (data && data->flags & SCF_SEEN_ACCEPT) {
-                    if (flags & SCF_DO_SUBSTR) {
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        flags &= ~SCF_DO_SUBSTR;
-                    }
-                    if (stopmin > min)
-                        stopmin = min;
-                    DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
-                }
-                /* Try powerful optimization CURLYX => CURLYN. */
-                if (  OP(oscan) == CURLYX && data
-                      && data->flags & SF_IN_PAR
-                      && !(data->flags & SF_HAS_EVAL)
-                      && !deltanext && minnext == 1
-                      && mutate_ok
-                ) {
-                    /* Try to optimize to CURLYN.  */
-                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX);
-                    regnode * const nxt1 = nxt;
-#ifdef DEBUGGING
-                    regnode *nxt2;
-#endif
-
-                    /* Skip open. */
-                    nxt = regnext(nxt);
-                    if (!REGNODE_SIMPLE(OP(nxt))
-                        && !(REGNODE_TYPE(OP(nxt)) == EXACT
-                             && STR_LEN(nxt) == 1))
-                        goto nogo;
-#ifdef DEBUGGING
-                    nxt2 = nxt;
-#endif
-                    nxt = regnext(nxt);
-                    if (OP(nxt) != CLOSE)
-                        goto nogo;
-                    if (RExC_open_parens) {
-
-                        /*open->CURLYM*/
-                        RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
-
-                        /*close->while*/
-                        RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
-                    }
-                    /* Now we know that nxt2 is the only contents: */
-                    oscan->flags = (U8)PARNO(nxt);
-                    OP(oscan) = CURLYN;
-                    OP(nxt1) = NOTHING; /* was OPEN. */
-
-#ifdef DEBUGGING
-                    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
-                    NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
-                    NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
-                    OP(nxt) = OPTIMIZED;        /* was CLOSE. */
-                    OP(nxt + 1) = OPTIMIZED; /* was count. */
-                    NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
-#endif
-                }
-              nogo:
-
-                /* Try optimization CURLYX => CURLYM. */
-                if (  OP(oscan) == CURLYX && data
-                      && !(data->flags & SF_HAS_PAR)
-                      && !(data->flags & SF_HAS_EVAL)
-                      && !deltanext     /* atom is fixed width */
-                      && minnext != 0   /* CURLYM can't handle zero width */
-                         /* Nor characters whose fold at run-time may be
-                          * multi-character */
-                      && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
-                      && mutate_ok
-                ) {
-                    /* XXXX How to optimize if data == 0? */
-                    /* Optimize to a simpler form.  */
-                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX); /* OPEN */
-                    regnode *nxt2;
-
-                    OP(oscan) = CURLYM;
-                    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
-                            && (OP(nxt2) != WHILEM))
-                        nxt = nxt2;
-                    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
-                    /* Need to optimize away parenths. */
-                    if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
-                        /* Set the parenth number.  */
-                        /* note that we have changed the type of oscan to CURLYM here */
-                        regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/
-
-                        oscan->flags = (U8)PARNO(nxt);
-                        if (RExC_open_parens) {
-                             /*open->CURLYM*/
-                            RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
-
-                            /*close->NOTHING*/
-                            RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt2)
-                                                         + 1;
-                        }
-                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
-                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
-
-#ifdef DEBUGGING
-                        OP(nxt1 + 1) = OPTIMIZED; /* was count. */
-                        OP(nxt + 1) = OPTIMIZED; /* was count. */
-                        NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
-                        NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
-#endif
-#if 0
-                        while ( nxt1 && (OP(nxt1) != WHILEM)) {
-                            regnode *nnxt = regnext(nxt1);
-                            if (nnxt == nxt) {
-                                if (REGNODE_OFF_BY_ARG(OP(nxt1)))
-                                    ARG_SET(nxt1, nxt2 - nxt1);
-                                else if (nxt2 - nxt1 < U16_MAX)
-                                    NEXT_OFF(nxt1) = nxt2 - nxt1;
-                                else
-                                    OP(nxt) = NOTHING;  /* Cannot beautify */
-                            }
-                            nxt1 = nnxt;
-                        }
-#endif
-                        /* Optimize again: */
-                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
-                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
-                                    NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1, mutate_ok);
-                    }
-                    else
-                        oscan->flags = 0;
-                }
-                else if ((OP(oscan) == CURLYX)
-                         && (flags & SCF_WHILEM_VISITED_POS)
-                         /* See the comment on a similar expression above.
-                            However, this time it's not a subexpression
-                            we care about, but the expression itself. */
-                         && (maxcount == REG_INFTY)
-                         && data) {
-                    /* This stays as CURLYX, we can put the count/of pair. */
-                    /* Find WHILEM (as in regexec.c) */
-                    regnode *nxt = oscan + NEXT_OFF(oscan);
-
-                    if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
-                        nxt += ARG(nxt);
-                    nxt = REGNODE_BEFORE(nxt);
-                    if (nxt->flags & 0xf) {
-                        /* we've already set whilem count on this node */
-                    } else if (++data->whilem_c < 16) {
-                        assert(data->whilem_c <= RExC_whilem_seen);
-                        nxt->flags = (U8)(data->whilem_c
-                            | (RExC_whilem_seen << 4)); /* On WHILEM */
-                    }
-                }
-                if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
-                    pars++;
-                if (flags & SCF_DO_SUBSTR) {
-                    SV *last_str = NULL;
-                    STRLEN last_chrs = 0;
-                    int counted = mincount != 0;
-
-                    if (data->last_end > 0 && mincount != 0) { /* Ends with a
-                                                                  string. */
-                        SSize_t b = pos_before >= data->last_start_min
-                            ? pos_before : data->last_start_min;
-                        STRLEN l;
-                        const char * const s = SvPV_const(data->last_found, l);
-                        SSize_t old = b - data->last_start_min;
-                        assert(old >= 0);
-
-                        if (UTF)
-                            old = utf8_hop_forward((U8*)s, old,
-                                               (U8 *) SvEND(data->last_found))
-                                - (U8*)s;
-                        l -= old;
-                        /* Get the added string: */
-                        last_str = newSVpvn_utf8(s  + old, l, UTF);
-                        last_chrs = UTF ? utf8_length((U8*)(s + old),
-                                            (U8*)(s + old + l)) : l;
-                        if (deltanext == 0 && pos_before == b) {
-                            /* What was added is a constant string */
-                            if (mincount > 1) {
-
-                                SvGROW(last_str, (mincount * l) + 1);
-                                repeatcpy(SvPVX(last_str) + l,
-                                          SvPVX_const(last_str), l,
-                                          mincount - 1);
-                                SvCUR_set(last_str, SvCUR(last_str) * mincount);
-                                /* Add additional parts. */
-                                SvCUR_set(data->last_found,
-                                          SvCUR(data->last_found) - l);
-                                sv_catsv(data->last_found, last_str);
-                                {
-                                    SV * sv = data->last_found;
-                                    MAGIC *mg =
-                                        SvUTF8(sv) && SvMAGICAL(sv) ?
-                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
-                                    if (mg && mg->mg_len >= 0)
-                                        mg->mg_len += last_chrs * (mincount-1);
-                                }
-                                last_chrs *= mincount;
-                                data->last_end += l * (mincount - 1);
-                            }
-                        } else {
-                            /* start offset must point into the last copy */
-                            data->last_start_min += minnext * (mincount - 1);
-                            data->last_start_max =
-                              is_inf
-                               ? OPTIMIZE_INFTY
-                               : data->last_start_max +
-                                 (maxcount - 1) * (minnext + data->pos_delta);
-                        }
-                    }
-                    /* It is counted once already... */
-                    data->pos_min += minnext * (mincount - counted);
-#if 0
-    Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
-                              " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
-                              " maxcount=%" UVuf " mincount=%" UVuf
-                              " data->pos_delta=%" UVuf "\n",
-        (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext,
-        (UV)maxcount, (UV)mincount, (UV)data->pos_delta);
-    if (deltanext != OPTIMIZE_INFTY)
-        Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
-            (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
-            - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
-#endif
-                    if (deltanext == OPTIMIZE_INFTY
-                        || data->pos_delta == OPTIMIZE_INFTY
-                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
-                        data->pos_delta = OPTIMIZE_INFTY;
-                    else
-                        data->pos_delta += - counted * deltanext +
-                        (minnext + deltanext) * maxcount - minnext * mincount;
-                    if (mincount != maxcount) {
-                         /* Cannot extend fixed substrings found inside
-                            the group.  */
-                        scan_commit(pRExC_state, data, minlenp, is_inf);
-                        if (mincount && last_str) {
-                            SV * const sv = data->last_found;
-                            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
-                                mg_find(sv, PERL_MAGIC_utf8) : NULL;
-
-                            if (mg)
-                                mg->mg_len = -1;
-                            sv_setsv(sv, last_str);
-                            data->last_end = data->pos_min;
-                            data->last_start_min = data->pos_min - last_chrs;
-                            data->last_start_max = is_inf
-                                ? OPTIMIZE_INFTY
-                                : data->pos_min + data->pos_delta - last_chrs;
-                        }
-                        data->cur_is_floating = 1; /* float */
-                    }
-                    SvREFCNT_dec(last_str);
-                }
-                if (data && (fl & SF_HAS_EVAL))
-                    data->flags |= SF_HAS_EVAL;
-              optimize_curly_tail:
-                rck_elide_nothing(oscan);
-                continue;
-
-            default:
-                Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
-                                                                    OP(scan));
-            case REF:
-            case CLUMP:
-                if (flags & SCF_DO_SUBSTR) {
-                    /* Cannot expect anything... */
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                if (flags & SCF_DO_STCLASS_OR) {
-                    if (OP(scan) == CLUMP) {
-                        /* Actually is any start char, but very few code points
-                         * aren't start characters */
-                        ssc_match_all_cp(data->start_class);
-                    }
-                    else {
-                        ssc_anything(data->start_class);
-                    }
-                }
-                flags &= ~SCF_DO_STCLASS;
-                break;
-            }
-        }
-        else if (OP(scan) == LNBREAK) {
-            if (flags & SCF_DO_STCLASS) {
-                if (flags & SCF_DO_STCLASS_AND) {
-                    ssc_intersection(data->start_class,
-                                    PL_XPosix_ptrs[CC_VERTSPACE_], FALSE);
-                    ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class)
-                                                &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                else if (flags & SCF_DO_STCLASS_OR) {
-                    ssc_union(data->start_class,
-                              PL_XPosix_ptrs[CC_VERTSPACE_],
-                              FALSE);
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                    /* See commit msg for
-                     * 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class)
-                                                &= ~SSC_MATCHES_EMPTY_STRING;
-                }
-                flags &= ~SCF_DO_STCLASS;
-            }
-            min++;
-            if (delta != OPTIMIZE_INFTY)
-                delta++;    /* Because of the 2 char string cr-lf */
-            if (flags & SCF_DO_SUBSTR) {
-                /* Cannot expect anything... */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min += 1;
-                if (data->pos_delta != OPTIMIZE_INFTY) {
-                    data->pos_delta += 1;
-                }
-                data->cur_is_floating = 1; /* float */
-            }
-        }
-        else if (REGNODE_SIMPLE(OP(scan))) {
-
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min++;
-            }
-            min++;
-            if (flags & SCF_DO_STCLASS) {
-                bool invert = 0;
-                SV* my_invlist = NULL;
-                U8 namedclass;
-
-                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-
-                /* Some of the logic below assumes that switching
-                   locale on will only add false positives. */
-                switch (OP(scan)) {
-
-                default:
-#ifdef DEBUGGING
-                   Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
-                                                                     OP(scan));
-#endif
-                case SANY:
-                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                        ssc_match_all_cp(data->start_class);
-                    break;
-
-                case REG_ANY:
-                    {
-                        SV* REG_ANY_invlist = _new_invlist(2);
-                        REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist,
-                                                            '\n');
-                        if (flags & SCF_DO_STCLASS_OR) {
-                            ssc_union(data->start_class,
-                                      REG_ANY_invlist,
-                                      TRUE /* TRUE => invert, hence all but \n
-                                            */
-                                      );
-                        }
-                        else if (flags & SCF_DO_STCLASS_AND) {
-                            ssc_intersection(data->start_class,
-                                             REG_ANY_invlist,
-                                             TRUE  /* TRUE => invert */
-                                             );
-                            ssc_clear_locale(data->start_class);
-                        }
-                        SvREFCNT_dec_NN(REG_ANY_invlist);
-                    }
-                    break;
-
-                case ANYOFD:
-                case ANYOFL:
-                case ANYOFPOSIXL:
-                case ANYOFH:
-                case ANYOFHb:
-                case ANYOFHr:
-                case ANYOFHs:
-                case ANYOF:
-                    if (flags & SCF_DO_STCLASS_AND)
-                        ssc_and(pRExC_state, data->start_class,
-                                (regnode_charclass *) scan);
-                    else
-                        ssc_or(pRExC_state, data->start_class,
-                                                          (regnode_charclass *) scan);
-                    break;
-
-                case ANYOFHbbm:
-                  {
-                    SV* cp_list = get_ANYOFHbbm_contents(scan);
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case NANYOFM: /* NANYOFM already contains the inversion of the
-                                 input ANYOF data, so, unlike things like
-                                 NPOSIXA, don't change 'invert' to TRUE */
-                    /* FALLTHROUGH */
-                case ANYOFM:
-                  {
-                    SV* cp_list = get_ANYOFM_contents(scan);
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case ANYOFR:
-                case ANYOFRb:
-                  {
-                    SV* cp_list = NULL;
-
-                    cp_list = _add_range_to_invlist(cp_list,
-                                        ANYOFRbase(scan),
-                                        ANYOFRbase(scan) + ANYOFRdelta(scan));
-
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class, cp_list, invert);
-                    }
-                    else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, cp_list, invert);
-                    }
-
-                    SvREFCNT_dec_NN(cp_list);
-                    break;
-                  }
-
-                case NPOSIXL:
-                    invert = 1;
-                    /* FALLTHROUGH */
-
-                case POSIXL:
-                    namedclass = classnum_to_namedclass(FLAGS(scan)) + invert;
-                    if (flags & SCF_DO_STCLASS_AND) {
-                        bool was_there = cBOOL(
-                                          ANYOF_POSIXL_TEST(data->start_class,
-                                                                 namedclass));
-                        ANYOF_POSIXL_ZERO(data->start_class);
-                        if (was_there) {    /* Do an AND */
-                            ANYOF_POSIXL_SET(data->start_class, namedclass);
-                        }
-                        /* No individual code points can now match */
-                        data->start_class->invlist
-                                                = sv_2mortal(_new_invlist(0));
-                    }
-                    else {
-                        int complement = namedclass + ((invert) ? -1 : 1);
-
-                        assert(flags & SCF_DO_STCLASS_OR);
-
-                        /* If the complement of this class was already there,
-                         * the result is that they match all code points,
-                         * (\d + \D == everything).  Remove the classes from
-                         * future consideration.  Locale is not relevant in
-                         * this case */
-                        if (ANYOF_POSIXL_TEST(data->start_class, complement)) {
-                            ssc_match_all_cp(data->start_class);
-                            ANYOF_POSIXL_CLEAR(data->start_class, namedclass);
-                            ANYOF_POSIXL_CLEAR(data->start_class, complement);
-                        }
-                        else {  /* The usual case; just add this class to the
-                                   existing set */
-                            ANYOF_POSIXL_SET(data->start_class, namedclass);
-                        }
-                    }
-                    break;
-
-                case NPOSIXA:   /* For these, we always know the exact set of
-                                   what's matched */
-                    invert = 1;
-                    /* FALLTHROUGH */
-                case POSIXA:
-                    my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
-                    goto join_posix_and_ascii;
-
-                case NPOSIXD:
-                case NPOSIXU:
-                    invert = 1;
-                    /* FALLTHROUGH */
-                case POSIXD:
-                case POSIXU:
-                    my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
-
-                    /* NPOSIXD matches all upper Latin1 code points unless the
-                     * target string being matched is UTF-8, which is
-                     * unknowable until match time.  Since we are going to
-                     * invert, we want to get rid of all of them so that the
-                     * inversion will match all */
-                    if (OP(scan) == NPOSIXD) {
-                        _invlist_subtract(my_invlist, PL_UpperLatin1,
-                                          &my_invlist);
-                    }
-
-                  join_posix_and_ascii:
-
-                    if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class, my_invlist, invert);
-                        ssc_clear_locale(data->start_class);
-                    }
-                    else {
-                        assert(flags & SCF_DO_STCLASS_OR);
-                        ssc_union(data->start_class, my_invlist, invert);
-                    }
-                    SvREFCNT_dec(my_invlist);
-                }
-                if (flags & SCF_DO_STCLASS_OR)
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                flags &= ~SCF_DO_STCLASS;
-            }
-        }
-        else if (REGNODE_TYPE(OP(scan)) == EOL && flags & SCF_DO_SUBSTR) {
-            data->flags |= (OP(scan) == MEOL
-                            ? SF_BEFORE_MEOL
-                            : SF_BEFORE_SEOL);
-            scan_commit(pRExC_state, data, minlenp, is_inf);
-
-        }
-        else if (  REGNODE_TYPE(OP(scan)) == BRANCHJ
-                 /* Lookbehind, or need to calculate parens/evals/stclass: */
-                   && (scan->flags || data || (flags & SCF_DO_STCLASS))
-                   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
-        {
-            if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
-                || OP(scan) == UNLESSM )
-            {
-                /* Negative Lookahead/lookbehind
-                   In this case we can't do fixed string optimisation.
-                */
-
-                bool is_positive = OP(scan) == IFMATCH ? 1 : 0;
-                SSize_t deltanext, minnext;
-                SSize_t fake_last_close = 0;
-                regnode *fake_last_close_op = NULL;
-                regnode *cur_last_close_op;
-                regnode *nscan;
-                regnode_ssc intrnl;
-                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-
-                StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                if (data) {
-                    data_fake.whilem_c = data->whilem_c;
-                    data_fake.last_closep = data->last_closep;
-                    data_fake.last_close_opp = data->last_close_opp;
-                }
-                else {
-                    data_fake.last_closep = &fake_last_close;
-                    data_fake.last_close_opp = &fake_last_close_op;
-                }
-
-                /* remember the last_close_op we saw so we can see if
-                 * we are dealing with variable length lookbehind that
-                 * contains capturing buffers, which are considered
-                 * experimental */
-                cur_last_close_op= *(data_fake.last_close_opp);
-
-                data_fake.pos_delta = delta;
-                if ( flags & SCF_DO_STCLASS && !scan->flags
-                     && OP(scan) == IFMATCH ) { /* Lookahead */
-                    ssc_init(pRExC_state, &intrnl);
-                    data_fake.start_class = &intrnl;
-                    f |= SCF_DO_STCLASS_AND;
-                }
-                if (flags & SCF_WHILEM_VISITED_POS)
-                    f |= SCF_WHILEM_VISITED_POS;
-                next = regnext(scan);
-                nscan = REGNODE_AFTER(scan);
-
-                /* recurse study_chunk() for lookahead body */
-                minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
-                                      last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1,
-                                      mutate_ok);
-
-                if (scan->flags) {
-                    if (   deltanext < 0
-                        || deltanext > (I32) U8_MAX
-                        || minnext > (I32)U8_MAX
-                        || minnext + deltanext > (I32)U8_MAX)
-                    {
-                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
-                              (UV)U8_MAX);
-                    }
-
-                    /* The 'next_off' field has been repurposed to count the
-                     * additional starting positions to try beyond the initial
-                     * one.  (This leaves it at 0 for non-variable length
-                     * matches to avoid breakage for those not using this
-                     * extension) */
-                    if (deltanext)  {
-                        scan->next_off = deltanext;
-                        if (
-                            /* See a CLOSE op inside this lookbehind? */
-                            cur_last_close_op != *(data_fake.last_close_opp)
-                            /* and not doing restudy. see: restudied */
-                            && !(flags & SCF_TRIE_DOING_RESTUDY)
-                        ) {
-                            /* this is positive variable length lookbehind with
-                             * capture buffers inside of it */
-                            ckWARNexperimental_with_arg(RExC_parse,
-                                WARN_EXPERIMENTAL__VLB,
-                                "Variable length %s lookbehind with capturing is experimental",
-                                is_positive ? "positive" : "negative");
-                        }
-                    }
-                    scan->flags = (U8)minnext + deltanext;
-                }
-                if (data) {
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SF_HAS_EVAL)
-                        data->flags |= SF_HAS_EVAL;
-                    data->whilem_c = data_fake.whilem_c;
-                }
-                if (f & SCF_DO_STCLASS_AND) {
-                    if (flags & SCF_DO_STCLASS_OR) {
-                        /* OR before, AND after: ideally we would recurse with
-                         * data_fake to get the AND applied by study of the
-                         * remainder of the pattern, and then derecurse;
-                         * *** HACK *** for now just treat as "no information".
-                         * See [perl #56690].
-                         */
-                        ssc_init(pRExC_state, data->start_class);
-                    }  else {
-                        /* AND before and after: combine and continue.  These
-                         * assertions are zero-length, so can match an EMPTY
-                         * string */
-                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
-                        ANYOF_FLAGS(data->start_class)
-                                                   |= SSC_MATCHES_EMPTY_STRING;
-                    }
-                }
-                DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
-            }
-#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
-            else {
-                /* Positive Lookahead/lookbehind
-                   In this case we can do fixed string optimisation,
-                   but we must be careful about it. Note in the case of
-                   lookbehind the positions will be offset by the minimum
-                   length of the pattern, something we won't know about
-                   until after the recurse.
-                */
-                SSize_t deltanext, fake_last_close = 0;
-                regnode *last_close_op = NULL;
-                regnode *nscan;
-                regnode_ssc intrnl;
-                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                /* We use SAVEFREEPV so that when the full compile
-                    is finished perl will clean up the allocated
-                    minlens when it's all done. This way we don't
-                    have to worry about freeing them when we know
-                    they wont be used, which would be a pain.
-                 */
-                SSize_t *minnextp;
-                Newx( minnextp, 1, SSize_t );
-                SAVEFREEPV(minnextp);
-
-                if (data) {
-                    StructCopy(data, &data_fake, scan_data_t);
-                    if ((flags & SCF_DO_SUBSTR) && data->last_found) {
-                        f |= SCF_DO_SUBSTR;
-                        if (scan->flags)
-                            scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
-                        data_fake.last_found=newSVsv(data->last_found);
-                    }
-                }
-                else {
-                    data_fake.last_closep = &fake_last_close;
-                    data_fake.last_close_opp = &fake_last_close_opp;
-                }
-                data_fake.flags = 0;
-                data_fake.substrs[0].flags = 0;
-                data_fake.substrs[1].flags = 0;
-                data_fake.pos_delta = delta;
-                if (is_inf)
-                    data_fake.flags |= SF_IS_INF;
-                if ( flags & SCF_DO_STCLASS && !scan->flags
-                     && OP(scan) == IFMATCH ) { /* Lookahead */
-                    ssc_init(pRExC_state, &intrnl);
-                    data_fake.start_class = &intrnl;
-                    f |= SCF_DO_STCLASS_AND;
-                }
-                if (flags & SCF_WHILEM_VISITED_POS)
-                    f |= SCF_WHILEM_VISITED_POS;
-                next = regnext(scan);
-                nscan = REGNODE_AFTER(scan);
-
-                /* positive lookahead study_chunk() recursion */
-                *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
-                                        &deltanext, last, &data_fake,
-                                        stopparen, recursed_depth, NULL,
-                                        f, depth+1, mutate_ok);
-                if (scan->flags) {
-                    assert(0);  /* This code has never been tested since this
-                                   is normally not compiled */
-                    if (   deltanext < 0
-                        || deltanext > (I32) U8_MAX
-                        || *minnextp > (I32)U8_MAX
-                        || *minnextp + deltanext > (I32)U8_MAX)
-                    {
-                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
-                              (UV)U8_MAX);
-                    }
-
-                    if (deltanext) {
-                        scan->next_off = deltanext;
-                    }
-                    scan->flags = (U8)*minnextp + deltanext;
-                }
-
-                *minnextp += min;
-
-                if (f & SCF_DO_STCLASS_AND) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
-                    ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
-                }
-                if (data) {
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SF_HAS_EVAL)
-                        data->flags |= SF_HAS_EVAL;
-                    data->whilem_c = data_fake.whilem_c;
-                    if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
-                        int i;
-                        if (RExC_rx->minlen < *minnextp)
-                            RExC_rx->minlen = *minnextp;
-                        scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
-                        SvREFCNT_dec_NN(data_fake.last_found);
-
-                        for (i = 0; i < 2; i++) {
-                            if (data_fake.substrs[i].minlenp != minlenp) {
-                                data->substrs[i].min_offset =
-                                            data_fake.substrs[i].min_offset;
-                                data->substrs[i].max_offset =
-                                            data_fake.substrs[i].max_offset;
-                                data->substrs[i].minlenp =
-                                            data_fake.substrs[i].minlenp;
-                                data->substrs[i].lookbehind += scan->flags;
-                            }
-                        }
-                    }
-                }
-            }
-#endif
-        }
-        else if (OP(scan) == OPEN) {
-            if (stopparen != (I32)PARNO(scan))
-                pars++;
-        }
-        else if (OP(scan) == CLOSE) {
-            if (stopparen == (I32)PARNO(scan)) {
-                break;
-            }
-            if ((I32)PARNO(scan) == is_par) {
-                next = regnext(scan);
-
-                if ( next && (OP(next) != WHILEM) && next < last)
-                    is_par = 0;         /* Disable optimization */
-            }
-            if (data) {
-                *(data->last_closep) = PARNO(scan);
-                *(data->last_close_opp) = scan;
-            }
-        }
-        else if (OP(scan) == EVAL) {
-            if (data)
-                data->flags |= SF_HAS_EVAL;
-        }
-        else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                flags &= ~SCF_DO_SUBSTR;
-            }
-            if (OP(scan)==ACCEPT) {
-                /* m{(*ACCEPT)x} does not have to start with 'x' */
-                flags &= ~SCF_DO_STCLASS;
-                if (data)
-                    data->flags |= SCF_SEEN_ACCEPT;
-                if (stopmin > min)
-                    stopmin = min;
-            }
-        }
-        else if (OP(scan) == COMMIT) {
-            /* gh18770: m{abc(*COMMIT)xyz} must fail on "abc abcxyz", so we
-             * must not end up with "abcxyz" as a fixed substring else we'll
-             * skip straight to attempting to match at offset 4.
-             */
-            if (flags & SCF_DO_SUBSTR) {
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                flags &= ~SCF_DO_SUBSTR;
-            }
-        }
-        else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
-        {
-                if (flags & SCF_DO_SUBSTR) {
-                    scan_commit(pRExC_state, data, minlenp, is_inf);
-                    data->cur_is_floating = 1; /* float */
-                }
-                is_inf = is_inf_internal = 1;
-                if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
-                    ssc_anything(data->start_class);
-                flags &= ~SCF_DO_STCLASS;
-        }
-        else if (OP(scan) == GPOS) {
-            if (!(RExC_rx->intflags & PREGf_GPOS_FLOAT) &&
-                !(delta || is_inf || (data && data->pos_delta)))
-            {
-                if (!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR))
-                    RExC_rx->intflags |= PREGf_ANCH_GPOS;
-                if (RExC_rx->gofs < (STRLEN)min)
-                    RExC_rx->gofs = min;
-            } else {
-                RExC_rx->intflags |= PREGf_GPOS_FLOAT;
-                RExC_rx->gofs = 0;
-            }
-        }
-#ifdef TRIE_STUDY_OPT
-#ifdef FULL_TRIE_STUDY
-        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
-            /* NOTE - There is similar code to this block above for handling
-               BRANCH nodes on the initial study.  If you change stuff here
-               check there too. */
-            regnode *trie_node= scan;
-            regnode *tail= regnext(scan);
-            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
-            regnode_ssc accum;
-
-            if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
-                /* Cannot merge strings after this. */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-            }
-            if (flags & SCF_DO_STCLASS)
-                ssc_init_zero(pRExC_state, &accum);
-
-            if (!trie->jump) {
-                min1= trie->minlen;
-                max1= trie->maxlen;
-            } else {
-                const regnode *nextbranch= NULL;
-                U32 word;
-
-                for ( word=1 ; word <= trie->wordcount ; word++)
-                {
-                    SSize_t deltanext = 0, minnext = 0;
-                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
-                    SSize_t fake_last_close = 0;
-                    regnode *fake_last_close_op = NULL;
-                    regnode_ssc this_class;
-
-                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-                    if (data) {
-                        data_fake.whilem_c = data->whilem_c;
-                        data_fake.last_closep = data->last_closep;
-                        data_fake.last_close_opp = data->last_close_opp;
-                    }
-                    else {
-                        data_fake.last_closep = &fake_last_close;
-                        data_fake.last_close_opp = &fake_last_close_op;
-                    }
-                    data_fake.pos_delta = delta;
-                    if (flags & SCF_DO_STCLASS) {
-                        ssc_init(pRExC_state, &this_class);
-                        data_fake.start_class = &this_class;
-                        f |= SCF_DO_STCLASS_AND;
-                    }
-                    if (flags & SCF_WHILEM_VISITED_POS)
-                        f |= SCF_WHILEM_VISITED_POS;
-
-                    if (trie->jump[word]) {
-                        if (!nextbranch)
-                            nextbranch = trie_node + trie->jump[0];
-                        scan= trie_node + trie->jump[word];
-                        /* We go from the jump point to the branch that follows
-                           it. Note this means we need the vestigal unused
-                           branches even though they arent otherwise used. */
-                        /* optimise study_chunk() for TRIE */
-                        minnext = study_chunk(pRExC_state, &scan, minlenp,
-                            &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1,
-                            mutate_ok);
-                    }
-                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
-                        nextbranch= regnext((regnode*)nextbranch);
-
-                    if (min1 > (SSize_t)(minnext + trie->minlen))
-                        min1 = minnext + trie->minlen;
-                    if (deltanext == OPTIMIZE_INFTY) {
-                        is_inf = is_inf_internal = 1;
-                        max1 = OPTIMIZE_INFTY;
-                    } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
-                        max1 = minnext + deltanext + trie->maxlen;
-
-                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
-                        pars++;
-                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
-                        if ( stopmin > min + min1)
-                            stopmin = min + min1;
-                        flags &= ~SCF_DO_SUBSTR;
-                        if (data)
-                            data->flags |= SCF_SEEN_ACCEPT;
-                    }
-                    if (data) {
-                        if (data_fake.flags & SF_HAS_EVAL)
-                            data->flags |= SF_HAS_EVAL;
-                        data->whilem_c = data_fake.whilem_c;
-                    }
-                    if (flags & SCF_DO_STCLASS)
-                        ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
-                }
-                DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
-            }
-            if (flags & SCF_DO_SUBSTR) {
-                data->pos_min += min1;
-                data->pos_delta += max1 - min1;
-                if (max1 != min1 || is_inf)
-                    data->cur_is_floating = 1; /* float */
-            }
-            min += min1;
-            if (delta != OPTIMIZE_INFTY) {
-                if (OPTIMIZE_INFTY - (max1 - min1) >= delta)
-                    delta += max1 - min1;
-                else
-                    delta = OPTIMIZE_INFTY;
-            }
-            if (flags & SCF_DO_STCLASS_OR) {
-                ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                if (min1) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-            }
-            else if (flags & SCF_DO_STCLASS_AND) {
-                if (min1) {
-                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
-                    flags &= ~SCF_DO_STCLASS;
-                }
-                else {
-                    /* Switch to OR mode: cache the old value of
-                     * data->start_class */
-                    INIT_AND_WITHP;
-                    StructCopy(data->start_class, and_withp, regnode_ssc);
-                    flags &= ~SCF_DO_STCLASS_AND;
-                    StructCopy(&accum, data->start_class, regnode_ssc);
-                    flags |= SCF_DO_STCLASS_OR;
-                }
-            }
-            scan= tail;
-            DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
-            continue;
-        }
-#else
-        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
-            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            U8*bang=NULL;
-
-            min += trie->minlen;
-            delta += (trie->maxlen - trie->minlen);
-            flags &= ~SCF_DO_STCLASS; /* xxx */
-            if (flags & SCF_DO_SUBSTR) {
-                /* Cannot expect anything... */
-                scan_commit(pRExC_state, data, minlenp, is_inf);
-                data->pos_min += trie->minlen;
-                data->pos_delta += (trie->maxlen - trie->minlen);
-                if (trie->maxlen != trie->minlen)
-                    data->cur_is_floating = 1; /* float */
-            }
-            if (trie->jump) /* no more substrings -- for now /grr*/
-               flags &= ~SCF_DO_SUBSTR;
-        }
-
-#endif /* old or new */
-#endif /* TRIE_STUDY_OPT */
-
-        else if (OP(scan) == REGEX_SET) {
-            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
-                             " before optimization", REGNODE_NAME(REGEX_SET));
-        }
-
-        /* Else: zero-length, ignore. */
-        scan = regnext(scan);
-    }
-
-  finish:
-    if (frame) {
-        /* we need to unwind recursion. */
-        depth = depth - 1;
-
-        DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
-        DEBUG_PEEP("fend", scan, depth, flags);
-
-        /* restore previous context */
-        last = frame->last_regnode;
-        scan = frame->next_regnode;
-        stopparen = frame->stopparen;
-        recursed_depth = frame->prev_recursed_depth;
-
-        RExC_frame_last = frame->prev_frame;
-        frame = frame->this_prev_frame;
-        goto fake_study_recurse;
-    }
-
-    assert(!frame);
-    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
-
-    /* is this pattern infinite? Eg, consider /(a|b+)/ */
-    if (is_inf_internal)
-        delta = OPTIMIZE_INFTY;
-
-    /* deal with (*ACCEPT), Eg, consider /(foo(*ACCEPT)|bop)bar/ */
-    if (min > stopmin) {
-        /*
-        At this point 'min' represents the minimum length string we can
-        match while *ignoring* the implication of ACCEPT, and 'delta'
-        represents the difference between the minimum length and maximum
-        length, and if the pattern matches an infinitely long string
-        (consider the + and * quantifiers) then we use the special delta
-        value of OPTIMIZE_INFTY to represent it. 'stopmin' is the
-        minimum length that can be matched *and* accepted.
-
-        A pattern is accepted when matching was successful *and*
-        complete, and thus there is no further matching needing to be
-        done, no backtracking to occur, etc. Prior to the introduction
-        of ACCEPT the only opcode that signaled acceptance was the END
-        opcode, which is always the very last opcode in a regex program.
-        ACCEPT is thus conceptually an early successful return out of
-        the matching process. stopmin starts out as OPTIMIZE_INFTY to
-        represent "the entire pattern", and is ratched down to the
-        "current min" if necessary when an ACCEPT opcode is encountered.
-
-        Thus stopmin might be smaller than min if we saw an (*ACCEPT),
-        and we now need to account for it in both min and delta.
-        Consider that in a pattern /AB/ normally the min length it can
-        match can be computed as min(A)+min(B). But (*ACCEPT) means
-        that it might be something else, not even neccesarily min(A) at
-        all. Consider
-
-             A  = /(foo(*ACCEPT)|x+)/
-             B  = /whop/
-             AB = /(foo(*ACCEPT)|x+)whop/
-
-        The min for A is 1 for "x" and the delta for A is OPTIMIZE_INFTY
-        for "xxxxx...", its stopmin is 3 for "foo". The min for B is 4 for
-        "whop", and the delta of 0 as the pattern is of fixed length, the
-        stopmin would be OPTIMIZE_INFTY as it does not contain an ACCEPT.
-        When handling AB we expect to see a min of 5 for "xwhop", and a
-        delta of OPTIMIZE_INFTY for "xxxxx...whop", and a stopmin of 3
-        for "foo". This should result in a final min of 3 for "foo", and
-        a final delta of OPTIMIZE_INFTY for "xxxxx...whop".
-
-        In something like /(dude(*ACCEPT)|irk)x{3,7}/ we would have a
-        min of 6 for "irkxxx" and a delta of 4 for "irkxxxxxxx", and the
-        stop min would be 4 for "dude". This should result in a final
-        min of 4 for "dude", and a final delta of 6, for "irkxxxxxxx".
-
-        When min is smaller than stopmin then we can ignore it. In the
-        fragment /(x{10,20}(*ACCEPT)|a)b+/, we would have a min of 2,
-        and a delta of OPTIMIZE_INFTY, and a stopmin of 10. Obviously
-        the ACCEPT doesn't reduce the minimum length of the string that
-        might be matched, nor affect the maximum length.
-
-        In something like /foo(*ACCEPT)ba?r/ we would have a min of 5
-        for "foobr", a delta of 1 for "foobar", and a stopmin of 3 for
-        "foo". We currently turn this into a min of 3 for "foo" and a
-        delta of 3 for "foobar" even though technically "foobar" isn't
-        possible. ACCEPT affects some aspects of the optimizer, like
-        length computations and mandatory substring optimizations, but
-        there are other optimzations this routine perfoms that are not
-        affected and this compromise simplifies implementation.
-
-        It might be helpful to consider that this C function is called
-        recursively on the pattern in a bottom up fashion, and that the
-        min returned by a nested call may be marked as coming from an
-        ACCEPT, causing its callers to treat the returned min as a
-        stopmin as the recursion unwinds. Thus a single ACCEPT can affect
-        multiple calls into this function in different ways.
-        */
-
-        if (OPTIMIZE_INFTY - delta >= min - stopmin)
-            delta += min - stopmin;
-        else
-            delta = OPTIMIZE_INFTY;
-        min = stopmin;
-    }
-
-    *scanp = scan;
-    *deltap = delta;
-
-    if (flags & SCF_DO_SUBSTR && is_inf)
-        data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
-    if (is_par > (I32)U8_MAX)
-        is_par = 0;
-    if (is_par && pars==1 && data) {
-        data->flags |= SF_IN_PAR;
-        data->flags &= ~SF_HAS_PAR;
-    }
-    else if (pars && data) {
-        data->flags |= SF_HAS_PAR;
-        data->flags &= ~SF_IN_PAR;
-    }
-    if (flags & SCF_DO_STCLASS_OR)
-        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-    if (flags & SCF_TRIE_RESTUDY)
-        data->flags |=  SCF_TRIE_RESTUDY;
-
-
-    if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-        if (min > OPTIMIZE_INFTY - delta)
-            RExC_maxlen = OPTIMIZE_INFTY;
-        else if (RExC_maxlen < min + delta)
-            RExC_maxlen = min + delta;
-    }
-    DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
-    return min;
-}
-
 /* add a data member to the struct reg_data attached to this regex, it should
  * always return a non-zero return. the 's' argument is the type of the items
  * being added and the n is the number of items. The length of 's' should match
  * the number of items. */
-STATIC U32
-S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
+U32
+Perl_reg_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
 {
     U32 count = RExC_rxi->data ? RExC_rxi->data->count : 1;
 
-    PERL_ARGS_ASSERT_ADD_DATA;
+    PERL_ARGS_ASSERT_REG_ADD_DATA;
 
     /* in the below expression we have (count + n - 1), the minus one is there
      * because the struct that we allocate already contains a slot for 1 data
@@ -6958,9 +320,9 @@ S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
         /* when count == 1 it means we have not initialized anything.
          * we always fill the 0 slot of the data array with a '%' entry, which
          * means "zero" (all the other types are letters) which exists purely
-         * so the return from add_data is ALWAYS true, so we can tell it apart
+         * so the return from reg_add_data is ALWAYS true, so we can tell it apart
          * from a "no value" idx=0 in places where we would return an index
-         * into add_data.  This is particularly important with the new "single
+         * into reg_add_data.  This is particularly important with the new "single
          * pass, usually, but not always" strategy that we use, where the code
          * will use a 0 to represent "not able to compute this yet".
          */
@@ -7435,9 +797,9 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                     assert(n < pRExC_state->code_blocks->count);
                     src = &ri->code_blocks->cb[i];
                     dst = &pRExC_state->code_blocks->cb[n];
-                    dst->start      = src->start + offset;
-                    dst->end        = src->end   + offset;
-                    dst->block      = src->block;
+                    dst->start	    = src->start + offset;
+                    dst->end	    = src->end   + offset;
+                    dst->block	    = src->block;
                     dst->src_regex  = (REGEXP*) SvREFCNT_inc( (SV*)
                                             src->src_regex
                                                 ? src->src_regex
@@ -7689,9 +1051,9 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 
             assert(pat[src->start] == '(');
             assert(pat[src->end]   == ')');
-            dst->start      = src->start;
-            dst->end        = src->end;
-            dst->block      = src->block;
+            dst->start	    = src->start;
+            dst->end	    = src->end;
+            dst->block	    = src->block;
             dst->src_regex  = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
                                     : src->src_regex;
             dst++;
@@ -8236,7 +1598,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     RExC_rx->intflags = 0;
 
-    RExC_flags = rx_flags;      /* don't let top level (?i) bleed */
+    RExC_flags = rx_flags;	/* don't let top level (?i) bleed */
     RExC_parse_set(exp);
 
     /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
@@ -8423,11 +1785,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
 
     if (UTF)
-        SvUTF8_on(Rx);  /* Unicode in it? */
+        SvUTF8_on(Rx);	/* Unicode in it? */
     RExC_rxi->regstclass = NULL;
-    if (RExC_naughty >= TOO_NAUGHTY)    /* Probably an expensive pattern. */
+    if (RExC_naughty >= TOO_NAUGHTY)	/* Probably an expensive pattern. */
         RExC_rx->intflags |= PREGf_NAUGHTY;
-    scan = RExC_rxi->program + 1;               /* First BRANCH. */
+    scan = RExC_rxi->program + 1;		/* First BRANCH. */
 
     /* testing for BRANCH here tells us whether there is "must appear"
        data in the pattern. If there is then we can use it for optimisations */
@@ -8487,7 +1849,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* Ignore EXACT as we deal with it later. */
         if (REGNODE_TYPE(OP(first)) == EXACT) {
             if (! isEXACTFish(OP(first))) {
-                NOOP;   /* Empty, get anchored substr later. */
+                NOOP;	/* Empty, get anchored substr later. */
             }
             else
                 RExC_rxi->regstclass = first;
@@ -8581,7 +1943,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             ssc_init(pRExC_state, &ch_class);
             data.start_class = &ch_class;
             stclass_flag = SCF_DO_STCLASS_AND;
-        } else                          /* XXXX Check for BOUND? */
+        } else				/* XXXX Check for BOUND? */
             stclass_flag = 0;
         data.last_closep = &last_close;
         data.last_close_opp = &last_close_op;
@@ -8661,7 +2023,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
         {
-            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+            const U32 n = reg_add_data(pRExC_state, STR_WITH_LEN("f"));
 
             ssc_finalize(pRExC_state, data.start_class);
 
@@ -8670,7 +2032,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                        (regnode_ssc*)RExC_rxi->data->data[n],
                        regnode_ssc);
             RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
-            RExC_rx->intflags &= ~PREGf_SKIP;   /* Used in find_byclass(). */
+            RExC_rx->intflags &= ~PREGf_SKIP;	/* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
                       regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       Perl_re_printf( aTHX_
@@ -8745,7 +2107,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
         {
-            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+            const U32 n = reg_add_data(pRExC_state, STR_WITH_LEN("f"));
 
             ssc_finalize(pRExC_state, data.start_class);
 
@@ -8754,7 +2116,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                        (regnode_ssc*)RExC_rxi->data->data[n],
                        regnode_ssc);
             RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
-            RExC_rx->intflags &= ~PREGf_SKIP;   /* Used in find_byclass(). */
+            RExC_rx->intflags &= ~PREGf_SKIP;	/* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
                       regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       Perl_re_printf( aTHX_
@@ -8860,7 +2222,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
 #ifdef DEBUGGING
     if (RExC_paren_names) {
-        RExC_rxi->name_list_idx = add_data( pRExC_state, STR_WITH_LEN("a"));
+        RExC_rxi->name_list_idx = reg_add_data( pRExC_state, STR_WITH_LEN("a"));
         RExC_rxi->data->data[RExC_rxi->name_list_idx]
                                    = (void*)SvREFCNT_inc(RExC_paren_name_list);
     } else
@@ -8916,426 +2278,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 }
 
 
-SV*
-Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
-                    const U32 flags)
-{
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF;
-
-    PERL_UNUSED_ARG(value);
-
-    if (flags & RXapif_FETCH) {
-        return reg_named_buff_fetch(rx, key, flags);
-    } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
-        Perl_croak_no_modify();
-        return NULL;
-    } else if (flags & RXapif_EXISTS) {
-        return reg_named_buff_exists(rx, key, flags)
-            ? &PL_sv_yes
-            : &PL_sv_no;
-    } else if (flags & RXapif_REGNAMES) {
-        return reg_named_buff_all(rx, flags);
-    } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
-        return reg_named_buff_scalar(rx, flags);
-    } else {
-        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
-        return NULL;
-    }
-}
-
-SV*
-Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
-                         const U32 flags)
-{
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
-    PERL_UNUSED_ARG(lastkey);
-
-    if (flags & RXapif_FIRSTKEY)
-        return reg_named_buff_firstkey(rx, flags);
-    else if (flags & RXapif_NEXTKEY)
-        return reg_named_buff_nextkey(rx, flags);
-    else {
-        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter",
-                                            (int)flags);
-        return NULL;
-    }
-}
-
-SV*
-Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
-                          const U32 flags)
-{
-    SV *ret;
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
-        if (he_str) {
-            IV i;
-            SV* sv_dat=HeVAL(he_str);
-            I32 *nums=(I32*)SvPVX(sv_dat);
-            AV * const retarray = (flags & RXapif_ALL) ? newAV_alloc_x(SvIVX(sv_dat)) : NULL;
-            for ( i=0; i<SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->nparens) >= nums[i]
-                    && rx->offs[nums[i]].start != -1
-                    && rx->offs[nums[i]].end != -1)
-                {
-                    ret = newSVpvs("");
-                    CALLREG_NUMBUF_FETCH(r, nums[i], ret);
-                    if (!retarray)
-                        return ret;
-                } else {
-                    if (retarray)
-                        ret = newSV_type(SVt_NULL);
-                }
-                if (retarray)
-                    av_push_simple(retarray, ret);
-            }
-            if (retarray)
-                return newRV_noinc(MUTABLE_SV(retarray));
-        }
-    }
-    return NULL;
-}
-
-bool
-Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
-                           const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        if (flags & RXapif_ALL) {
-            return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
-        } else {
-            SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
-            if (sv) {
-                SvREFCNT_dec_NN(sv);
-                return TRUE;
-            } else {
-                return FALSE;
-            }
-        }
-    } else {
-        return FALSE;
-    }
-}
-
-SV*
-Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
-
-    if ( rx && RXp_PAREN_NAMES(rx) ) {
-        (void)hv_iterinit(RXp_PAREN_NAMES(rx));
-
-        return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
-    } else {
-        return FALSE;
-    }
-}
-
-SV*
-Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HV *hv = RXp_PAREN_NAMES(rx);
-        HE *temphe;
-        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
-            IV i;
-            IV parno = 0;
-            SV* sv_dat = HeVAL(temphe);
-            I32 *nums = (I32*)SvPVX(sv_dat);
-            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastparen) >= nums[i] &&
-                    rx->offs[nums[i]].start != -1 &&
-                    rx->offs[nums[i]].end != -1)
-                {
-                    parno = nums[i];
-                    break;
-                }
-            }
-            if (parno || flags & RXapif_ALL) {
-                return newSVhek(HeKEY_hek(temphe));
-            }
-        }
-    }
-    return NULL;
-}
-
-SV*
-Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
-{
-    SV *ret;
-    AV *av;
-    SSize_t length;
-    struct regexp *const rx = ReANY(r);
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
-            return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
-        } else if (flags & RXapif_ONE) {
-            ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
-            av = MUTABLE_AV(SvRV(ret));
-            length = av_count(av);
-            SvREFCNT_dec_NN(ret);
-            return newSViv(length);
-        } else {
-            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
-                                                (int)flags);
-            return NULL;
-        }
-    }
-    return &PL_sv_undef;
-}
-
-SV*
-Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
-{
-    struct regexp *const rx = ReANY(r);
-    AV *av = newAV();
-
-    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
-
-    if (rx && RXp_PAREN_NAMES(rx)) {
-        HV *hv= RXp_PAREN_NAMES(rx);
-        HE *temphe;
-        (void)hv_iterinit(hv);
-        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
-            IV i;
-            IV parno = 0;
-            SV* sv_dat = HeVAL(temphe);
-            I32 *nums = (I32*)SvPVX(sv_dat);
-            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastparen) >= nums[i] &&
-                    rx->offs[nums[i]].start != -1 &&
-                    rx->offs[nums[i]].end != -1)
-                {
-                    parno = nums[i];
-                    break;
-                }
-            }
-            if (parno || flags & RXapif_ALL) {
-                av_push_simple(av, newSVhek(HeKEY_hek(temphe)));
-            }
-        }
-    }
-
-    return newRV_noinc(MUTABLE_SV(av));
-}
-
-void
-Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
-                             SV * const sv)
-{
-    struct regexp *const rx = ReANY(r);
-    char *s = NULL;
-    SSize_t i = 0;
-    SSize_t s1, t1;
-    I32 n = paren;
-
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
-
-    if (      n == RX_BUFF_IDX_CARET_PREMATCH
-           || n == RX_BUFF_IDX_CARET_FULLMATCH
-           || n == RX_BUFF_IDX_CARET_POSTMATCH
-       )
-    {
-        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
-        if (!keepcopy) {
-            /* on something like
-             *    $r = qr/.../;
-             *    /$qr/p;
-             * the KEEPCOPY is set on the PMOP rather than the regex */
-            if (PL_curpm && r == PM_GETRE(PL_curpm))
-                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
-        }
-        if (!keepcopy)
-            goto ret_undef;
-    }
-
-    if (!rx->subbeg)
-        goto ret_undef;
-
-    if (n == RX_BUFF_IDX_CARET_FULLMATCH)
-        /* no need to distinguish between them any more */
-        n = RX_BUFF_IDX_FULLMATCH;
-
-    if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
-        && rx->offs[0].start != -1)
-    {
-        /* $`, ${^PREMATCH} */
-        i = rx->offs[0].start;
-        s = rx->subbeg;
-    }
-    else
-    if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
-        && rx->offs[0].end != -1)
-    {
-        /* $', ${^POSTMATCH} */
-        s = rx->subbeg - rx->suboffset + rx->offs[0].end;
-        i = rx->sublen + rx->suboffset - rx->offs[0].end;
-    }
-    else
-    if (inRANGE(n, 0, (I32)rx->nparens) &&
-        (s1 = rx->offs[n].start) != -1  &&
-        (t1 = rx->offs[n].end) != -1)
-    {
-        /* $&, ${^MATCH},  $1 ... */
-        i = t1 - s1;
-        s = rx->subbeg + s1 - rx->suboffset;
-    } else {
-        goto ret_undef;
-    }
-
-    assert(s >= rx->subbeg);
-    assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
-    if (i >= 0) {
-#ifdef NO_TAINT_SUPPORT
-        sv_setpvn(sv, s, i);
-#else
-        const int oldtainted = TAINT_get;
-        TAINT_NOT;
-        sv_setpvn(sv, s, i);
-        TAINT_set(oldtainted);
-#endif
-        if (RXp_MATCH_UTF8(rx))
-            SvUTF8_on(sv);
-        else
-            SvUTF8_off(sv);
-        if (TAINTING_get) {
-            if (RXp_MATCH_TAINTED(rx)) {
-                if (SvTYPE(sv) >= SVt_PVMG) {
-                    MAGIC* const mg = SvMAGIC(sv);
-                    MAGIC* mgt;
-                    TAINT;
-                    SvMAGIC_set(sv, mg->mg_moremagic);
-                    SvTAINT(sv);
-                    if ((mgt = SvMAGIC(sv))) {
-                        mg->mg_moremagic = mgt;
-                        SvMAGIC_set(sv, mg);
-                    }
-                } else {
-                    TAINT;
-                    SvTAINT(sv);
-                }
-            } else
-                SvTAINTED_off(sv);
-        }
-    } else {
-      ret_undef:
-        sv_set_undef(sv);
-        return;
-    }
-}
-
-void
-Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
-                                                         SV const * const value)
-{
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
-
-    PERL_UNUSED_ARG(rx);
-    PERL_UNUSED_ARG(paren);
-    PERL_UNUSED_ARG(value);
-
-    if (!PL_localizing)
-        Perl_croak_no_modify();
-}
-
-I32
-Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
-                              const I32 paren)
-{
-    struct regexp *const rx = ReANY(r);
-    I32 i;
-    I32 s1, t1;
-
-    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
-
-    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
-        || paren == RX_BUFF_IDX_CARET_FULLMATCH
-        || paren == RX_BUFF_IDX_CARET_POSTMATCH
-    )
-    {
-        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
-        if (!keepcopy) {
-            /* on something like
-             *    $r = qr/.../;
-             *    /$qr/p;
-             * the KEEPCOPY is set on the PMOP rather than the regex */
-            if (PL_curpm && r == PM_GETRE(PL_curpm))
-                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
-        }
-        if (!keepcopy)
-            goto warn_undef;
-    }
-
-    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
-    switch (paren) {
-      case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
-      case RX_BUFF_IDX_PREMATCH:       /* $` */
-        if (rx->offs[0].start != -1) {
-                        i = rx->offs[0].start;
-                        if (i > 0) {
-                                s1 = 0;
-                                t1 = i;
-                                goto getlen;
-                        }
-            }
-        return 0;
-
-      case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
-      case RX_BUFF_IDX_POSTMATCH:       /* $' */
-            if (rx->offs[0].end != -1) {
-                        i = rx->sublen - rx->offs[0].end;
-                        if (i > 0) {
-                                s1 = rx->offs[0].end;
-                                t1 = rx->sublen;
-                                goto getlen;
-                        }
-            }
-        return 0;
-
-      default: /* $& / ${^MATCH}, $1, $2, ... */
-            if (paren <= (I32)rx->nparens &&
-            (s1 = rx->offs[paren].start) != -1 &&
-            (t1 = rx->offs[paren].end) != -1)
-            {
-            i = t1 - s1;
-            goto getlen;
-        } else {
-          warn_undef:
-            if (ckWARN(WARN_UNINITIALIZED))
-                report_uninit((const SV *)sv);
-            return 0;
-        }
-    }
-  getlen:
-    if (i > 0 && RXp_MATCH_UTF8(rx)) {
-        const char * const s = rx->subbeg - rx->suboffset + s1;
-        const U8 *ep;
-        STRLEN el;
-
-        i = t1 - s1;
-        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
-            i = el;
-    }
-    return i;
-}
 
 SV*
 Perl_reg_qr_package(pTHX_ REGEXP * const rx)
@@ -9458,1730 +2400,6 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
     Perl_re_printf( aTHX_ fmt "\n",args);                               \
 })
 
-/* This section of code defines the inversion list object and its methods.  The
- * interfaces are highly subject to change, so as much as possible is static to
- * this file.  An inversion list is here implemented as a malloc'd C UV array
- * as an SVt_INVLIST scalar.
- *
- * An inversion list for Unicode is an array of code points, sorted by ordinal
- * number.  Each element gives the code point that begins a range that extends
- * up-to but not including the code point given by the next element.  The final
- * element gives the first code point of a range that extends to the platform's
- * infinity.  The even-numbered elements (invlist[0], invlist[2], invlist[4],
- * ...) give ranges whose code points are all in the inversion list.  We say
- * that those ranges are in the set.  The odd-numbered elements give ranges
- * whose code points are not in the inversion list, and hence not in the set.
- * Thus, element [0] is the first code point in the list.  Element [1]
- * is the first code point beyond that not in the list; and element [2] is the
- * first code point beyond that that is in the list.  In other words, the first
- * range is invlist[0]..(invlist[1]-1), and all code points in that range are
- * in the inversion list.  The second range is invlist[1]..(invlist[2]-1), and
- * all code points in that range are not in the inversion list.  The third
- * range invlist[2]..(invlist[3]-1) gives code points that are in the inversion
- * list, and so forth.  Thus every element whose index is divisible by two
- * gives the beginning of a range that is in the list, and every element whose
- * index is not divisible by two gives the beginning of a range not in the
- * list.  If the final element's index is divisible by two, the inversion list
- * extends to the platform's infinity; otherwise the highest code point in the
- * inversion list is the contents of that element minus 1.
- *
- * A range that contains just a single code point N will look like
- *  invlist[i]   == N
- *  invlist[i+1] == N+1
- *
- * If N is UV_MAX (the highest representable code point on the machine), N+1 is
- * impossible to represent, so element [i+1] is omitted.  The single element
- * inversion list
- *  invlist[0] == UV_MAX
- * contains just UV_MAX, but is interpreted as matching to infinity.
- *
- * Taking the complement (inverting) an inversion list is quite simple, if the
- * first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element at the beginning of each inversion
- * list to always contain 0; there is an additional flag in the header which
- * indicates if the list begins at the 0, or is offset to begin at the next
- * element.  This means that the inversion list can be inverted without any
- * copying; just flip the flag.
- *
- * More about inversion lists can be found in "Unicode Demystified"
- * Chapter 13 by Richard Gillam, published by Addison-Wesley.
- *
- * The inversion list data structure is currently implemented as an SV pointing
- * to an array of UVs that the SV thinks are bytes.  This allows us to have an
- * array of UV whose memory management is automatically handled by the existing
- * facilities for SV's.
- *
- * Some of the methods should always be private to the implementation, and some
- * should eventually be made public */
-
-/* The header definitions are in F<invlist_inline.h> */
-
-#ifndef PERL_IN_XSUB_RE
-
-PERL_STATIC_INLINE UV*
-S__invlist_array_init(SV* const invlist, const bool will_have_0)
-{
-    /* Returns a pointer to the first element in the inversion list's array.
-     * This is called upon initialization of an inversion list.  Where the
-     * array begins depends on whether the list has the code point U+0000 in it
-     * or not.  The other parameter tells it whether the code that follows this
-     * call is about to put a 0 in the inversion list or not.  The first
-     * element is either the element reserved for 0, if TRUE, or the element
-     * after it, if FALSE */
-
-    bool* offset = get_invlist_offset_addr(invlist);
-    UV* zero_addr = (UV *) SvPVX(invlist);
-
-    PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
-
-    /* Must be empty */
-    assert(! _invlist_len(invlist));
-
-    *zero_addr = 0;
-
-    /* 1^1 = 0; 1^0 = 1 */
-    *offset = 1 ^ will_have_0;
-    return zero_addr + *offset;
-}
-
-STATIC void
-S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
-{
-    /* Replaces the inversion list in 'dest' with the one from 'src'.  It
-     * steals the list from 'src', so 'src' is made to have a NULL list.  This
-     * is similar to what SvSetMagicSV() would do, if it were implemented on
-     * inversion lists, though this routine avoids a copy */
-
-    const UV src_len          = _invlist_len(src);
-    const bool src_offset     = *get_invlist_offset_addr(src);
-    const STRLEN src_byte_len = SvLEN(src);
-    char * array              = SvPVX(src);
-
-#ifndef NO_TAINT_SUPPORT
-    const int oldtainted = TAINT_get;
-#endif
-
-    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
-
-    assert(is_invlist(src));
-    assert(is_invlist(dest));
-    assert(! invlist_is_iterating(src));
-    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
-
-    /* Make sure it ends in the right place with a NUL, as our inversion list
-     * manipulations aren't careful to keep this true, but sv_usepvn_flags()
-     * asserts it */
-    array[src_byte_len - 1] = '\0';
-
-    TAINT_NOT;      /* Otherwise it breaks */
-    sv_usepvn_flags(dest,
-                    (char *) array,
-                    src_byte_len - 1,
-
-                    /* This flag is documented to cause a copy to be avoided */
-                    SV_HAS_TRAILING_NUL);
-    TAINT_set(oldtainted);
-    SvPV_set(src, 0);
-    SvLEN_set(src, 0);
-    SvCUR_set(src, 0);
-
-    /* Finish up copying over the other fields in an inversion list */
-    *get_invlist_offset_addr(dest) = src_offset;
-    invlist_set_len(dest, src_len, src_offset);
-    *get_invlist_previous_index_addr(dest) = 0;
-    invlist_iterfinish(dest);
-}
-
-PERL_STATIC_INLINE IV*
-S_get_invlist_previous_index_addr(SV* invlist)
-{
-    /* Return the address of the IV that is reserved to hold the cached index
-     * */
-    PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
-
-    assert(is_invlist(invlist));
-
-    return &(((XINVLIST*) SvANY(invlist))->prev_index);
-}
-
-PERL_STATIC_INLINE IV
-S_invlist_previous_index(SV* const invlist)
-{
-    /* Returns cached index of previous search */
-
-    PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
-
-    return *get_invlist_previous_index_addr(invlist);
-}
-
-PERL_STATIC_INLINE void
-S_invlist_set_previous_index(SV* const invlist, const IV index)
-{
-    /* Caches <index> for later retrieval */
-
-    PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
-
-    assert(index == 0 || index < (int) _invlist_len(invlist));
-
-    *get_invlist_previous_index_addr(invlist) = index;
-}
-
-PERL_STATIC_INLINE void
-S_invlist_trim(SV* invlist)
-{
-    /* Free the not currently-being-used space in an inversion list */
-
-    /* But don't free up the space needed for the 0 UV that is always at the
-     * beginning of the list, nor the trailing NUL */
-    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
-
-    PERL_ARGS_ASSERT_INVLIST_TRIM;
-
-    assert(is_invlist(invlist));
-
-    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
-}
-
-PERL_STATIC_INLINE void
-S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
-{
-    PERL_ARGS_ASSERT_INVLIST_CLEAR;
-
-    assert(is_invlist(invlist));
-
-    invlist_set_len(invlist, 0, 0);
-    invlist_trim(invlist);
-}
-
-#endif /* ifndef PERL_IN_XSUB_RE */
-
-PERL_STATIC_INLINE bool
-S_invlist_is_iterating(const SV* const invlist)
-{
-    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
-
-    /* get_invlist_iter_addr()'s sv is non-const only because it returns a
-     * value that can be used to modify the invlist, it doesn't modify the
-     * invlist itself */
-    return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
-}
-
-#ifndef PERL_IN_XSUB_RE
-
-PERL_STATIC_INLINE UV
-S_invlist_max(const SV* const invlist)
-{
-    /* Returns the maximum number of elements storable in the inversion list's
-     * array, without having to realloc() */
-
-    PERL_ARGS_ASSERT_INVLIST_MAX;
-
-    assert(is_invlist(invlist));
-
-    /* Assumes worst case, in which the 0 element is not counted in the
-     * inversion list, so subtracts 1 for that */
-    return SvLEN(invlist) == 0  /* This happens under _new_invlist_C_array */
-           ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
-           : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
-}
-
-STATIC void
-S_initialize_invlist_guts(pTHX_ SV* invlist, const Size_t initial_size)
-{
-    PERL_ARGS_ASSERT_INITIALIZE_INVLIST_GUTS;
-
-    /* First 1 is in case the zero element isn't in the list; second 1 is for
-     * trailing NUL */
-    SvGROW(invlist, TO_INTERNAL_SIZE(initial_size + 1) + 1);
-    invlist_set_len(invlist, 0, 0);
-
-    /* Force iterinit() to be used to get iteration to work */
-    invlist_iterfinish(invlist);
-
-    *get_invlist_previous_index_addr(invlist) = 0;
-    SvPOK_on(invlist);  /* This allows B to extract the PV */
-}
-
-SV*
-Perl__new_invlist(pTHX_ IV initial_size)
-{
-
-    /* Return a pointer to a newly constructed inversion list, with enough
-     * space to store 'initial_size' elements.  If that number is negative, a
-     * system default is used instead */
-
-    SV* new_list;
-
-    if (initial_size < 0) {
-        initial_size = 10;
-    }
-
-    new_list = newSV_type(SVt_INVLIST);
-    initialize_invlist_guts(new_list, initial_size);
-
-    return new_list;
-}
-
-SV*
-Perl__new_invlist_C_array(pTHX_ const UV* const list)
-{
-    /* Return a pointer to a newly constructed inversion list, initialized to
-     * point to <list>, which has to be in the exact correct inversion list
-     * form, including internal fields.  Thus this is a dangerous routine that
-     * should not be used in the wrong hands.  The passed in 'list' contains
-     * several header fields at the beginning that are not part of the
-     * inversion list body proper */
-
-    const STRLEN length = (STRLEN) list[0];
-    const UV version_id =          list[1];
-    const bool offset   =    cBOOL(list[2]);
-#define HEADER_LENGTH 3
-    /* If any of the above changes in any way, you must change HEADER_LENGTH
-     * (if appropriate) and regenerate INVLIST_VERSION_ID by running
-     *      perl -E 'say int(rand 2**31-1)'
-     */
-#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
-                                        data structure type, so that one being
-                                        passed in can be validated to be an
-                                        inversion list of the correct vintage.
-                                       */
-
-    SV* invlist = newSV_type(SVt_INVLIST);
-
-    PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
-
-    if (version_id != INVLIST_VERSION_ID) {
-        Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
-    }
-
-    /* The generated array passed in includes header elements that aren't part
-     * of the list proper, so start it just after them */
-    SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
-
-    SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
-                               shouldn't touch it */
-
-    *(get_invlist_offset_addr(invlist)) = offset;
-
-    /* The 'length' passed to us is the physical number of elements in the
-     * inversion list.  But if there is an offset the logical number is one
-     * less than that */
-    invlist_set_len(invlist, length  - offset, offset);
-
-    invlist_set_previous_index(invlist, 0);
-
-    /* Initialize the iteration pointer. */
-    invlist_iterfinish(invlist);
-
-    SvREADONLY_on(invlist);
-    SvPOK_on(invlist);
-
-    return invlist;
-}
-
-STATIC void
-S__append_range_to_invlist(pTHX_ SV* const invlist,
-                                 const UV start, const UV end)
-{
-   /* Subject to change or removal.  Append the range from 'start' to 'end' at
-    * the end of the inversion list.  The range must be above any existing
-    * ones. */
-
-    UV* array;
-    UV max = invlist_max(invlist);
-    UV len = _invlist_len(invlist);
-    bool offset;
-
-    PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
-
-    if (len == 0) { /* Empty lists must be initialized */
-        offset = start != 0;
-        array = _invlist_array_init(invlist, ! offset);
-    }
-    else {
-        /* Here, the existing list is non-empty. The current max entry in the
-         * list is generally the first value not in the set, except when the
-         * set extends to the end of permissible values, in which case it is
-         * the first entry in that final set, and so this call is an attempt to
-         * append out-of-order */
-
-        UV final_element = len - 1;
-        array = invlist_array(invlist);
-        if (   array[final_element] > start
-            || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
-        {
-            Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
-                     array[final_element], start,
-                     ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
-        }
-
-        /* Here, it is a legal append.  If the new range begins 1 above the end
-         * of the range below it, it is extending the range below it, so the
-         * new first value not in the set is one greater than the newly
-         * extended range.  */
-        offset = *get_invlist_offset_addr(invlist);
-        if (array[final_element] == start) {
-            if (end != UV_MAX) {
-                array[final_element] = end + 1;
-            }
-            else {
-                /* But if the end is the maximum representable on the machine,
-                 * assume that infinity was actually what was meant.  Just let
-                 * the range that this would extend to have no end */
-                invlist_set_len(invlist, len - 1, offset);
-            }
-            return;
-        }
-    }
-
-    /* Here the new range doesn't extend any existing set.  Add it */
-
-    len += 2;   /* Includes an element each for the start and end of range */
-
-    /* If wll overflow the existing space, extend, which may cause the array to
-     * be moved */
-    if (max < len) {
-        invlist_extend(invlist, len);
-
-        /* Have to set len here to avoid assert failure in invlist_array() */
-        invlist_set_len(invlist, len, offset);
-
-        array = invlist_array(invlist);
-    }
-    else {
-        invlist_set_len(invlist, len, offset);
-    }
-
-    /* The next item on the list starts the range, the one after that is
-     * one past the new range.  */
-    array[len - 2] = start;
-    if (end != UV_MAX) {
-        array[len - 1] = end + 1;
-    }
-    else {
-        /* But if the end is the maximum representable on the machine, just let
-         * the range have no end */
-        invlist_set_len(invlist, len - 1, offset);
-    }
-}
-
-SSize_t
-Perl__invlist_search(SV* const invlist, const UV cp)
-{
-    /* Searches the inversion list for the entry that contains the input code
-     * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
-     * return value is the index into the list's array of the range that
-     * contains <cp>, that is, 'i' such that
-     *  array[i] <= cp < array[i+1]
-     */
-
-    IV low = 0;
-    IV mid;
-    IV high = _invlist_len(invlist);
-    const IV highest_element = high - 1;
-    const UV* array;
-
-    PERL_ARGS_ASSERT__INVLIST_SEARCH;
-
-    /* If list is empty, return failure. */
-    if (UNLIKELY(high == 0)) {
-        return -1;
-    }
-
-    /* (We can't get the array unless we know the list is non-empty) */
-    array = invlist_array(invlist);
-
-    mid = invlist_previous_index(invlist);
-    assert(mid >=0);
-    if (UNLIKELY(mid > highest_element)) {
-        mid = highest_element;
-    }
-
-    /* <mid> contains the cache of the result of the previous call to this
-     * function (0 the first time).  See if this call is for the same result,
-     * or if it is for mid-1.  This is under the theory that calls to this
-     * function will often be for related code points that are near each other.
-     * And benchmarks show that caching gives better results.  We also test
-     * here if the code point is within the bounds of the list.  These tests
-     * replace others that would have had to be made anyway to make sure that
-     * the array bounds were not exceeded, and these give us extra information
-     * at the same time */
-    if (cp >= array[mid]) {
-        if (cp >= array[highest_element]) {
-            return highest_element;
-        }
-
-        /* Here, array[mid] <= cp < array[highest_element].  This means that
-         * the final element is not the answer, so can exclude it; it also
-         * means that <mid> is not the final element, so can refer to 'mid + 1'
-         * safely */
-        if (cp < array[mid + 1]) {
-            return mid;
-        }
-        high--;
-        low = mid + 1;
-    }
-    else { /* cp < aray[mid] */
-        if (cp < array[0]) { /* Fail if outside the array */
-            return -1;
-        }
-        high = mid;
-        if (cp >= array[mid - 1]) {
-            goto found_entry;
-        }
-    }
-
-    /* Binary search.  What we are looking for is <i> such that
-     *  array[i] <= cp < array[i+1]
-     * The loop below converges on the i+1.  Note that there may not be an
-     * (i+1)th element in the array, and things work nonetheless */
-    while (low < high) {
-        mid = (low + high) / 2;
-        assert(mid <= highest_element);
-        if (array[mid] <= cp) { /* cp >= array[mid] */
-            low = mid + 1;
-
-            /* We could do this extra test to exit the loop early.
-            if (cp < array[low]) {
-                return mid;
-            }
-            */
-        }
-        else { /* cp < array[mid] */
-            high = mid;
-        }
-    }
-
-  found_entry:
-    high--;
-    invlist_set_previous_index(invlist, high);
-    return high;
-}
-
-void
-Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
-                                         const bool complement_b, SV** output)
-{
-    /* Take the union of two inversion lists and point '*output' to it.  On
-     * input, '*output' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
-     * even 'a' or 'b').  If to an inversion list, the contents of the original
-     * list will be replaced by the union.  The first list, 'a', may be
-     * NULL, in which case a copy of the second list is placed in '*output'.
-     * If 'complement_b' is TRUE, the union is taken of the complement
-     * (inversion) of 'b' instead of b itself.
-     *
-     * The basis for this comes from "Unicode Demystified" Chapter 13 by
-     * Richard Gillam, published by Addison-Wesley, and explained at some
-     * length there.  The preface says to incorporate its examples into your
-     * code at your own risk.
-     *
-     * The algorithm is like a merge sort. */
-
-    const UV* array_a;    /* a's array */
-    const UV* array_b;
-    UV len_a;       /* length of a's array */
-    UV len_b;
-
-    SV* u;                      /* the resulting union */
-    UV* array_u;
-    UV len_u = 0;
-
-    UV i_a = 0;             /* current index into a's array */
-    UV i_b = 0;
-    UV i_u = 0;
-
-    /* running count, as explained in the algorithm source book; items are
-     * stopped accumulating and are output when the count changes to/from 0.
-     * The count is incremented when we start a range that's in an input's set,
-     * and decremented when we start a range that's not in a set.  So this
-     * variable can be 0, 1, or 2.  When it is 0 neither input is in their set,
-     * and hence nothing goes into the union; 1, just one of the inputs is in
-     * its set (and its current range gets added to the union); and 2 when both
-     * inputs are in their sets.  */
-    UV count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
-    assert(a != b);
-    assert(*output == NULL || is_invlist(*output));
-
-    len_b = _invlist_len(b);
-    if (len_b == 0) {
-
-        /* Here, 'b' is empty, hence it's complement is all possible code
-         * points.  So if the union includes the complement of 'b', it includes
-         * everything, and we need not even look at 'a'.  It's easiest to
-         * create a new inversion list that matches everything.  */
-        if (complement_b) {
-            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
-
-            if (*output == NULL) { /* If the output didn't exist, just point it
-                                      at the new list */
-                *output = everything;
-            }
-            else { /* Otherwise, replace its contents with the new list */
-                invlist_replace_list_destroys_src(*output, everything);
-                SvREFCNT_dec_NN(everything);
-            }
-
-            return;
-        }
-
-        /* Here, we don't want the complement of 'b', and since 'b' is empty,
-         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
-         * output will be empty */
-
-        if (a == NULL || _invlist_len(a) == 0) {
-            if (*output == NULL) {
-                *output = _new_invlist(0);
-            }
-            else {
-                invlist_clear(*output);
-            }
-            return;
-        }
-
-        /* Here, 'a' is not empty, but 'b' is, so 'a' entirely determines the
-         * union.  We can just return a copy of 'a' if '*output' doesn't point
-         * to an existing list */
-        if (*output == NULL) {
-            *output = invlist_clone(a, NULL);
-            return;
-        }
-
-        /* If the output is to overwrite 'a', we have a no-op, as it's
-         * already in 'a' */
-        if (*output == a) {
-            return;
-        }
-
-        /* Here, '*output' is to be overwritten by 'a' */
-        u = invlist_clone(a, NULL);
-        invlist_replace_list_destroys_src(*output, u);
-        SvREFCNT_dec_NN(u);
-
-        return;
-    }
-
-    /* Here 'b' is not empty.  See about 'a' */
-
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-
-        /* Here, 'a' is empty (and b is not).  That means the union will come
-         * entirely from 'b'.  If '*output' is NULL, we can directly return a
-         * clone of 'b'.  Otherwise, we replace the contents of '*output' with
-         * the clone */
-
-        SV ** dest = (*output == NULL) ? output : &u;
-        *dest = invlist_clone(b, NULL);
-        if (complement_b) {
-            _invlist_invert(*dest);
-        }
-
-        if (dest == &u) {
-            invlist_replace_list_destroys_src(*output, u);
-            SvREFCNT_dec_NN(u);
-        }
-
-        return;
-    }
-
-    /* Here both lists exist and are non-empty */
-    array_a = invlist_array(a);
-    array_b = invlist_array(b);
-
-    /* If are to take the union of 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* To complement, we invert: if the first element is 0, remove it.  To
-         * do this, we just pretend the array starts one later */
-        if (array_b[0] == 0) {
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    /* Size the union for the worst case: that the sets are completely
-     * disjoint */
-    u = _new_invlist(len_a + len_b);
-
-    /* Will contain U+0000 if either component does */
-    array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
-                                      || (len_b > 0 && array_b[0] == 0));
-
-    /* Go through each input list item by item, stopping when have exhausted
-     * one of them */
-    while (i_a < len_a && i_b < len_b) {
-        UV cp;      /* The element to potentially add to the union's array */
-        bool cp_in_set;   /* is it in the input list's set or not */
-
-        /* We need to take one or the other of the two inputs for the union.
-         * Since we are merging two sorted lists, we take the smaller of the
-         * next items.  In case of a tie, we take first the one that is in its
-         * set.  If we first took the one not in its set, it would decrement
-         * the count, possibly to 0 which would cause it to be output as ending
-         * the range, and the next time through we would take the same number,
-         * and output it again as beginning the next range.  By doing it the
-         * opposite way, there is no possibility that the count will be
-         * momentarily decremented to 0, and thus the two adjoining ranges will
-         * be seamlessly merged.  (In a tie and both are in the set or both not
-         * in the set, it doesn't matter which we take first.) */
-        if (       array_a[i_a] < array_b[i_b]
-            || (   array_a[i_a] == array_b[i_b]
-                && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
-        {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-            cp = array_a[i_a++];
-        }
-        else {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
-            cp = array_b[i_b++];
-        }
-
-        /* Here, have chosen which of the two inputs to look at.  Only output
-         * if the running count changes to/from 0, which marks the
-         * beginning/end of a range that's in the set */
-        if (cp_in_set) {
-            if (count == 0) {
-                array_u[i_u++] = cp;
-            }
-            count++;
-        }
-        else {
-            count--;
-            if (count == 0) {
-                array_u[i_u++] = cp;
-            }
-        }
-    }
-
-
-    /* The loop above increments the index into exactly one of the input lists
-     * each iteration, and ends when either index gets to its list end.  That
-     * means the other index is lower than its end, and so something is
-     * remaining in that one.  We decrement 'count', as explained below, if
-     * that list is in its set.  (i_a and i_b each currently index the element
-     * beyond the one we care about.) */
-    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-        || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
-    {
-        count--;
-    }
-
-    /* Above we decremented 'count' if the list that had unexamined elements in
-     * it was in its set.  This has made it so that 'count' being non-zero
-     * means there isn't anything left to output; and 'count' equal to 0 means
-     * that what is left to output is precisely that which is left in the
-     * non-exhausted input list.
-     *
-     * To see why, note first that the exhausted input obviously has nothing
-     * left to add to the union.  If it was in its set at its end, that means
-     * the set extends from here to the platform's infinity, and hence so does
-     * the union and the non-exhausted set is irrelevant.  The exhausted set
-     * also contributed 1 to 'count'.  If 'count' was 2, it got decremented to
-     * 1, but if it was 1, the non-exhausted set wasn't in its set, and so
-     * 'count' remains at 1.  This is consistent with the decremented 'count'
-     * != 0 meaning there's nothing left to add to the union.
-     *
-     * But if the exhausted input wasn't in its set, it contributed 0 to
-     * 'count', and the rest of the union will be whatever the other input is.
-     * If 'count' was 0, neither list was in its set, and 'count' remains 0;
-     * otherwise it gets decremented to 0.  This is consistent with 'count'
-     * == 0 meaning the remainder of the union is whatever is left in the
-     * non-exhausted list. */
-    if (count != 0) {
-        len_u = i_u;
-    }
-    else {
-        IV copy_count = len_a - i_a;
-        if (copy_count > 0) {   /* The non-exhausted input is 'a' */
-            Copy(array_a + i_a, array_u + i_u, copy_count, UV);
-        }
-        else { /* The non-exhausted input is b */
-            copy_count = len_b - i_b;
-            Copy(array_b + i_b, array_u + i_u, copy_count, UV);
-        }
-        len_u = i_u + copy_count;
-    }
-
-    /* Set the result to the final length, which can change the pointer to
-     * array_u, so re-find it.  (Note that it is unlikely that this will
-     * change, as we are shrinking the space, not enlarging it) */
-    if (len_u != _invlist_len(u)) {
-        invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
-        invlist_trim(u);
-        array_u = invlist_array(u);
-    }
-
-    if (*output == NULL) {  /* Simply return the new inversion list */
-        *output = u;
-    }
-    else {
-        /* Otherwise, overwrite the inversion list that was in '*output'.  We
-         * could instead free '*output', and then set it to 'u', but experience
-         * has shown [perl #127392] that if the input is a mortal, we can get a
-         * huge build-up of these during regex compilation before they get
-         * freed. */
-        invlist_replace_list_destroys_src(*output, u);
-        SvREFCNT_dec_NN(u);
-    }
-
-    return;
-}
-
-void
-Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
-                                               const bool complement_b, SV** i)
-{
-    /* Take the intersection of two inversion lists and point '*i' to it.  On
-     * input, '*i' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
-     * even 'a' or 'b').  If to an inversion list, the contents of the original
-     * list will be replaced by the intersection.  The first list, 'a', may be
-     * NULL, in which case '*i' will be an empty list.  If 'complement_b' is
-     * TRUE, the result will be the intersection of 'a' and the complement (or
-     * inversion) of 'b' instead of 'b' directly.
-     *
-     * The basis for this comes from "Unicode Demystified" Chapter 13 by
-     * Richard Gillam, published by Addison-Wesley, and explained at some
-     * length there.  The preface says to incorporate its examples into your
-     * code at your own risk.  In fact, it had bugs
-     *
-     * The algorithm is like a merge sort, and is essentially the same as the
-     * union above
-     */
-
-    const UV* array_a;          /* a's array */
-    const UV* array_b;
-    UV len_a;   /* length of a's array */
-    UV len_b;
-
-    SV* r;                   /* the resulting intersection */
-    UV* array_r;
-    UV len_r = 0;
-
-    UV i_a = 0;             /* current index into a's array */
-    UV i_b = 0;
-    UV i_r = 0;
-
-    /* running count of how many of the two inputs are postitioned at ranges
-     * that are in their sets.  As explained in the algorithm source book,
-     * items are stopped accumulating and are output when the count changes
-     * to/from 2.  The count is incremented when we start a range that's in an
-     * input's set, and decremented when we start a range that's not in a set.
-     * Only when it is 2 are we in the intersection. */
-    UV count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
-    assert(a != b);
-    assert(*i == NULL || is_invlist(*i));
-
-    /* Special case if either one is empty */
-    len_a = (a == NULL) ? 0 : _invlist_len(a);
-    if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
-        if (len_a != 0 && complement_b) {
-
-            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
-             * must be empty.  Here, also we are using 'b's complement, which
-             * hence must be every possible code point.  Thus the intersection
-             * is simply 'a'. */
-
-            if (*i == a) {  /* No-op */
-                return;
-            }
-
-            if (*i == NULL) {
-                *i = invlist_clone(a, NULL);
-                return;
-            }
-
-            r = invlist_clone(a, NULL);
-            invlist_replace_list_destroys_src(*i, r);
-            SvREFCNT_dec_NN(r);
-            return;
-        }
-
-        /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
-         * intersection must be empty */
-        if (*i == NULL) {
-            *i = _new_invlist(0);
-            return;
-        }
-
-        invlist_clear(*i);
-        return;
-    }
-
-    /* Here both lists exist and are non-empty */
-    array_a = invlist_array(a);
-    array_b = invlist_array(b);
-
-    /* If are to take the intersection of 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* To complement, we invert: if the first element is 0, remove it.  To
-         * do this, we just pretend the array starts one later */
-        if (array_b[0] == 0) {
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    /* Size the intersection for the worst case: that the intersection ends up
-     * fragmenting everything to be completely disjoint */
-    r= _new_invlist(len_a + len_b);
-
-    /* Will contain U+0000 iff both components do */
-    array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
-                                     && len_b > 0 && array_b[0] == 0);
-
-    /* Go through each list item by item, stopping when have exhausted one of
-     * them */
-    while (i_a < len_a && i_b < len_b) {
-        UV cp;      /* The element to potentially add to the intersection's
-                       array */
-        bool cp_in_set; /* Is it in the input list's set or not */
-
-        /* We need to take one or the other of the two inputs for the
-         * intersection.  Since we are merging two sorted lists, we take the
-         * smaller of the next items.  In case of a tie, we take first the one
-         * that is not in its set (a difference from the union algorithm).  If
-         * we first took the one in its set, it would increment the count,
-         * possibly to 2 which would cause it to be output as starting a range
-         * in the intersection, and the next time through we would take that
-         * same number, and output it again as ending the set.  By doing the
-         * opposite of this, there is no possibility that the count will be
-         * momentarily incremented to 2.  (In a tie and both are in the set or
-         * both not in the set, it doesn't matter which we take first.) */
-        if (       array_a[i_a] < array_b[i_b]
-            || (   array_a[i_a] == array_b[i_b]
-                && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
-        {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-            cp = array_a[i_a++];
-        }
-        else {
-            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
-            cp= array_b[i_b++];
-        }
-
-        /* Here, have chosen which of the two inputs to look at.  Only output
-         * if the running count changes to/from 2, which marks the
-         * beginning/end of a range that's in the intersection */
-        if (cp_in_set) {
-            count++;
-            if (count == 2) {
-                array_r[i_r++] = cp;
-            }
-        }
-        else {
-            if (count == 2) {
-                array_r[i_r++] = cp;
-            }
-            count--;
-        }
-
-    }
-
-    /* The loop above increments the index into exactly one of the input lists
-     * each iteration, and ends when either index gets to its list end.  That
-     * means the other index is lower than its end, and so something is
-     * remaining in that one.  We increment 'count', as explained below, if the
-     * exhausted list was in its set.  (i_a and i_b each currently index the
-     * element beyond the one we care about.) */
-    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-        || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
-    {
-        count++;
-    }
-
-    /* Above we incremented 'count' if the exhausted list was in its set.  This
-     * has made it so that 'count' being below 2 means there is nothing left to
-     * output; otheriwse what's left to add to the intersection is precisely
-     * that which is left in the non-exhausted input list.
-     *
-     * To see why, note first that the exhausted input obviously has nothing
-     * left to affect the intersection.  If it was in its set at its end, that
-     * means the set extends from here to the platform's infinity, and hence
-     * anything in the non-exhausted's list will be in the intersection, and
-     * anything not in it won't be.  Hence, the rest of the intersection is
-     * precisely what's in the non-exhausted list  The exhausted set also
-     * contributed 1 to 'count', meaning 'count' was at least 1.  Incrementing
-     * it means 'count' is now at least 2.  This is consistent with the
-     * incremented 'count' being >= 2 means to add the non-exhausted list to
-     * the intersection.
-     *
-     * But if the exhausted input wasn't in its set, it contributed 0 to
-     * 'count', and the intersection can't include anything further; the
-     * non-exhausted set is irrelevant.  'count' was at most 1, and doesn't get
-     * incremented.  This is consistent with 'count' being < 2 meaning nothing
-     * further to add to the intersection. */
-    if (count < 2) { /* Nothing left to put in the intersection. */
-        len_r = i_r;
-    }
-    else { /* copy the non-exhausted list, unchanged. */
-        IV copy_count = len_a - i_a;
-        if (copy_count > 0) {   /* a is the one with stuff left */
-            Copy(array_a + i_a, array_r + i_r, copy_count, UV);
-        }
-        else {  /* b is the one with stuff left */
-            copy_count = len_b - i_b;
-            Copy(array_b + i_b, array_r + i_r, copy_count, UV);
-        }
-        len_r = i_r + copy_count;
-    }
-
-    /* Set the result to the final length, which can change the pointer to
-     * array_r, so re-find it.  (Note that it is unlikely that this will
-     * change, as we are shrinking the space, not enlarging it) */
-    if (len_r != _invlist_len(r)) {
-        invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
-        invlist_trim(r);
-        array_r = invlist_array(r);
-    }
-
-    if (*i == NULL) { /* Simply return the calculated intersection */
-        *i = r;
-    }
-    else { /* Otherwise, replace the existing inversion list in '*i'.  We could
-              instead free '*i', and then set it to 'r', but experience has
-              shown [perl #127392] that if the input is a mortal, we can get a
-              huge build-up of these during regex compilation before they get
-              freed. */
-        if (len_r) {
-            invlist_replace_list_destroys_src(*i, r);
-        }
-        else {
-            invlist_clear(*i);
-        }
-        SvREFCNT_dec_NN(r);
-    }
-
-    return;
-}
-
-SV*
-Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
-{
-    /* Add the range from 'start' to 'end' inclusive to the inversion list's
-     * set.  A pointer to the inversion list is returned.  This may actually be
-     * a new list, in which case the passed in one has been destroyed.  The
-     * passed-in inversion list can be NULL, in which case a new one is created
-     * with just the one range in it.  The new list is not necessarily
-     * NUL-terminated.  Space is not freed if the inversion list shrinks as a
-     * result of this function.  The gain would not be large, and in many
-     * cases, this is called multiple times on a single inversion list, so
-     * anything freed may almost immediately be needed again.
-     *
-     * This used to mostly call the 'union' routine, but that is much more
-     * heavyweight than really needed for a single range addition */
-
-    UV* array;              /* The array implementing the inversion list */
-    UV len;                 /* How many elements in 'array' */
-    SSize_t i_s;            /* index into the invlist array where 'start'
-                               should go */
-    SSize_t i_e = 0;        /* And the index where 'end' should go */
-    UV cur_highest;         /* The highest code point in the inversion list
-                               upon entry to this function */
-
-    /* This range becomes the whole inversion list if none already existed */
-    if (invlist == NULL) {
-        invlist = _new_invlist(2);
-        _append_range_to_invlist(invlist, start, end);
-        return invlist;
-    }
-
-    /* Likewise, if the inversion list is currently empty */
-    len = _invlist_len(invlist);
-    if (len == 0) {
-        _append_range_to_invlist(invlist, start, end);
-        return invlist;
-    }
-
-    /* Starting here, we have to know the internals of the list */
-    array = invlist_array(invlist);
-
-    /* If the new range ends higher than the current highest ... */
-    cur_highest = invlist_highest(invlist);
-    if (end > cur_highest) {
-
-        /* If the whole range is higher, we can just append it */
-        if (start > cur_highest) {
-            _append_range_to_invlist(invlist, start, end);
-            return invlist;
-        }
-
-        /* Otherwise, add the portion that is higher ... */
-        _append_range_to_invlist(invlist, cur_highest + 1, end);
-
-        /* ... and continue on below to handle the rest.  As a result of the
-         * above append, we know that the index of the end of the range is the
-         * final even numbered one of the array.  Recall that the final element
-         * always starts a range that extends to infinity.  If that range is in
-         * the set (meaning the set goes from here to infinity), it will be an
-         * even index, but if it isn't in the set, it's odd, and the final
-         * range in the set is one less, which is even. */
-        if (end == UV_MAX) {
-            i_e = len;
-        }
-        else {
-            i_e = len - 2;
-        }
-    }
-
-    /* We have dealt with appending, now see about prepending.  If the new
-     * range starts lower than the current lowest ... */
-    if (start < array[0]) {
-
-        /* Adding something which has 0 in it is somewhat tricky, and uncommon.
-         * Let the union code handle it, rather than having to know the
-         * trickiness in two code places.  */
-        if (UNLIKELY(start == 0)) {
-            SV* range_invlist;
-
-            range_invlist = _new_invlist(2);
-            _append_range_to_invlist(range_invlist, start, end);
-
-            _invlist_union(invlist, range_invlist, &invlist);
-
-            SvREFCNT_dec_NN(range_invlist);
-
-            return invlist;
-        }
-
-        /* If the whole new range comes before the first entry, and doesn't
-         * extend it, we have to insert it as an additional range */
-        if (end < array[0] - 1) {
-            i_s = i_e = -1;
-            goto splice_in_new_range;
-        }
-
-        /* Here the new range adjoins the existing first range, extending it
-         * downwards. */
-        array[0] = start;
-
-        /* And continue on below to handle the rest.  We know that the index of
-         * the beginning of the range is the first one of the array */
-        i_s = 0;
-    }
-    else { /* Not prepending any part of the new range to the existing list.
-            * Find where in the list it should go.  This finds i_s, such that:
-            *     invlist[i_s] <= start < array[i_s+1]
-            */
-        i_s = _invlist_search(invlist, start);
-    }
-
-    /* At this point, any extending before the beginning of the inversion list
-     * and/or after the end has been done.  This has made it so that, in the
-     * code below, each endpoint of the new range is either in a range that is
-     * in the set, or is in a gap between two ranges that are.  This means we
-     * don't have to worry about exceeding the array bounds.
-     *
-     * Find where in the list the new range ends (but we can skip this if we
-     * have already determined what it is, or if it will be the same as i_s,
-     * which we already have computed) */
-    if (i_e == 0) {
-        i_e = (start == end)
-              ? i_s
-              : _invlist_search(invlist, end);
-    }
-
-    /* Here generally invlist[i_e] <= end < array[i_e+1].  But if invlist[i_e]
-     * is a range that goes to infinity there is no element at invlist[i_e+1],
-     * so only the first relation holds. */
-
-    if ( ! ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
-
-        /* Here, the ranges on either side of the beginning of the new range
-         * are in the set, and this range starts in the gap between them.
-         *
-         * The new range extends the range above it downwards if the new range
-         * ends at or above that range's start */
-        const bool extends_the_range_above = (   end == UV_MAX
-                                              || end + 1 >= array[i_s+1]);
-
-        /* The new range extends the range below it upwards if it begins just
-         * after where that range ends */
-        if (start == array[i_s]) {
-
-            /* If the new range fills the entire gap between the other ranges,
-             * they will get merged together.  Other ranges may also get
-             * merged, depending on how many of them the new range spans.  In
-             * the general case, we do the merge later, just once, after we
-             * figure out how many to merge.  But in the case where the new
-             * range exactly spans just this one gap (possibly extending into
-             * the one above), we do the merge here, and an early exit.  This
-             * is done here to avoid having to special case later. */
-            if (i_e - i_s <= 1) {
-
-                /* If i_e - i_s == 1, it means that the new range terminates
-                 * within the range above, and hence 'extends_the_range_above'
-                 * must be true.  (If the range above it extends to infinity,
-                 * 'i_s+2' will be above the array's limit, but 'len-i_s-2'
-                 * will be 0, so no harm done.) */
-                if (extends_the_range_above) {
-                    Move(array + i_s + 2, array + i_s, len - i_s - 2, UV);
-                    invlist_set_len(invlist,
-                                    len - 2,
-                                    *(get_invlist_offset_addr(invlist)));
-                    return invlist;
-                }
-
-                /* Here, i_e must == i_s.  We keep them in sync, as they apply
-                 * to the same range, and below we are about to decrement i_s
-                 * */
-                i_e--;
-            }
-
-            /* Here, the new range is adjacent to the one below.  (It may also
-             * span beyond the range above, but that will get resolved later.)
-             * Extend the range below to include this one. */
-            array[i_s] = (end == UV_MAX) ? UV_MAX : end + 1;
-            i_s--;
-            start = array[i_s];
-        }
-        else if (extends_the_range_above) {
-
-            /* Here the new range only extends the range above it, but not the
-             * one below.  It merges with the one above.  Again, we keep i_e
-             * and i_s in sync if they point to the same range */
-            if (i_e == i_s) {
-                i_e++;
-            }
-            i_s++;
-            array[i_s] = start;
-        }
-    }
-
-    /* Here, we've dealt with the new range start extending any adjoining
-     * existing ranges.
-     *
-     * If the new range extends to infinity, it is now the final one,
-     * regardless of what was there before */
-    if (UNLIKELY(end == UV_MAX)) {
-        invlist_set_len(invlist, i_s + 1, *(get_invlist_offset_addr(invlist)));
-        return invlist;
-    }
-
-    /* If i_e started as == i_s, it has also been dealt with,
-     * and been updated to the new i_s, which will fail the following if */
-    if (! ELEMENT_RANGE_MATCHES_INVLIST(i_e)) {
-
-        /* Here, the ranges on either side of the end of the new range are in
-         * the set, and this range ends in the gap between them.
-         *
-         * If this range is adjacent to (hence extends) the range above it, it
-         * becomes part of that range; likewise if it extends the range below,
-         * it becomes part of that range */
-        if (end + 1 == array[i_e+1]) {
-            i_e++;
-            array[i_e] = start;
-        }
-        else if (start <= array[i_e]) {
-            array[i_e] = end + 1;
-            i_e--;
-        }
-    }
-
-    if (i_s == i_e) {
-
-        /* If the range fits entirely in an existing range (as possibly already
-         * extended above), it doesn't add anything new */
-        if (ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
-            return invlist;
-        }
-
-        /* Here, no part of the range is in the list.  Must add it.  It will
-         * occupy 2 more slots */
-      splice_in_new_range:
-
-        invlist_extend(invlist, len + 2);
-        array = invlist_array(invlist);
-        /* Move the rest of the array down two slots. Don't include any
-         * trailing NUL */
-        Move(array + i_e + 1, array + i_e + 3, len - i_e - 1, UV);
-
-        /* Do the actual splice */
-        array[i_e+1] = start;
-        array[i_e+2] = end + 1;
-        invlist_set_len(invlist, len + 2, *(get_invlist_offset_addr(invlist)));
-        return invlist;
-    }
-
-    /* Here the new range crossed the boundaries of a pre-existing range.  The
-     * code above has adjusted things so that both ends are in ranges that are
-     * in the set.  This means everything in between must also be in the set.
-     * Just squash things together */
-    Move(array + i_e + 1, array + i_s + 1, len - i_e - 1, UV);
-    invlist_set_len(invlist,
-                    len - i_e + i_s,
-                    *(get_invlist_offset_addr(invlist)));
-
-    return invlist;
-}
-
-SV*
-Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
-                                 UV** other_elements_ptr)
-{
-    /* Create and return an inversion list whose contents are to be populated
-     * by the caller.  The caller gives the number of elements (in 'size') and
-     * the very first element ('element0').  This function will set
-     * '*other_elements_ptr' to an array of UVs, where the remaining elements
-     * are to be placed.
-     *
-     * Obviously there is some trust involved that the caller will properly
-     * fill in the other elements of the array.
-     *
-     * (The first element needs to be passed in, as the underlying code does
-     * things differently depending on whether it is zero or non-zero) */
-
-    SV* invlist = _new_invlist(size);
-    bool offset;
-
-    PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
-
-    invlist = add_cp_to_invlist(invlist, element0);
-    offset = *get_invlist_offset_addr(invlist);
-
-    invlist_set_len(invlist, size, offset);
-    *other_elements_ptr = invlist_array(invlist) + 1;
-    return invlist;
-}
-
-#endif
-
-#ifndef PERL_IN_XSUB_RE
-void
-Perl__invlist_invert(pTHX_ SV* const invlist)
-{
-    /* Complement the input inversion list.  This adds a 0 if the list didn't
-     * have a zero; removes it otherwise.  As described above, the data
-     * structure is set up so that this is very efficient */
-
-    PERL_ARGS_ASSERT__INVLIST_INVERT;
-
-    assert(! invlist_is_iterating(invlist));
-
-    /* The inverse of matching nothing is matching everything */
-    if (_invlist_len(invlist) == 0) {
-        _append_range_to_invlist(invlist, 0, UV_MAX);
-        return;
-    }
-
-    *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
-}
-
-SV*
-Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
-{
-    /* Return a new inversion list that is a copy of the input one, which is
-     * unchanged.  The new list will not be mortal even if the old one was. */
-
-    const STRLEN nominal_length = _invlist_len(invlist);
-    const STRLEN physical_length = SvCUR(invlist);
-    const bool offset = *(get_invlist_offset_addr(invlist));
-
-    PERL_ARGS_ASSERT_INVLIST_CLONE;
-
-    if (new_invlist == NULL) {
-        new_invlist = _new_invlist(nominal_length);
-    }
-    else {
-        sv_upgrade(new_invlist, SVt_INVLIST);
-        initialize_invlist_guts(new_invlist, nominal_length);
-    }
-
-    *(get_invlist_offset_addr(new_invlist)) = offset;
-    invlist_set_len(new_invlist, nominal_length, offset);
-    Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
-
-    return new_invlist;
-}
-
-#endif
-
-PERL_STATIC_INLINE UV
-S_invlist_lowest(SV* const invlist)
-{
-    /* Returns the lowest code point that matches an inversion list.  This API
-     * has an ambiguity, as it returns 0 under either the lowest is actually
-     * 0, or if the list is empty.  If this distinction matters to you, check
-     * for emptiness before calling this function */
-
-    UV len = _invlist_len(invlist);
-    UV *array;
-
-    PERL_ARGS_ASSERT_INVLIST_LOWEST;
-
-    if (len == 0) {
-        return 0;
-    }
-
-    array = invlist_array(invlist);
-
-    return array[0];
-}
-
-STATIC SV *
-S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
-{
-    /* Get the contents of an inversion list into a string SV so that they can
-     * be printed out.  If 'traditional_style' is TRUE, it uses the format
-     * traditionally done for debug tracing; otherwise it uses a format
-     * suitable for just copying to the output, with blanks between ranges and
-     * a dash between range components */
-
-    UV start, end;
-    SV* output;
-    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
-    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
-
-    if (traditional_style) {
-        output = newSVpvs("\n");
-    }
-    else {
-        output = newSVpvs("");
-    }
-
-    PERL_ARGS_ASSERT_INVLIST_CONTENTS;
-
-    assert(! invlist_is_iterating(invlist));
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (end == UV_MAX) {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
-                                          start, intra_range_delimiter,
-                                                 inter_range_delimiter);
-        }
-        else if (end != start) {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
-                                          start,
-                                                   intra_range_delimiter,
-                                                  end, inter_range_delimiter);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
-                                          start, inter_range_delimiter);
-        }
-    }
-
-    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
-        SvCUR_set(output, SvCUR(output) - 1);
-    }
-
-    return output;
-}
-
-#ifndef PERL_IN_XSUB_RE
-void
-Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
-                         const char * const indent, SV* const invlist)
-{
-    /* Designed to be called only by do_sv_dump().  Dumps out the ranges of the
-     * inversion list 'invlist' to 'file' at 'level'  Each line is prefixed by
-     * the string 'indent'.  The output looks like this:
-         [0] 0x000A .. 0x000D
-         [2] 0x0085
-         [4] 0x2028 .. 0x2029
-         [6] 0x3104 .. INFTY
-     * This means that the first range of code points matched by the list are
-     * 0xA through 0xD; the second range contains only the single code point
-     * 0x85, etc.  An inversion list is an array of UVs.  Two array elements
-     * are used to define each range (except if the final range extends to
-     * infinity, only a single element is needed).  The array index of the
-     * first element for the corresponding range is given in brackets. */
-
-    UV start, end;
-    STRLEN count = 0;
-
-    PERL_ARGS_ASSERT__INVLIST_DUMP;
-
-    if (invlist_is_iterating(invlist)) {
-        Perl_dump_indent(aTHX_ level, file,
-             "%sCan't dump inversion list because is in middle of iterating\n",
-             indent);
-        return;
-    }
-
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (end == UV_MAX) {
-            Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n",
-                                   indent, (UV)count, start);
-        }
-        else if (end != start) {
-            Perl_dump_indent(aTHX_ level, file,
-                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
-                                indent, (UV)count, start,         end);
-        }
-        else {
-            Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
-                                            indent, (UV)count, start);
-        }
-        count += 2;
-    }
-}
-
-#endif
-
-#if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
-bool
-Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
-{
-    /* Return a boolean as to if the two passed in inversion lists are
-     * identical.  The final argument, if TRUE, says to take the complement of
-     * the second inversion list before doing the comparison */
-
-    const UV len_a = _invlist_len(a);
-    UV len_b = _invlist_len(b);
-
-    const UV* array_a = NULL;
-    const UV* array_b = NULL;
-
-    PERL_ARGS_ASSERT__INVLISTEQ;
-
-    /* This code avoids accessing the arrays unless it knows the length is
-     * non-zero */
-
-    if (len_a == 0) {
-        if (len_b == 0) {
-            return ! complement_b;
-        }
-    }
-    else {
-        array_a = invlist_array(a);
-    }
-
-    if (len_b != 0) {
-        array_b = invlist_array(b);
-    }
-
-    /* If are to compare 'a' with the complement of b, set it
-     * up so are looking at b's complement. */
-    if (complement_b) {
-
-        /* The complement of nothing is everything, so <a> would have to have
-         * just one element, starting at zero (ending at infinity) */
-        if (len_b == 0) {
-            return (len_a == 1 && array_a[0] == 0);
-        }
-        if (array_b[0] == 0) {
-
-            /* Otherwise, to complement, we invert.  Here, the first element is
-             * 0, just remove it.  To do this, we just pretend the array starts
-             * one later */
-
-            array_b++;
-            len_b--;
-        }
-        else {
-
-            /* But if the first element is not zero, we pretend the list starts
-             * at the 0 that is always stored immediately before the array. */
-            array_b--;
-            len_b++;
-        }
-    }
-
-    return    len_a == len_b
-           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
-
-}
-#endif
-
-/*
- * As best we can, determine the characters that can match the start of
- * the given EXACTF-ish node.  This is for use in creating ssc nodes, so there
- * can be false positive matches
- *
- * Returns the invlist as a new SV*; it is the caller's responsibility to
- * call SvREFCNT_dec() when done with it.
- */
-STATIC SV*
-S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
-{
-    const U8 * s = (U8*)STRING(node);
-    SSize_t bytelen = STR_LEN(node);
-    UV uc;
-    /* Start out big enough for 2 separate code points */
-    SV* invlist = _new_invlist(4);
-
-    PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST;
-
-    if (! UTF) {
-        uc = *s;
-
-        /* We punt and assume can match anything if the node begins
-         * with a multi-character fold.  Things are complicated.  For
-         * example, /ffi/i could match any of:
-         *  "\N{LATIN SMALL LIGATURE FFI}"
-         *  "\N{LATIN SMALL LIGATURE FF}I"
-         *  "F\N{LATIN SMALL LIGATURE FI}"
-         *  plus several other things; and making sure we have all the
-         *  possibilities is hard. */
-        if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + bytelen)) {
-            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else {
-            /* Any Latin1 range character can potentially match any
-             * other depending on the locale, and in Turkic locales, 'I' and
-             * 'i' can match U+130 and U+131 */
-            if (OP(node) == EXACTFL) {
-                _invlist_union(invlist, PL_Latin1, &invlist);
-                if (isALPHA_FOLD_EQ(uc, 'I')) {
-                    invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-                    invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-                }
-            }
-            else {
-                /* But otherwise, it matches at least itself.  We can
-                 * quickly tell if it has a distinct fold, and if so,
-                 * it matches that as well */
-                invlist = add_cp_to_invlist(invlist, uc);
-                if (IS_IN_SOME_FOLD_L1(uc))
-                    invlist = add_cp_to_invlist(invlist, PL_fold_latin1[uc]);
-            }
-
-            /* Some characters match above-Latin1 ones under /i.  This
-             * is true of EXACTFL ones when the locale is UTF-8 */
-            if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
-                && (! isASCII(uc) || ! inRANGE(OP(node), EXACTFAA,
-                                                         EXACTFAA_NO_TRIE)))
-            {
-                add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
-            }
-        }
-    }
-    else {  /* Pattern is UTF-8 */
-        U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
-        const U8* e = s + bytelen;
-        IV fc;
-
-        fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
-
-        /* The only code points that aren't folded in a UTF EXACTFish
-         * node are the problematic ones in EXACTFL nodes */
-        if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
-            /* We need to check for the possibility that this EXACTFL
-             * node begins with a multi-char fold.  Therefore we fold
-             * the first few characters of it so that we can make that
-             * check */
-            U8 *d = folded;
-            int i;
-
-            fc = -1;
-            for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
-                if (isASCII(*s)) {
-                    *(d++) = (U8) toFOLD(*s);
-                    if (fc < 0) {       /* Save the first fold */
-                        fc = *(d-1);
-                    }
-                    s++;
-                }
-                else {
-                    STRLEN len;
-                    UV fold = toFOLD_utf8_safe(s, e, d, &len);
-                    if (fc < 0) {       /* Save the first fold */
-                        fc = fold;
-                    }
-                    d += len;
-                    s += UTF8SKIP(s);
-                }
-            }
-
-            /* And set up so the code below that looks in this folded
-             * buffer instead of the node's string */
-            e = d;
-            s = folded;
-        }
-
-        /* When we reach here 's' points to the fold of the first
-         * character(s) of the node; and 'e' points to far enough along
-         * the folded string to be just past any possible multi-char
-         * fold.
-         *
-         * Like the non-UTF case above, we punt if the node begins with a
-         * multi-char fold  */
-
-        if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
-            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
-        }
-        else {  /* Single char fold */
-            unsigned int k;
-            U32 first_fold;
-            const U32 * remaining_folds;
-            Size_t folds_count;
-
-            /* It matches itself */
-            invlist = add_cp_to_invlist(invlist, fc);
-
-            /* ... plus all the things that fold to it, which are found in
-             * PL_utf8_foldclosures */
-            folds_count = _inverse_folds(fc, &first_fold,
-                                                &remaining_folds);
-            for (k = 0; k < folds_count; k++) {
-                UV c = (k == 0) ? first_fold : remaining_folds[k-1];
-
-                /* /aa doesn't allow folds between ASCII and non- */
-                if (   inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)
-                    && isASCII(c) != isASCII(fc))
-                {
-                    continue;
-                }
-
-                invlist = add_cp_to_invlist(invlist, c);
-            }
-
-            if (OP(node) == EXACTFL) {
-
-                /* If either [iI] are present in an EXACTFL node the above code
-                 * should have added its normal case pair, but under a Turkish
-                 * locale they could match instead the case pairs from it.  Add
-                 * those as potential matches as well */
-                if (isALPHA_FOLD_EQ(fc, 'I')) {
-                    invlist = add_cp_to_invlist(invlist,
-                                                LATIN_SMALL_LETTER_DOTLESS_I);
-                    invlist = add_cp_to_invlist(invlist,
-                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-                }
-                else if (fc == LATIN_SMALL_LETTER_DOTLESS_I) {
-                    invlist = add_cp_to_invlist(invlist, 'I');
-                }
-                else if (fc == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
-                    invlist = add_cp_to_invlist(invlist, 'i');
-                }
-            }
-        }
-    }
-
-    return invlist;
-}
-
-#undef HEADER_LENGTH
-#undef TO_INTERNAL_SIZE
-#undef FROM_INTERNAL_SIZE
-#undef INVLIST_VERSION_ID
-
-/* End of inversion list object */
 
 STATIC void
 S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
@@ -11451,12 +2669,6 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  * is a trifle forced, but the need to tie the tails of the branches to what
  * follows makes it hard to avoid.
  */
-#define REGTAIL(x,y,z) regtail((x),(y),(z),depth+1)
-#ifdef DEBUGGING
-#define REGTAIL_STUDY(x,y,z) regtail_study((x),(y),(z),depth+1)
-#else
-#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
-#endif
 
 STATIC regnode_offset
 S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
@@ -11484,7 +2696,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     }
 
     if (sv_dat) {
-        num = add_data( pRExC_state, STR_WITH_LEN("S"));
+        num = reg_add_data( pRExC_state, STR_WITH_LEN("S"));
         RExC_rxi->data->data[num]=(void*)sv_dat;
         SvREFCNT_inc_simple_void_NN(sv_dat);
     }
@@ -11708,7 +2920,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
         vFAIL("Too many nested open parens");
     }
 
-    *flagp = 0;                         /* Initialize. */
+    *flagp = 0;				/* Initialize. */
 
     /* Having this true makes it feasible to have a lot fewer tests for the
      * parse pointer being in scope.  For example, we can write
@@ -12034,7 +3246,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RExC_seen |= REG_VERBARG_SEEN;
             if (start_arg) {
                 SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
-                ARG(REGNODE_p(ret)) = add_data( pRExC_state,
+                ARG(REGNODE_p(ret)) = reg_add_data( pRExC_state,
                                         STR_WITH_LEN("S"));
                 RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
                 FLAGS(REGNODE_p(ret)) = 1;
@@ -12066,10 +3278,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             if (RExC_parse > RExC_end) {
                 paren = '\0';
             }
-            ret = 0;                    /* For look-ahead/behind. */
+            ret = 0;			/* For look-ahead/behind. */
             switch (paren) {
 
-            case 'P':   /* (?P...) variants for those used to PCRE/Python */
+            case 'P':	/* (?P...) variants for those used to PCRE/Python */
                 paren = *RExC_parse;
                 if ( paren == '<') {    /* (?P<...>) named capture */
                     RExC_parse_inc_by(1);
@@ -12412,13 +3624,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 RExC_parse_set(RExC_start + cb->end);
                 o = cb->block;
                 if (cb->src_regex) {
-                    n = add_data(pRExC_state, STR_WITH_LEN("rl"));
+                    n = reg_add_data(pRExC_state, STR_WITH_LEN("rl"));
                     RExC_rxi->data->data[n] =
                         (void*)SvREFCNT_inc((SV*)cb->src_regex);
                     RExC_rxi->data->data[n+1] = (void*)o;
                 }
                 else {
-                    n = add_data(pRExC_state,
+                    n = reg_add_data(pRExC_state,
                             (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
                     RExC_rxi->data->data[n] = (void*)o;
                 }
@@ -12511,7 +3723,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     }
                     RExC_parse_inc_by(1);
                     if (sv_dat) {
-                        num = add_data( pRExC_state, STR_WITH_LEN("S"));
+                        num = reg_add_data( pRExC_state, STR_WITH_LEN("S"));
                         RExC_rxi->data->data[num]=(void*)sv_dat;
                         SvREFCNT_inc_simple_void_NN(sv_dat);
                     }
@@ -12783,12 +3995,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     else if (paren == ':') {
         *flagp |= flags&SIMPLE;
     }
-    if (is_open) {                              /* Starts with OPEN. */
+    if (is_open) {				/* Starts with OPEN. */
         if (! REGTAIL(pRExC_state, ret, br)) {  /* OPEN -> first. */
             REQUIRE_BRANCHJ(flagp, 0);
         }
     }
-    else if (paren != '?')              /* Not Conditional */
+    else if (paren != '?')		/* Not Conditional */
         ret = br;
     *flagp |= flags & (HASWIDTH | POSTPONED);
     lastbr = br;
@@ -13005,7 +4217,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             vFAIL("Unmatched )");
         }
         else
-            FAIL("Junk on end of regexp");      /* "Can't happen". */
+            FAIL("Junk on end of regexp");	/* "Can't happen". */
         NOT_REACHED; /* NOTREACHED */
     }
 
@@ -13052,7 +4264,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         }
     }
 
-    *flagp = 0;                 /* Initialize. */
+    *flagp = 0;			/* Initialize. */
 
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                             FALSE /* Don't force to /x */ );
@@ -13082,7 +4294,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         chain = latest;
         c++;
     }
-    if (chain == 0) {   /* Loop ran zero times. */
+    if (chain == 0) {	/* Loop ran zero times. */
         chain = reg_node(pRExC_state, NOTHING);
         if (ret == 0)
             ret = chain;
@@ -14004,14 +5216,6 @@ S_backref_value(char *p, char *e)
     return I32_MAX;
 }
 
-#ifdef DEBUGGING
-#define REGNODE_GUTS(state,op,extra_size) \
-    regnode_guts_debug(state,op,extra_size)
-#else
-#define REGNODE_GUTS(state,op,extra_size) \
-    regnode_guts(state,extra_size)
-#endif
-
 
 /*
  - regatom - the lowest level
@@ -14093,7 +5297,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
     DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
-    *flagp = 0;         /* Initialize. */
+    *flagp = 0;		/* Initialize. */
 
     DEBUG_PARSE("atom");
 
@@ -14249,7 +5453,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else {
                 ret = reg_node(pRExC_state, SEOL);
             }
-            RExC_seen_zerolen++;                /* Do not optimize RE away */
+            RExC_seen_zerolen++;		/* Do not optimize RE away */
             goto finish_meta_pat;
         case 'z':
             if (RExC_pm_flags & PMf_WILDCARD) {
@@ -14259,7 +5463,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else {
                 ret = reg_node(pRExC_state, EOS);
             }
-            RExC_seen_zerolen++;                /* Do not optimize RE away */
+            RExC_seen_zerolen++;		/* Do not optimize RE away */
             goto finish_meta_pat;
         case 'C':
             vFAIL("\\C no longer supported");
@@ -16046,8 +7250,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 }
 
 
-STATIC void
-S_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
+void
+Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
 {
     /* Uses the inversion list '*invlist_ptr' to populate the ANYOF 'node'.  It
      * sets up the bitmap and any flags, removing those code points from the
@@ -16962,7 +8166,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
     /* Handle the (?[...]) construct to do set operations */
 
     U8 curchar;                     /* Current character being parsed */
-    UV start, end;                  /* End points of code point ranges */
+    UV start, end;	            /* End points of code point ranges */
     SV* final = NULL;               /* The end result inversion list */
     SV* result_string;              /* 'final' stringified */
     AV* stack;                      /* stack of operators and operands not yet
@@ -17698,8 +8902,8 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
 #undef IS_OPERATOR
 #undef IS_OPERAND
 
-STATIC void
-S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
+void
+Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
 {
     /* This adds the Latin1/above-Latin1 folding rules.
      *
@@ -18073,7 +9277,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     assert(RExC_parse <= RExC_end);
 
-    if (UCHARAT(RExC_parse) == '^') {   /* Complement the class */
+    if (UCHARAT(RExC_parse) == '^') {	/* Complement the class */
         RExC_parse_inc_by(1);
         invert = TRUE;
         allow_mutiple_chars = FALSE;
@@ -18236,16 +9440,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 U32 packed_warn;
                 U8 grok_c_char;
 
-            case 'w':   namedclass = ANYOF_WORDCHAR;    break;
-            case 'W':   namedclass = ANYOF_NWORDCHAR;   break;
-            case 's':   namedclass = ANYOF_SPACE;       break;
-            case 'S':   namedclass = ANYOF_NSPACE;      break;
-            case 'd':   namedclass = ANYOF_DIGIT;       break;
-            case 'D':   namedclass = ANYOF_NDIGIT;      break;
-            case 'v':   namedclass = ANYOF_VERTWS;      break;
-            case 'V':   namedclass = ANYOF_NVERTWS;     break;
-            case 'h':   namedclass = ANYOF_HORIZWS;     break;
-            case 'H':   namedclass = ANYOF_NHORIZWS;    break;
+            case 'w':	namedclass = ANYOF_WORDCHAR;	break;
+            case 'W':	namedclass = ANYOF_NWORDCHAR;	break;
+            case 's':	namedclass = ANYOF_SPACE;	break;
+            case 'S':	namedclass = ANYOF_NSPACE;	break;
+            case 'd':	namedclass = ANYOF_DIGIT;	break;
+            case 'D':	namedclass = ANYOF_NDIGIT;	break;
+            case 'v':	namedclass = ANYOF_VERTWS;	break;
+            case 'V':	namedclass = ANYOF_NVERTWS;	break;
+            case 'h':	namedclass = ANYOF_HORIZWS;	break;
+            case 'H':	namedclass = ANYOF_NHORIZWS;	break;
             case 'N':  /* Handle \N{NAME} in class */
                 {
                     const char * const backslash_N_beg = RExC_parse - 2;
@@ -18544,15 +9748,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                 named */
                 }
                 break;
-            case 'n':   value = '\n';                   break;
-            case 'r':   value = '\r';                   break;
-            case 't':   value = '\t';                   break;
-            case 'f':   value = '\f';                   break;
-            case 'b':   value = '\b';                   break;
-            case 'e':   value = ESC_NATIVE;             break;
-            case 'a':   value = '\a';                   break;
+            case 'n':	value = '\n';			break;
+            case 'r':	value = '\r';			break;
+            case 't':	value = '\t';			break;
+            case 'f':	value = '\f';			break;
+            case 'b':	value = '\b';			break;
+            case 'e':	value = ESC_NATIVE;             break;
+            case 'a':	value = '\a';                   break;
             case 'o':
-                RExC_parse--;   /* function expects to be pointed at the 'o' */
+                RExC_parse--;	/* function expects to be pointed at the 'o' */
                 if (! grok_bslash_o(&RExC_parse,
                                             RExC_end,
                                             &value,
@@ -18574,7 +9778,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
                 break;
             case 'x':
-                RExC_parse--;   /* function expects to be pointed at the 'x' */
+                RExC_parse--;	/* function expects to be pointed at the 'x' */
                 if (!  grok_bslash_x(&RExC_parse,
                                             RExC_end,
                                             &value,
@@ -18886,8 +10090,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         cp_list = add_cp_to_invlist(cp_list, '-');
                         element_count++;
                     } else
-                        range = 1;      /* yeah, it's a range! */
-                    continue;   /* but do it the next time */
+                        range = 1;	/* yeah, it's a range! */
+                    continue;	/* but do it the next time */
                 }
             }
         }
@@ -19277,7 +10481,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * ones already on the list */
     if (cp_foldable_list) {
         if (FOLD) {
-            UV start, end;      /* End points of code point ranges */
+            UV start, end;	/* End points of code point ranges */
 
             SV* fold_intersection = NULL;
             SV** use_list;
@@ -20711,8 +11915,8 @@ S_optimize_regclass(pTHX_
 
 #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
 
-STATIC void
-S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
+void
+Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
                 regnode* const node,
                 SV* const cp_list,
                 SV* const runtime_defns,
@@ -20724,7 +11928,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
      *      1) ANYOF_MATCHES_NONE_OUTSIDE_BITMAP_VALUE
      *      2) ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE
      *
-     * Otherwise, it sets the argument to the count returned by add_data(),
+     * Otherwise, it sets the argument to the count returned by reg_add_data(),
      * having allocated and stored an array, av, as follows:
      *  av[0] stores the inversion list defining this class as far as known at
      *        this time, or PL_sv_undef if nothing definite is now known.
@@ -20863,7 +12067,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
     }
 
     rv = newRV_noinc(MUTABLE_SV(av));
-    n = add_data(pRExC_state, STR_WITH_LEN("s"));
+    n = reg_add_data(pRExC_state, STR_WITH_LEN("s"));
     RExC_rxi->data->data[n] = (void*)rv;
     ARG_SET(node, n);
 }
@@ -21437,7 +12641,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
         StructCopy(--src, --dst, regnode);
     }
 
-    place = REGNODE_p(operand); /* Op node, where operand used to be. */
+    place = REGNODE_p(operand);	/* Op node, where operand used to be. */
     src = place + 1; /* NOT REGNODE_AFTER! */
     FLAGS(place) = 0;
     FILL_NODE(operand, op);
@@ -21548,7 +12752,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         regnode * const temp = regnext(REGNODE_p(scan));
 #ifdef EXPERIMENTAL_INPLACESCAN
         if (REGNODE_TYPE(OP(REGNODE_p(scan))) == EXACT) {
-            bool unfolded_multi_char;   /* Unexamined in this routine */
+            bool unfolded_multi_char;	/* Unexamined in this routine */
             if (join_exact(pRExC_state, scan, &min,
                            &unfolded_multi_char, 1, REGNODE_p(val), depth+1))
                 return TRUE; /* Was return EXACT */
@@ -21606,8 +12810,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 }
 #endif
 
-STATIC SV*
-S_get_ANYOFM_contents(pTHX_ const regnode * n) {
+SV*
+Perl_get_ANYOFM_contents(pTHX_ const regnode * n) {
 
     /* Returns an inversion list of all the code points matched by the
      * ANYOFM/NANYOFM node 'n' */
@@ -21639,8 +12843,8 @@ S_get_ANYOFM_contents(pTHX_ const regnode * n) {
     return cp_list;
 }
 
-STATIC SV *
-S_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
+SV *
+Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
     PERL_ARGS_ASSERT_GET_ANYOFHBBM_CONTENTS;
 
     SV * cp_list = NULL;
@@ -21655,716 +12859,11 @@ S_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
     return cp_list;
 }
 
-/*
- - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
- */
-#ifdef DEBUGGING
-
-static void
-S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
-{
-    int bit;
-    int set=0;
-
-    ASSUME(REG_INTFLAGS_NAME_SIZE <= sizeof(flags)*8);
-
-    for (bit=0; bit<REG_INTFLAGS_NAME_SIZE; bit++) {
-        if (flags & (1<<bit)) {
-            if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s", lead);
-            Perl_re_printf( aTHX_  "%s ", PL_reg_intflags_name[bit]);
-        }
-    }
-    if (lead)  {
-        if (set)
-            Perl_re_printf( aTHX_  "\n");
-        else
-            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
-    }
-}
-
-static void
-S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
-{
-    int bit;
-    int set=0;
-    regex_charset cs;
-
-    ASSUME(REG_EXTFLAGS_NAME_SIZE <= sizeof(flags)*8);
-
-    for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) {
-        if (flags & (1U<<bit)) {
-            if ((1U<<bit) & RXf_PMf_CHARSET) {  /* Output separately, below */
-                continue;
-            }
-            if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s", lead);
-            Perl_re_printf( aTHX_  "%s ", PL_reg_extflags_name[bit]);
-        }
-    }
-    if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
-            if (!set++ && lead) {
-                Perl_re_printf( aTHX_  "%s", lead);
-            }
-            switch (cs) {
-                case REGEX_UNICODE_CHARSET:
-                    Perl_re_printf( aTHX_  "UNICODE");
-                    break;
-                case REGEX_LOCALE_CHARSET:
-                    Perl_re_printf( aTHX_  "LOCALE");
-                    break;
-                case REGEX_ASCII_RESTRICTED_CHARSET:
-                    Perl_re_printf( aTHX_  "ASCII-RESTRICTED");
-                    break;
-                case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                    Perl_re_printf( aTHX_  "ASCII-MORE_RESTRICTED");
-                    break;
-                default:
-                    Perl_re_printf( aTHX_  "UNKNOWN CHARACTER SET");
-                    break;
-            }
-    }
-    if (lead)  {
-        if (set)
-            Perl_re_printf( aTHX_  "\n");
-        else
-            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
-    }
-}
-#endif
-
-void
-Perl_regdump(pTHX_ const regexp *r)
-{
-#ifdef DEBUGGING
-    int i;
-    SV * const sv = sv_newmortal();
-    SV *dsv= sv_newmortal();
-    RXi_GET_DECL(r, ri);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REGDUMP;
-
-    (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
-
-    /* Header fields of interest. */
-    for (i = 0; i < 2; i++) {
-        if (r->substrs->data[i].substr) {
-            RE_PV_QUOTED_DECL(s, 0, dsv,
-                            SvPVX_const(r->substrs->data[i].substr),
-                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
-                            PL_dump_re_max_len);
-            Perl_re_printf( aTHX_
-                          "%s %s%s at %" IVdf "..%" UVuf " ",
-                          i ? "floating" : "anchored",
-                          s,
-                          RE_SV_TAIL(r->substrs->data[i].substr),
-                          (IV)r->substrs->data[i].min_offset,
-                          (UV)r->substrs->data[i].max_offset);
-        }
-        else if (r->substrs->data[i].utf8_substr) {
-            RE_PV_QUOTED_DECL(s, 1, dsv,
-                            SvPVX_const(r->substrs->data[i].utf8_substr),
-                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
-                            30);
-            Perl_re_printf( aTHX_
-                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
-                          i ? "floating" : "anchored",
-                          s,
-                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
-                          (IV)r->substrs->data[i].min_offset,
-                          (UV)r->substrs->data[i].max_offset);
-        }
-    }
-
-    if (r->check_substr || r->check_utf8)
-        Perl_re_printf( aTHX_
-                      (const char *)
-                      (   r->check_substr == r->substrs->data[1].substr
-                       && r->check_utf8   == r->substrs->data[1].utf8_substr
-                       ? "(checking floating" : "(checking anchored"));
-    if (r->intflags & PREGf_NOSCAN)
-        Perl_re_printf( aTHX_  " noscan");
-    if (r->extflags & RXf_CHECK_ALL)
-        Perl_re_printf( aTHX_  " isall");
-    if (r->check_substr || r->check_utf8)
-        Perl_re_printf( aTHX_  ") ");
-
-    if (ri->regstclass) {
-        regprop(r, sv, ri->regstclass, NULL, NULL);
-        Perl_re_printf( aTHX_  "stclass %s ", SvPVX_const(sv));
-    }
-    if (r->intflags & PREGf_ANCH) {
-        Perl_re_printf( aTHX_  "anchored");
-        if (r->intflags & PREGf_ANCH_MBOL)
-            Perl_re_printf( aTHX_  "(MBOL)");
-        if (r->intflags & PREGf_ANCH_SBOL)
-            Perl_re_printf( aTHX_  "(SBOL)");
-        if (r->intflags & PREGf_ANCH_GPOS)
-            Perl_re_printf( aTHX_  "(GPOS)");
-        Perl_re_printf( aTHX_ " ");
-    }
-    if (r->intflags & PREGf_GPOS_SEEN)
-        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
-    if (r->intflags & PREGf_SKIP)
-        Perl_re_printf( aTHX_  "plus ");
-    if (r->intflags & PREGf_IMPLICIT)
-        Perl_re_printf( aTHX_  "implicit ");
-    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
-    if (r->extflags & RXf_EVAL_SEEN)
-        Perl_re_printf( aTHX_  "with eval ");
-    Perl_re_printf( aTHX_  "\n");
-    DEBUG_FLAGS_r({
-        regdump_extflags("r->extflags: ", r->extflags);
-        regdump_intflags("r->intflags: ", r->intflags);
-    });
-#else
-    PERL_ARGS_ASSERT_REGDUMP;
-    PERL_UNUSED_CONTEXT;
-    PERL_UNUSED_ARG(r);
-#endif  /* DEBUGGING */
-}
-
-/* Should be synchronized with ANYOF_ #defines in regcomp.h */
-#ifdef DEBUGGING
-
-#  if   CC_WORDCHAR_ != 0 || CC_DIGIT_ != 1        || CC_ALPHA_ != 2    \
-     || CC_LOWER_ != 3    || CC_UPPER_ != 4        || CC_PUNCT_ != 5    \
-     || CC_PRINT_ != 6    || CC_ALPHANUMERIC_ != 7 || CC_GRAPH_ != 8    \
-     || CC_CASED_ != 9    || CC_SPACE_ != 10       || CC_BLANK_ != 11   \
-     || CC_XDIGIT_ != 12  || CC_CNTRL_ != 13       || CC_ASCII_ != 14   \
-     || CC_VERTSPACE_ != 15
-#   error Need to adjust order of anyofs[]
-#  endif
-static const char * const anyofs[] = {
-    "\\w",
-    "\\W",
-    "\\d",
-    "\\D",
-    "[:alpha:]",
-    "[:^alpha:]",
-    "[:lower:]",
-    "[:^lower:]",
-    "[:upper:]",
-    "[:^upper:]",
-    "[:punct:]",
-    "[:^punct:]",
-    "[:print:]",
-    "[:^print:]",
-    "[:alnum:]",
-    "[:^alnum:]",
-    "[:graph:]",
-    "[:^graph:]",
-    "[:cased:]",
-    "[:^cased:]",
-    "\\s",
-    "\\S",
-    "[:blank:]",
-    "[:^blank:]",
-    "[:xdigit:]",
-    "[:^xdigit:]",
-    "[:cntrl:]",
-    "[:^cntrl:]",
-    "[:ascii:]",
-    "[:^ascii:]",
-    "\\v",
-    "\\V"
-};
-#endif
-
-/*
-- regprop - printable representation of opcode, with run time support
-*/
-
-void
-Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
-{
-#ifdef DEBUGGING
-    U8 k;
-    const U8 op = OP(o);
-    RXi_GET_DECL(prog, progi);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_REGPROP;
-
-    SvPVCLEAR(sv);
-
-    if (op > REGNODE_MAX) {          /* regnode.type is unsigned */
-        if (pRExC_state) {  /* This gives more info, if we have it */
-            FAIL3("panic: corrupted regexp opcode %d > %d",
-                  (int)op, (int)REGNODE_MAX);
-        }
-        else {
-            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
-                             (int)op, (int)REGNODE_MAX);
-        }
-    }
-    sv_catpv(sv, REGNODE_NAME(op)); /* Take off const! */
-
-    k = REGNODE_TYPE(op);
-
-    if (k == EXACT) {
-        sv_catpvs(sv, " ");
-        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
-         * is a crude hack but it may be the best for now since
-         * we have no flag "this EXACTish node was UTF-8"
-         * --jhi */
-        pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
-                  PL_colors[0], PL_colors[1],
-                  PERL_PV_ESCAPE_UNI_DETECT |
-                  PERL_PV_ESCAPE_NONASCII   |
-                  PERL_PV_PRETTY_ELLIPSES   |
-                  PERL_PV_PRETTY_LTGT       |
-                  PERL_PV_PRETTY_NOCLEAR
-                  );
-    } else if (k == TRIE) {
-        /* print the details of the trie in dumpuntil instead, as
-         * progi->data isn't available here */
-        const U32 n = ARG(o);
-        const reg_ac_data * const ac = IS_TRIE_AC(op) ?
-               (reg_ac_data *)progi->data->data[n] :
-               NULL;
-        const reg_trie_data * const trie
-            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
-
-        Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags));
-        DEBUG_TRIE_COMPILE_r({
-          if (trie->jump)
-            sv_catpvs(sv, "(JUMP)");
-          Perl_sv_catpvf(aTHX_ sv,
-            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
-            (UV)trie->startstate,
-            (IV)trie->statecount-1, /* -1 because of the unused 0 element */
-            (UV)trie->wordcount,
-            (UV)trie->minlen,
-            (UV)trie->maxlen,
-            (UV)TRIE_CHARCOUNT(trie),
-            (UV)trie->uniquecharcount
-          );
-        });
-        if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
-            sv_catpvs(sv, "[");
-            (void) put_charclass_bitmap_innards(sv,
-                                                ((IS_ANYOF_TRIE(op))
-                                                 ? ANYOF_BITMAP(o)
-                                                 : TRIE_BITMAP(trie)),
-                                                NULL,
-                                                NULL,
-                                                NULL,
-                                                0,
-                                                FALSE
-                                               );
-            sv_catpvs(sv, "]");
-        }
-    } else if (k == CURLY) {
-        U32 lo = ARG1(o), hi = ARG2(o);
-        if (op == CURLYM || op == CURLYN || op == CURLYX)
-            Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
-        Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
-        if (hi == REG_INFTY)
-            sv_catpvs(sv, "INFTY");
-        else
-            Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi);
-        sv_catpvs(sv, "}");
-    }
-    else if (k == WHILEM && o->flags)                   /* Ordinal/of */
-        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
-    else if (k == REF || k == OPEN || k == CLOSE
-             || k == GROUPP || op == ACCEPT)
-    {
-        AV *name_list= NULL;
-        U32 parno= (op == ACCEPT)              ? (U32)ARG2L(o) :
-                   (op == OPEN || op == CLOSE) ? (U32)PARNO(o) :
-                                                 (U32)ARG(o);
-        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
-        if ( RXp_PAREN_NAMES(prog) ) {
-            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
-        } else if ( pRExC_state ) {
-            name_list= RExC_paren_name_list;
-        }
-        if ( name_list ) {
-            if ( k != REF || (op < REFN)) {
-                SV **name= av_fetch_simple(name_list, parno, 0 );
-                if (name)
-                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-            }
-            else
-            if (parno > 0) {
-                /* parno must always be larger than 0 for this block
-                 * as it represents a slot into the data array, which
-                 * has the 0 slot reserved for a placeholder so any valid
-                 * index into it is always true, eg non-zero
-                 * see the '%' "what" type and the implementation of
-                 * S_add_data()
-                 */
-                SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
-                I32 *nums=(I32*)SvPVX(sv_dat);
-                SV **name= av_fetch_simple(name_list, nums[0], 0 );
-                I32 n;
-                if (name) {
-                    for ( n=0; n<SvIVX(sv_dat); n++ ) {
-                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
-                                    (n ? "," : ""), (IV)nums[n]);
-                    }
-                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-                }
-            }
-        }
-        if ( k == REF && reginfo) {
-            U32 n = ARG(o);  /* which paren pair */
-            I32 ln = prog->offs[n].start;
-            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
-                Perl_sv_catpvf(aTHX_ sv, ": FAIL");
-            else if (ln == prog->offs[n].end)
-                Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
-            else {
-                const char *s = reginfo->strbeg + ln;
-                Perl_sv_catpvf(aTHX_ sv, ": ");
-                Perl_pv_pretty( aTHX_ sv, s, prog->offs[n].end - prog->offs[n].start, 32, 0, 0,
-                    PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
-            }
-        }
-    } else if (k == GOSUB) {
-        AV *name_list= NULL;
-        if ( RXp_PAREN_NAMES(prog) ) {
-            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
-        } else if ( pRExC_state ) {
-            name_list= RExC_paren_name_list;
-        }
-
-        /* Paren and offset */
-        Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
-                (int)((o + (int)ARG2L(o)) - progi->program) );
-        if (name_list) {
-            SV **name= av_fetch_simple(name_list, ARG(o), 0 );
-            if (name)
-                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
-        }
-    }
-    else if (k == LOGICAL)
-        /* 2: embedded, otherwise 1 */
-        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
-    else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
-        U8 flags;
-        char * bitmap;
-        U8 do_sep = 0;    /* Do we need to separate various components of the
-                             output? */
-        /* Set if there is still an unresolved user-defined property */
-        SV *unresolved                = NULL;
-
-        /* Things that are ignored except when the runtime locale is UTF-8 */
-        SV *only_utf8_locale_invlist = NULL;
-
-        /* Code points that don't fit in the bitmap */
-        SV *nonbitmap_invlist = NULL;
-
-        /* And things that aren't in the bitmap, but are small enough to be */
-        SV* bitmap_range_not_in_bitmap = NULL;
-
-        bool inverted;
-
-        if (k != ANYOF) {
-            flags = 0;
-            bitmap = NULL;
-        }
-        else {
-            flags = ANYOF_FLAGS(o);
-            bitmap = ANYOF_BITMAP(o);
-        }
-
-        if (op == ANYOFL || op == ANYOFPOSIXL) {
-            if ((flags & ANYOFL_UTF8_LOCALE_REQD)) {
-                sv_catpvs(sv, "{utf8-locale-reqd}");
-            }
-            if (flags & ANYOFL_FOLD) {
-                sv_catpvs(sv, "{i}");
-            }
-        }
-
-        inverted = flags & ANYOF_INVERT;
-
-        /* If there is stuff outside the bitmap, get it */
-        if (k == ANYOFR) {
-
-            /* For a single range, split into the parts inside vs outside the
-             * bitmap. */
-            UV start = ANYOFRbase(o);
-            UV end   = ANYOFRbase(o) + ANYOFRdelta(o);
-
-            if (start < NUM_ANYOF_CODE_POINTS) {
-                if (end < NUM_ANYOF_CODE_POINTS) {
-                    bitmap_range_not_in_bitmap
-                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
-                                                  start, end);
-                }
-                else {
-                    bitmap_range_not_in_bitmap
-                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
-                                                  start, NUM_ANYOF_CODE_POINTS);
-                    start = NUM_ANYOF_CODE_POINTS;
-                }
-            }
-
-            if (start >= NUM_ANYOF_CODE_POINTS) {
-                nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
-                                                ANYOFRbase(o),
-                                                ANYOFRbase(o) + ANYOFRdelta(o));
-            }
-        }
-        else if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(o)) {
-            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
-                                                      NUM_ANYOF_CODE_POINTS,
-                                                      UV_MAX);
-        }
-        else if (ANYOF_HAS_AUX(o)) {
-                (void) GET_REGCLASS_AUX_DATA(prog, o, FALSE,
-                                                &unresolved,
-                                                &only_utf8_locale_invlist,
-                                                &nonbitmap_invlist);
-
-            /* The aux data may contain stuff that could fit in the bitmap.
-             * This could come from a user-defined property being finally
-             * resolved when this call was done; or much more likely because
-             * there are matches that require UTF-8 to be valid, and so aren't
-             * in the bitmap (or ANYOFR).  This is teased apart later */
-            _invlist_intersection(nonbitmap_invlist,
-                                  PL_InBitmap,
-                                  &bitmap_range_not_in_bitmap);
-            /* Leave just the things that don't fit into the bitmap */
-            _invlist_subtract(nonbitmap_invlist,
-                              PL_InBitmap,
-                              &nonbitmap_invlist);
-        }
-
-        /* Ready to start outputting.  First, the initial left bracket */
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-
-        if (   bitmap
-            || bitmap_range_not_in_bitmap
-            || only_utf8_locale_invlist
-            || unresolved)
-        {
-            /* Then all the things that could fit in the bitmap */
-            do_sep = put_charclass_bitmap_innards(
-                                    sv,
-                                    bitmap,
-                                    bitmap_range_not_in_bitmap,
-                                    only_utf8_locale_invlist,
-                                    o,
-                                    flags,
-
-                                    /* Can't try inverting for a
-                                                   * better display if there
-                                                   * are things that haven't
-                                                   * been resolved */
-                                    (unresolved != NULL || k == ANYOFR));
-            SvREFCNT_dec(bitmap_range_not_in_bitmap);
-
-            /* If there are user-defined properties which haven't been defined
-             * yet, output them.  If the result is not to be inverted, it is
-             * clearest to output them in a separate [] from the bitmap range
-             * stuff.  If the result is to be complemented, we have to show
-             * everything in one [], as the inversion applies to the whole
-             * thing.  Use {braces} to separate them from anything in the
-             * bitmap and anything above the bitmap. */
-            if (unresolved) {
-                if (inverted) {
-                    if (! do_sep) { /* If didn't output anything in the bitmap
-                                     */
-                        sv_catpvs(sv, "^");
-                    }
-                    sv_catpvs(sv, "{");
-                }
-                else if (do_sep) {
-                    Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1],
-                                                      PL_colors[0]);
-                }
-                sv_catsv(sv, unresolved);
-                if (inverted) {
-                    sv_catpvs(sv, "}");
-                }
-                do_sep = ! inverted;
-            }
-            else if (     do_sep == 2
-                     && ! nonbitmap_invlist
-                     &&   ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(o))
-            {
-                /* Here, the display shows the class as inverted, and
-                 * everything above the lower display should also match, but
-                 * there is no indication of that.  Add this range so the code
-                 * below will add it to the display */
-                _invlist_union_complement_2nd(nonbitmap_invlist,
-                                              PL_InBitmap,
-                                              &nonbitmap_invlist);
-            }
-        }
-
-        /* And, finally, add the above-the-bitmap stuff */
-        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
-            SV* contents;
-
-            /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len > 256)
-                                    ? PL_dump_re_max_len
-                                    : 256;
-
-            /* This is output in a separate [] */
-            if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
-            }
-
-            /* And, for easy of understanding, it is shown in the
-             * uncomplemented form if possible.  The one exception being if
-             * there are unresolved items, where the inversion has to be
-             * delayed until runtime */
-            if (inverted && ! unresolved) {
-                _invlist_invert(nonbitmap_invlist);
-                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
-            }
-
-            contents = invlist_contents(nonbitmap_invlist,
-                                        FALSE /* output suitable for catsv */
-                                       );
-
-            /* If the output is shorter than the permissible maximum, just do it. */
-            if (SvCUR(contents) <= dump_len) {
-                sv_catsv(sv, contents);
-            }
-            else {
-                const char * contents_string = SvPVX(contents);
-                STRLEN i = dump_len;
-
-                /* Otherwise, start at the permissible max and work back to the
-                 * first break possibility */
-                while (i > 0 && contents_string[i] != ' ') {
-                    i--;
-                }
-                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
-                                       find a legal break */
-                    i = dump_len;
-                }
-
-                sv_catpvn(sv, contents_string, i);
-                sv_catpvs(sv, "...");
-            }
-
-            SvREFCNT_dec_NN(contents);
-            SvREFCNT_dec_NN(nonbitmap_invlist);
-        }
-
-        /* And finally the matching, closing ']' */
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        if (op == ANYOFHs) {
-            Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1));
-        }
-        else if (REGNODE_TYPE(op) != ANYOF) {
-            U8 lowest = (op != ANYOFHr)
-                         ? FLAGS(o)
-                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
-            U8 highest = (op == ANYOFHr)
-                         ? HIGHEST_ANYOF_HRx_BYTE(FLAGS(o))
-                         : (op == ANYOFH || op == ANYOFR)
-                           ? 0xFF
-                           : lowest;
-#ifndef EBCDIC
-            if (op != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
-#endif
-            {
-                Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
-                if (lowest != highest) {
-                    Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
-                }
-                Perl_sv_catpvf(aTHX_ sv, ")");
-            }
-        }
-
-        SvREFCNT_dec(unresolved);
-    }
-    else if (k == ANYOFM) {
-        SV * cp_list = get_ANYOFM_contents(o);
-
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-        if (op == NANYOFM) {
-            _invlist_invert(cp_list);
-        }
-
-        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE);
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        SvREFCNT_dec(cp_list);
-    }
-    else if (k == ANYOFHbbm) {
-        SV * cp_list = get_ANYOFHbbm_contents(o);
-        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-
-        sv_catsv(sv, invlist_contents(cp_list,
-                                      FALSE /* output suitable for catsv */
-                                     ));
-        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
-
-        SvREFCNT_dec(cp_list);
-    }
-    else if (k == POSIXD || k == NPOSIXD) {
-        U8 index = FLAGS(o) * 2;
-        if (index < C_ARRAY_LENGTH(anyofs)) {
-            if (*anyofs[index] != '[')  {
-                sv_catpvs(sv, "[");
-            }
-            sv_catpv(sv, anyofs[index]);
-            if (*anyofs[index] != '[')  {
-                sv_catpvs(sv, "]");
-            }
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
-        }
-    }
-    else if (k == BOUND || k == NBOUND) {
-        /* Must be synced with order of 'bound_type' in regcomp.h */
-        const char * const bounds[] = {
-            "",      /* Traditional */
-            "{gcb}",
-            "{lb}",
-            "{sb}",
-            "{wb}"
-        };
-        assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
-        sv_catpv(sv, bounds[FLAGS(o)]);
-    }
-    else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
-        Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
-        if (o->next_off) {
-            Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
-        }
-        Perl_sv_catpvf(aTHX_ sv, "]");
-    }
-    else if (op == SBOL)
-        Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
-
-    /* add on the verb argument if there is one */
-    if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) {
-        if ( ARG(o) )
-            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
-                       SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
-        else
-            sv_catpvs(sv, ":NULL");
-    }
-#else
-    PERL_UNUSED_CONTEXT;
-    PERL_UNUSED_ARG(sv);
-    PERL_UNUSED_ARG(o);
-    PERL_UNUSED_ARG(prog);
-    PERL_UNUSED_ARG(reginfo);
-    PERL_UNUSED_ARG(pRExC_state);
-#endif  /* DEBUGGING */
-}
-
 
 
 SV *
 Perl_re_intuit_string(pTHX_ REGEXP * const r)
-{                               /* Assume that RE_INTUIT is set */
+{				/* Assume that RE_INTUIT is set */
     /* Returns an SV containing a string that must appear in the target for it
      * to match, or NULL if nothing is known that must match.
      *
@@ -22686,7 +13185,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                 }
                 break;
             case '%':
-                /* NO-OP a '%' data contains a null pointer, so that add_data
+                /* NO-OP a '%' data contains a null pointer, so that reg_add_data
                  * always returns non-zero, this should only ever happen in the
                  * 0 index */
                 assert(n==0);
@@ -22703,9 +13202,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     Safefree(ri);
 }
 
-#define av_dup_inc(s, t)        MUTABLE_AV(sv_dup_inc((const SV *)s, t))
-#define hv_dup_inc(s, t)        MUTABLE_HV(sv_dup_inc((const SV *)s, t))
-#define SAVEPVN(p, n)   ((p) ? savepvn(p, n) : NULL)
+#define av_dup_inc(s, t)	MUTABLE_AV(sv_dup_inc((const SV *)s, t))
+#define hv_dup_inc(s, t)	MUTABLE_HV(sv_dup_inc((const SV *)s, t))
+#define SAVEPVN(p, n)	((p) ? savepvn(p, n) : NULL)
 
 /*
 =for apidoc re_dup_guts
@@ -22921,7 +13420,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                 break;
             case '%':
                 /* this is a placeholder type, it exists purely so that
-                 * add_data always returns a non-zero value, this type of
+                 * reg_add_data always returns a non-zero value, this type of
                  * entry should ONLY be present in the 0 slot of the array */
                 assert(i == 0);
                 d->data[i]= ri->data->data[i];
@@ -23014,778 +13513,6 @@ Perl_save_re_context(pTHX)
 }
 #endif
 
-#ifdef DEBUGGING
-
-STATIC void
-S_put_code_point(pTHX_ SV *sv, UV c)
-{
-    PERL_ARGS_ASSERT_PUT_CODE_POINT;
-
-    if (c > 255) {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
-    }
-    else if (isPRINT(c)) {
-        const char string = (char) c;
-
-        /* We use {phrase} as metanotation in the class, so also escape literal
-         * braces */
-        if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
-            sv_catpvs(sv, "\\");
-        sv_catpvn(sv, &string, 1);
-    }
-    else if (isMNEMONIC_CNTRL(c)) {
-        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
-    }
-    else {
-        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
-    }
-}
-
-STATIC void
-S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
-{
-    /* Appends to 'sv' a displayable version of the range of code points from
-     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
-     * that have them, when they occur at the beginning or end of the range.
-     * It uses hex to output the remaining code points, unless 'allow_literals'
-     * is true, in which case the printable ASCII ones are output as-is (though
-     * some of these will be escaped by put_code_point()).
-     *
-     * NOTE:  This is designed only for printing ranges of code points that fit
-     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
-     */
-
-    const unsigned int min_range_count = 3;
-
-    assert(start <= end);
-
-    PERL_ARGS_ASSERT_PUT_RANGE;
-
-    while (start <= end) {
-        UV this_end;
-        const char * format;
-
-        if (    end - start < min_range_count
-            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
-        {
-            /* Output a range of 1 or 2 chars individually, or longer ranges
-             * when printable */
-            for (; start <= end; start++) {
-                put_code_point(sv, start);
-            }
-            break;
-        }
-
-        /* If permitted by the input options, and there is a possibility that
-         * this range contains a printable literal, look to see if there is
-         * one. */
-        if (allow_literals && start <= MAX_PRINT_A) {
-
-            /* If the character at the beginning of the range isn't an ASCII
-             * printable, effectively split the range into two parts:
-             *  1) the portion before the first such printable,
-             *  2) the rest
-             * and output them separately. */
-            if (! isPRINT_A(start)) {
-                UV temp_end = start + 1;
-
-                /* There is no point looking beyond the final possible
-                 * printable, in MAX_PRINT_A */
-                UV max = MIN(end, MAX_PRINT_A);
-
-                while (temp_end <= max && ! isPRINT_A(temp_end)) {
-                    temp_end++;
-                }
-
-                /* Here, temp_end points to one beyond the first printable if
-                 * found, or to one beyond 'max' if not.  If none found, make
-                 * sure that we use the entire range */
-                if (temp_end > MAX_PRINT_A) {
-                    temp_end = end + 1;
-                }
-
-                /* Output the first part of the split range: the part that
-                 * doesn't have printables, with the parameter set to not look
-                 * for literals (otherwise we would infinitely recurse) */
-                put_range(sv, start, temp_end - 1, FALSE);
-
-                /* The 2nd part of the range (if any) starts here. */
-                start = temp_end;
-
-                /* We do a continue, instead of dropping down, because even if
-                 * the 2nd part is non-empty, it could be so short that we want
-                 * to output it as individual characters, as tested for at the
-                 * top of this loop.  */
-                continue;
-            }
-
-            /* Here, 'start' is a printable ASCII.  If it is an alphanumeric,
-             * output a sub-range of just the digits or letters, then process
-             * the remaining portion as usual. */
-            if (isALPHANUMERIC_A(start)) {
-                UV mask = (isDIGIT_A(start))
-                           ? CC_DIGIT_
-                             : isUPPER_A(start)
-                               ? CC_UPPER_
-                               : CC_LOWER_;
-                UV temp_end = start + 1;
-
-                /* Find the end of the sub-range that includes just the
-                 * characters in the same class as the first character in it */
-                while (temp_end <= end && generic_isCC_A_(temp_end, mask)) {
-                    temp_end++;
-                }
-                temp_end--;
-
-                /* For short ranges, don't duplicate the code above to output
-                 * them; just call recursively */
-                if (temp_end - start < min_range_count) {
-                    put_range(sv, start, temp_end, FALSE);
-                }
-                else {  /* Output as a range */
-                    put_code_point(sv, start);
-                    sv_catpvs(sv, "-");
-                    put_code_point(sv, temp_end);
-                }
-                start = temp_end + 1;
-                continue;
-            }
-
-            /* We output any other printables as individual characters */
-            if (isPUNCT_A(start) || isSPACE_A(start)) {
-                while (start <= end && (isPUNCT_A(start)
-                                        || isSPACE_A(start)))
-                {
-                    put_code_point(sv, start);
-                    start++;
-                }
-                continue;
-            }
-        } /* End of looking for literals */
-
-        /* Here is not to output as a literal.  Some control characters have
-         * mnemonic names.  Split off any of those at the beginning and end of
-         * the range to print mnemonically.  It isn't possible for many of
-         * these to be in a row, so this won't overwhelm with output */
-        if (   start <= end
-            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
-        {
-            while (isMNEMONIC_CNTRL(start) && start <= end) {
-                put_code_point(sv, start);
-                start++;
-            }
-
-            /* If this didn't take care of the whole range ... */
-            if (start <= end) {
-
-                /* Look backwards from the end to find the final non-mnemonic
-                 * */
-                UV temp_end = end;
-                while (isMNEMONIC_CNTRL(temp_end)) {
-                    temp_end--;
-                }
-
-                /* And separately output the interior range that doesn't start
-                 * or end with mnemonics */
-                put_range(sv, start, temp_end, FALSE);
-
-                /* Then output the mnemonic trailing controls */
-                start = temp_end + 1;
-                while (start <= end) {
-                    put_code_point(sv, start);
-                    start++;
-                }
-                break;
-            }
-        }
-
-        /* As a final resort, output the range or subrange as hex. */
-
-        if (start >= NUM_ANYOF_CODE_POINTS) {
-            this_end = end;
-        }
-        else {  /* Have to split range at the bitmap boundary */
-            this_end = (end < NUM_ANYOF_CODE_POINTS)
-                        ? end
-                        : NUM_ANYOF_CODE_POINTS - 1;
-        }
-#if NUM_ANYOF_CODE_POINTS > 256
-        format = (this_end < 256)
-                 ? "\\x%02" UVXf "-\\x%02" UVXf
-                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
-#else
-        format = "\\x%02" UVXf "-\\x%02" UVXf;
-#endif
-        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
-        Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE_STMT;
-        break;
-    }
-}
-
-STATIC void
-S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
-{
-    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
-     * 'invlist' */
-
-    UV start, end;
-    bool allow_literals = TRUE;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;
-
-    /* Generally, it is more readable if printable characters are output as
-     * literals, but if a range (nearly) spans all of them, it's best to output
-     * it as a single range.  This code will use a single range if all but 2
-     * ASCII printables are in it */
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-
-        /* If the range starts beyond the final printable, it doesn't have any
-         * in it */
-        if (start > MAX_PRINT_A) {
-            break;
-        }
-
-        /* In both ASCII and EBCDIC, a SPACE is the lowest printable.  To span
-         * all but two, the range must start and end no later than 2 from
-         * either end */
-        if (start < ' ' + 2 && end > MAX_PRINT_A - 2) {
-            if (end > MAX_PRINT_A) {
-                end = MAX_PRINT_A;
-            }
-            if (start < ' ') {
-                start = ' ';
-            }
-            if (end - start >= MAX_PRINT_A - ' ' - 2) {
-                allow_literals = FALSE;
-            }
-            break;
-        }
-    }
-    invlist_iterfinish(invlist);
-
-    /* Here we have figured things out.  Output each range */
-    invlist_iterinit(invlist);
-    while (invlist_iternext(invlist, &start, &end)) {
-        if (start >= NUM_ANYOF_CODE_POINTS) {
-            break;
-        }
-        put_range(sv, start, end, allow_literals);
-    }
-    invlist_iterfinish(invlist);
-
-    return;
-}
-
-STATIC SV*
-S_put_charclass_bitmap_innards_common(pTHX_
-        SV* invlist,            /* The bitmap */
-        SV* posixes,            /* Under /l, things like [:word:], \S */
-        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
-        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
-        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
-        const bool invert       /* Is the result to be inverted? */
-)
-{
-    /* Create and return an SV containing a displayable version of the bitmap
-     * and associated information determined by the input parameters.  If the
-     * output would have been only the inversion indicator '^', NULL is instead
-     * returned. */
-
-    SV * output;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
-
-    if (invert) {
-        output = newSVpvs("^");
-    }
-    else {
-        output = newSVpvs("");
-    }
-
-    /* First, the code points in the bitmap that are unconditionally there */
-    put_charclass_bitmap_innards_invlist(output, invlist);
-
-    /* Traditionally, these have been placed after the main code points */
-    if (posixes) {
-        sv_catsv(output, posixes);
-    }
-
-    if (only_utf8 && _invlist_len(only_utf8)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, only_utf8);
-    }
-
-    if (not_utf8 && _invlist_len(not_utf8)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, not_utf8);
-    }
-
-    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
-        Perl_sv_catpvf(aTHX_ output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
-        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);
-
-        /* This is the only list in this routine that can legally contain code
-         * points outside the bitmap range.  The call just above to
-         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
-         * output them here.  There's about a half-dozen possible, and none in
-         * contiguous ranges longer than 2 */
-        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
-            UV start, end;
-            SV* above_bitmap = NULL;
-
-            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);
-
-            invlist_iterinit(above_bitmap);
-            while (invlist_iternext(above_bitmap, &start, &end)) {
-                UV i;
-
-                for (i = start; i <= end; i++) {
-                    put_code_point(output, i);
-                }
-            }
-            invlist_iterfinish(above_bitmap);
-            SvREFCNT_dec_NN(above_bitmap);
-        }
-    }
-
-    if (invert && SvCUR(output) == 1) {
-        return NULL;
-    }
-
-    return output;
-}
-
-STATIC U8
-S_put_charclass_bitmap_innards(pTHX_ SV *sv,
-                                     char *bitmap,
-                                     SV *nonbitmap_invlist,
-                                     SV *only_utf8_locale_invlist,
-                                     const regnode * const node,
-                                     const U8 flags,
-                                     const bool force_as_is_display)
-{
-    /* Appends to 'sv' a displayable version of the innards of the bracketed
-     * character class defined by the other arguments:
-     *  'bitmap' points to the bitmap, or NULL if to ignore that.
-     *  'nonbitmap_invlist' is an inversion list of the code points that are in
-     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
-     *      none.  The reasons for this could be that they require some
-     *      condition such as the target string being or not being in UTF-8
-     *      (under /d), or because they came from a user-defined property that
-     *      was not resolved at the time of the regex compilation (under /u)
-     *  'only_utf8_locale_invlist' is an inversion list of the code points that
-     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
-     *  'node' is the regex pattern ANYOF node.  It is needed only when the
-     *      above two parameters are not null, and is passed so that this
-     *      routine can tease apart the various reasons for them.
-     *  'flags' is the flags field of 'node'
-     *  'force_as_is_display' is TRUE if this routine should definitely NOT try
-     *      to invert things to see if that leads to a cleaner display.  If
-     *      FALSE, this routine is free to use its judgment about doing this.
-     *
-     * It returns 0 if nothing was actually output.  (It may be that
-     *              the bitmap, etc is empty.)
-     *            1 if the output wasn't inverted (didn't begin with a '^')
-     *            2 if the output was inverted (did begin with a '^')
-     *
-     * When called for outputting the bitmap of a non-ANYOF node, just pass the
-     * bitmap, with the succeeding parameters set to NULL, and the final one to
-     * FALSE.
-     */
-
-    /* In general, it tries to display the 'cleanest' representation of the
-     * innards, choosing whether to display them inverted or not, regardless of
-     * whether the class itself is to be inverted.  However,  there are some
-     * cases where it can't try inverting, as what actually matches isn't known
-     * until runtime, and hence the inversion isn't either. */
-
-    bool inverting_allowed = ! force_as_is_display;
-
-    int i;
-    STRLEN orig_sv_cur = SvCUR(sv);
-
-    SV* invlist;            /* Inversion list we accumulate of code points that
-                               are unconditionally matched */
-    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
-                               UTF-8 */
-    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
-                             */
-    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
-    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
-                                       is UTF-8 */
-
-    SV* as_is_display;      /* The output string when we take the inputs
-                               literally */
-    SV* inverted_display;   /* The output string when we invert the inputs */
-
-    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
-                                                   to match? */
-    /* We are biased in favor of displaying things without them being inverted,
-     * as that is generally easier to understand */
-    const int bias = 5;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
-
-    /* Start off with whatever code points are passed in.  (We clone, so we
-     * don't change the caller's list) */
-    if (nonbitmap_invlist) {
-        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
-        invlist = invlist_clone(nonbitmap_invlist, NULL);
-    }
-    else {  /* Worst case size is every other code point is matched */
-        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
-    }
-
-    if (flags) {
-        if (OP(node) == ANYOFD) {
-
-            /* This flag indicates that the code points below 0x100 in the
-             * nonbitmap list are precisely the ones that match only when the
-             * target is UTF-8 (they should all be non-ASCII). */
-            if (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES) {
-                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
-                _invlist_subtract(invlist, only_utf8, &invlist);
-            }
-
-            /* And this flag for matching all non-ASCII 0xFF and below */
-            if (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared) {
-                not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
-            }
-        }
-        else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
-
-            /* If either of these flags are set, what matches isn't
-             * determinable except during execution, so don't know enough here
-             * to invert */
-            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
-                inverting_allowed = FALSE;
-            }
-
-            /* What the posix classes match also varies at runtime, so these
-             * will be output symbolically. */
-            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
-                int i;
-
-                posixes = newSVpvs("");
-                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
-                    if (ANYOF_POSIXL_TEST(node, i)) {
-                        sv_catpv(posixes, anyofs[i]);
-                    }
-                }
-            }
-        }
-    }
-
-    /* Accumulate the bit map into the unconditional match list */
-    if (bitmap) {
-        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-            if (BITMAP_TEST(bitmap, i)) {
-                int start = i++;
-                for (;
-                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
-                     i++)
-                { /* empty */ }
-                invlist = _add_range_to_invlist(invlist, start, i-1);
-            }
-        }
-    }
-
-    /* Make sure that the conditional match lists don't have anything in them
-     * that match unconditionally; otherwise the output is quite confusing.
-     * This could happen if the code that populates these misses some
-     * duplication. */
-    if (only_utf8) {
-        _invlist_subtract(only_utf8, invlist, &only_utf8);
-    }
-    if (not_utf8) {
-        _invlist_subtract(not_utf8, invlist, &not_utf8);
-    }
-
-    if (only_utf8_locale_invlist) {
-
-        /* Since this list is passed in, we have to make a copy before
-         * modifying it */
-        only_utf8_locale = invlist_clone(only_utf8_locale_invlist, NULL);
-
-        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
-
-        /* And, it can get really weird for us to try outputting an inverted
-         * form of this list when it has things above the bitmap, so don't even
-         * try */
-        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
-            inverting_allowed = FALSE;
-        }
-    }
-
-    /* Calculate what the output would be if we take the input as-is */
-    as_is_display = put_charclass_bitmap_innards_common(invlist,
-                                                    posixes,
-                                                    only_utf8,
-                                                    not_utf8,
-                                                    only_utf8_locale,
-                                                    invert);
-
-    /* If have to take the output as-is, just do that */
-    if (! inverting_allowed) {
-        if (as_is_display) {
-            sv_catsv(sv, as_is_display);
-            SvREFCNT_dec_NN(as_is_display);
-        }
-    }
-    else { /* But otherwise, create the output again on the inverted input, and
-              use whichever version is shorter */
-
-        int inverted_bias, as_is_bias;
-
-        /* We will apply our bias to whichever of the results doesn't have
-         * the '^' */
-        bool trial_invert;
-        if (invert) {
-            trial_invert = FALSE;
-            as_is_bias = bias;
-            inverted_bias = 0;
-        }
-        else {
-            trial_invert = TRUE;
-            as_is_bias = 0;
-            inverted_bias = bias;
-        }
-
-        /* Now invert each of the lists that contribute to the output,
-         * excluding from the result things outside the possible range */
-
-        /* For the unconditional inversion list, we have to add in all the
-         * conditional code points, so that when inverted, they will be gone
-         * from it */
-        _invlist_union(only_utf8, invlist, &invlist);
-        _invlist_union(not_utf8, invlist, &invlist);
-        _invlist_union(only_utf8_locale, invlist, &invlist);
-        _invlist_invert(invlist);
-        _invlist_intersection(invlist, PL_InBitmap, &invlist);
-
-        if (only_utf8) {
-            _invlist_invert(only_utf8);
-            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
-        }
-        else if (not_utf8) {
-
-            /* If a code point matches iff the target string is not in UTF-8,
-             * then complementing the result has it not match iff not in UTF-8,
-             * which is the same thing as matching iff it is UTF-8. */
-            only_utf8 = not_utf8;
-            not_utf8 = NULL;
-        }
-
-        if (only_utf8_locale) {
-            _invlist_invert(only_utf8_locale);
-            _invlist_intersection(only_utf8_locale,
-                                  PL_InBitmap,
-                                  &only_utf8_locale);
-        }
-
-        inverted_display = put_charclass_bitmap_innards_common(
-                                            invlist,
-                                            posixes,
-                                            only_utf8,
-                                            not_utf8,
-                                            only_utf8_locale, trial_invert);
-
-        /* Use the shortest representation, taking into account our bias
-         * against showing it inverted */
-        if (   inverted_display
-            && (   ! as_is_display
-                || (  SvCUR(inverted_display) + inverted_bias
-                    < SvCUR(as_is_display)    + as_is_bias)))
-        {
-            sv_catsv(sv, inverted_display);
-            invert = ! invert;
-        }
-        else if (as_is_display) {
-            sv_catsv(sv, as_is_display);
-        }
-
-        SvREFCNT_dec(as_is_display);
-        SvREFCNT_dec(inverted_display);
-    }
-
-    SvREFCNT_dec_NN(invlist);
-    SvREFCNT_dec(only_utf8);
-    SvREFCNT_dec(not_utf8);
-    SvREFCNT_dec(posixes);
-    SvREFCNT_dec(only_utf8_locale);
-
-    U8 did_output_something = (bool) (SvCUR(sv) > orig_sv_cur);
-    if (did_output_something) {
-        /* Distinguish between non and inverted cases */
-        did_output_something += invert;
-    }
-
-    return did_output_something;
-}
-
-#define CLEAR_OPTSTART                                                       \
-    if (optstart) STMT_START {                                               \
-        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                           \
-                              " (%" IVdf " nodes)\n", (IV)(node - optstart))); \
-        optstart=NULL;                                                       \
-    } STMT_END
-
-#define DUMPUNTIL(b,e)                                                       \
-                    CLEAR_OPTSTART;                                          \
-                    node=dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
-
-STATIC const regnode *
-S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
-            const regnode *last, const regnode *plast,
-            SV* sv, I32 indent, U32 depth)
-{
-    const regnode *next;
-    const regnode *optstart= NULL;
-
-    RXi_GET_DECL(r, ri);
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
-    PERL_ARGS_ASSERT_DUMPUNTIL;
-
-#ifdef DEBUG_DUMPUNTIL
-    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n", indent, node-start,
-        last ? last-start : 0, plast ? plast-start : 0);
-#endif
-
-    if (plast && plast < last)
-        last= plast;
-
-    while (node && (!last || node < last)) {
-        const U8 op = OP(node);
-
-        if (op == CLOSE || op == SRCLOSE || op == WHILEM)
-            indent--;
-        next = regnext((regnode *)node);
-        const regnode *after = regnode_after((regnode *)node,0);
-
-        /* Where, what. */
-        if (op == OPTIMIZED) {
-            if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
-                optstart = node;
-            else
-                goto after_print;
-        } else
-            CLEAR_OPTSTART;
-
-        regprop(r, sv, node, NULL, NULL);
-        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
-                      (int)(2*indent + 1), "", SvPVX_const(sv));
-
-        if (op != OPTIMIZED) {
-            if (next == NULL)           /* Next ptr. */
-                Perl_re_printf( aTHX_  " (0)");
-            else if (REGNODE_TYPE(op) == BRANCH
-                     && REGNODE_TYPE(OP(next)) != BRANCH )
-                Perl_re_printf( aTHX_  " (FAIL)");
-            else
-                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
-            Perl_re_printf( aTHX_ "\n");
-        }
-
-      after_print:
-        if (REGNODE_TYPE(op) == BRANCHJ) {
-            assert(next);
-            const regnode *nnode = (OP(next) == LONGJMP
-                                   ? regnext((regnode *)next)
-                                   : next);
-            if (last && nnode > last)
-                nnode = last;
-            DUMPUNTIL(after, nnode);
-        }
-        else if (REGNODE_TYPE(op) == BRANCH) {
-            assert(next);
-            DUMPUNTIL(after, next);
-        }
-        else if ( REGNODE_TYPE(op)  == TRIE ) {
-            const regnode *this_trie = node;
-            const U32 n = ARG(node);
-            const reg_ac_data * const ac = op>=AHOCORASICK ?
-               (reg_ac_data *)ri->data->data[n] :
-               NULL;
-            const reg_trie_data * const trie =
-                (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
-#ifdef DEBUGGING
-            AV *const trie_words
-                           = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
-#endif
-            const regnode *nextbranch= NULL;
-            I32 word_idx;
-            SvPVCLEAR(sv);
-            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
-                SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);
-
-                Perl_re_indentf( aTHX_  "%s ",
-                    indent+3,
-                    elem_ptr
-                    ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), PL_dump_re_max_len,
-                                PL_colors[0], PL_colors[1],
-                                (SvUTF8(*elem_ptr)
-                                 ? PERL_PV_ESCAPE_UNI
-                                 : 0)
-                                | PERL_PV_PRETTY_ELLIPSES
-                                | PERL_PV_PRETTY_LTGT
-                            )
-                    : "???"
-                );
-                if (trie->jump) {
-                    U16 dist= trie->jump[word_idx+1];
-                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
-                               (UV)((dist ? this_trie + dist : next) - start));
-                    if (dist) {
-                        if (!nextbranch)
-                            nextbranch= this_trie + trie->jump[0];
-                        DUMPUNTIL(this_trie + dist, nextbranch);
-                    }
-                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
-                        nextbranch= regnext((regnode *)nextbranch);
-                } else {
-                    Perl_re_printf( aTHX_  "\n");
-                }
-            }
-            if (last && next > last)
-                node= last;
-            else
-                node= next;
-        }
-        else if ( op == CURLY ) {   /* "next" might be very big: optimizer */
-            DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
-        }
-        else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
-            assert(next);
-            DUMPUNTIL(after, next);
-        }
-        else if ( op == PLUS || op == STAR) {
-            DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
-        }
-        else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
-            /* Literal string, where present. */
-            node = (const regnode *)REGNODE_AFTER_varies(node);
-        }
-        else {
-            node = REGNODE_AFTER_opcode(node,op);
-        }
-        if (op == CURLYX || op == OPEN || op == SROPEN)
-            indent++;
-        if (REGNODE_TYPE(op) == END)
-            break;
-    }
-    CLEAR_OPTSTART;
-#ifdef DEBUG_DUMPUNTIL
-    Perl_re_printf( aTHX_  "--- %d\n", (int)indent);
-#endif
-    return node;
-}
-
-#endif  /* DEBUGGING */
-
 #ifndef PERL_IN_XSUB_RE
 
 #  include "uni_keywords.h"
@@ -23939,7 +13666,7 @@ warning when none was present before might cause breakage, for little gain.  So
 khw left this code in, but not enabled.  Tests were never added.
 
 embed.fnc entry:
-Ei      |const char *|get_extended_utf8_msg|const UV cp
+Ei	|const char *|get_extended_utf8_msg|const UV cp
 
 PERL_STATIC_INLINE const char *
 S_get_extended_utf8_msg(pTHX_ const UV cp)
diff --git a/regcomp.h b/regcomp.h
index 5829da90dd..306c732b75 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -64,7 +64,7 @@
 typedef struct regexp_internal {
         regnode *regstclass;    /* Optional startclass as identified or constructed
                                    by the optimiser */
-        struct reg_data *data;  /* Additional miscellaneous data used by the program.
+        struct reg_data *data;	/* Additional miscellaneous data used by the program.
                                    Used to make it easier to clone and free arbitrary
                                    data that the regops need. Often the ARG field of
                                    a regop is an index into this structure. NOTE the
@@ -76,10 +76,10 @@ typedef struct regexp_internal {
                                    only valid when RXp_PAREN_NAMES(prog) is true,
                                    0 means "no value" like any other index into the
                                    data array.*/
-        regnode program[1];     /* Unwarranted chumminess with compiler. */
+        regnode program[1];	/* Unwarranted chumminess with compiler. */
 } regexp_internal;
 
-#define RXi_SET(x,y) (x)->pprivate = (void*)(y)
+#define RXi_SET(x,y) (x)->pprivate = (void*)(y)   
 #define RXi_GET(x)   ((regexp_internal *)((x)->pprivate))
 #define RXi_GET_DECL(r,ri) regexp_internal *ri = RXi_GET(r)
 #define RXi_GET_DECL_NULL(r,ri) regexp_internal *ri = (r) ? RXi_GET(r) : NULL
@@ -92,12 +92,12 @@ typedef struct regexp_internal {
 #define RXp_INTFLAGS(rx)        ((rx)->intflags)
 #define RX_INTFLAGS(prog)        RXp_INTFLAGS(ReANY(prog))
 
-#define PREGf_SKIP              0x00000001
-#define PREGf_IMPLICIT          0x00000002 /* Converted .* to ^.* */
-#define PREGf_NAUGHTY           0x00000004 /* how exponential is this pattern? */
-#define PREGf_VERBARG_SEEN      0x00000008
-#define PREGf_CUTGROUP_SEEN     0x00000010
-#define PREGf_USE_RE_EVAL       0x00000020 /* compiled with "use re 'eval'" */
+#define PREGf_SKIP		0x00000001
+#define PREGf_IMPLICIT		0x00000002 /* Converted .* to ^.* */
+#define PREGf_NAUGHTY		0x00000004 /* how exponential is this pattern? */
+#define PREGf_VERBARG_SEEN	0x00000008
+#define PREGf_CUTGROUP_SEEN	0x00000010
+#define PREGf_USE_RE_EVAL	0x00000020 /* compiled with "use re 'eval'" */
 /* these used to be extflags, but are now intflags */
 #define PREGf_NOSCAN            0x00000040
                                 /* spare */
@@ -154,14 +154,14 @@ typedef struct regexp_internal {
  */
 
 struct regnode_string {
-    U8  str_len;
+    U8	str_len;
     U8  type;
     U16 next_off;
     char string[1];
 };
 
 struct regnode_lstring { /* Constructed this way to keep the string aligned. */
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     U32 str_len;    /* Only 18 bits allowed before would overflow 'next_off' */
@@ -169,17 +169,17 @@ struct regnode_lstring { /* Constructed this way to keep the string aligned. */
 };
 
 struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */
-    U8  str_len;
+    U8	str_len;
     U8  type;
     U16 next_off;
     U32 arg1;                           /* set by set_ANYOF_arg() */
     char string[1];
 };
 
-/* Argument bearing node - workhorse,
+/* Argument bearing node - workhorse, 
    arg1 is often for the data field */
 struct regnode_1 {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     U32 arg1;
@@ -201,7 +201,7 @@ struct regnode_1 {
  * then use inline functions to copy the data in or out.
  * */
 struct regnode_p {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     char arg1_sv_ptr_bytes[sizeof(SV *)];
@@ -209,7 +209,7 @@ struct regnode_p {
 
 /* Similar to a regnode_1 but with an extra signed argument */
 struct regnode_2L {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     U32 arg1;
@@ -218,7 +218,7 @@ struct regnode_2L {
 
 /* 'Two field' -- Two 16 bit unsigned args */
 struct regnode_2 {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     U16 arg1;
@@ -233,13 +233,13 @@ struct regnode_2 {
  * The array is a bitmap capable of representing any possible continuation
  * byte. */
 struct regnode_bbm {
-    U8  first_byte;
+    U8	first_byte;
     U8  type;
     U16 next_off;
     U8 bitmap[REGNODE_BBM_BITMAP_LEN];
 };
 
-#define ANYOF_BITMAP_SIZE       (NUM_ANYOF_CODE_POINTS / CHARBITS)
+#define ANYOF_BITMAP_SIZE	(NUM_ANYOF_CODE_POINTS / CHARBITS)
 
 /* Note that these form structs which are supersets of the next smaller one, by
  * appending fields.  Alignment problems can occur if one of those optional
@@ -254,21 +254,21 @@ struct regnode_bbm {
 
 /* also used by trie */
 struct regnode_charclass {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
     U32 arg1;                           /* set by set_ANYOF_arg() */
-    char bitmap[ANYOF_BITMAP_SIZE];     /* only compile-time */
+    char bitmap[ANYOF_BITMAP_SIZE];	/* only compile-time */
 };
 
 /* has runtime (locale) \d, \w, ..., [:posix:] classes */
 struct regnode_charclass_posixl {
-    U8  flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
+    U8	flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
     U8  type;
     U16 next_off;
     U32 arg1;
-    char bitmap[ANYOF_BITMAP_SIZE];             /* both compile-time ... */
-    U32 classflags;                             /* and run-time */
+    char bitmap[ANYOF_BITMAP_SIZE];		/* both compile-time ... */
+    U32 classflags;	                        /* and run-time */
 };
 
 /* A synthetic start class (SSC); is a regnode_charclass_posixl_fold, plus an
@@ -285,12 +285,12 @@ struct regnode_charclass_posixl {
  * never a next node.
  */
 struct regnode_ssc {
-    U8  flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
+    U8	flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
     U8  type;
     U16 next_off;
     U32 arg1;
-    char bitmap[ANYOF_BITMAP_SIZE];     /* both compile-time ... */
-    U32 classflags;                     /* ... and run-time */
+    char bitmap[ANYOF_BITMAP_SIZE];	/* both compile-time ... */
+    U32 classflags;	                /* ... and run-time */
 
     /* Auxiliary, only used during construction; NULL afterwards: list of code
      * points matched */
@@ -376,15 +376,15 @@ struct regnode_ssc {
 #undef OPERAND
 #undef STRING
 
-#define OP(p)           ((p)->type)
-#define FLAGS(p)        ((p)->flags)    /* Caution: Doesn't apply to all      \
+#define	OP(p)		((p)->type)
+#define FLAGS(p)	((p)->flags)	/* Caution: Doesn't apply to all      \
                                            regnode types.  For some, it's the \
                                            character set of the regnode */
-#define STR_LENs(p)     (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8)  \
+#define	STR_LENs(p)	(__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8)  \
                                     ((struct regnode_string *)p)->str_len)
-#define STRINGs(p)      (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8)  \
+#define	STRINGs(p)	(__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8)  \
                                     ((struct regnode_string *)p)->string)
-#define OPERANDs(p)     STRINGs(p)
+#define	OPERANDs(p)	STRINGs(p)
 
 #define PARNO(p)        ARG(p)          /* APPLIES for OPEN and CLOSE only */
 
@@ -400,21 +400,21 @@ struct regnode_ssc {
  * node to be an ARG2L, using the second 32 bit field for the length, and not
  * using the flags nor next_off fields at all.  One could have an llstring node
  * and even an lllstring type. */
-#define STR_LENl(p)     (__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_REQ8)  \
+#define	STR_LENl(p)	(__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_REQ8)  \
                                     (((struct regnode_lstring *)p)->str_len))
-#define STRINGl(p)      (__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_REQ8)  \
+#define	STRINGl(p)	(__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_REQ8)  \
                                     (((struct regnode_lstring *)p)->string))
-#define OPERANDl(p)     STRINGl(p)
+#define	OPERANDl(p)	STRINGl(p)
 
-#define STR_LEN(p)      ((OP(p) == LEXACT || OP(p) == LEXACT_REQ8)           \
+#define	STR_LEN(p)	((OP(p) == LEXACT || OP(p) == LEXACT_REQ8)           \
                                                ? STR_LENl(p) : STR_LENs(p))
-#define STRING(p)       ((OP(p) == LEXACT || OP(p) == LEXACT_REQ8)           \
+#define	STRING(p)	((OP(p) == LEXACT || OP(p) == LEXACT_REQ8)           \
                                                ? STRINGl(p)  : STRINGs(p))
-#define OPERAND(p)      STRING(p)
+#define	OPERAND(p)	STRING(p)
 
 /* The number of (smallest) regnode equivalents that a string of length l bytes
  * occupies - Used by the REGNODE_AFTER() macros and functions. */
-#define STR_SZ(l)       (((l) + sizeof(regnode) - 1) / sizeof(regnode))
+#define STR_SZ(l)	(((l) + sizeof(regnode) - 1) / sizeof(regnode))
 
 #define setSTR_LEN(p,v)                                                     \
     STMT_START{                                                             \
@@ -431,18 +431,18 @@ struct regnode_ssc {
 #undef NODE_ALIGN
 #undef ARG_LOC
 
-#define NODE_ALIGN(node)
-#define ARG_LOC(p)      (((struct regnode_1 *)p)->arg1)
+#define	NODE_ALIGN(node)
+#define	ARG_LOC(p)	(((struct regnode_1 *)p)->arg1)
 #define ARGp_BYTES_LOC(p)  (((struct regnode_p *)p)->arg1_sv_ptr_bytes)
-#define ARG1_LOC(p)     (((struct regnode_2 *)p)->arg1)
-#define ARG2_LOC(p)     (((struct regnode_2 *)p)->arg2)
-#define ARG2L_LOC(p)    (((struct regnode_2L *)p)->arg2)
+#define	ARG1_LOC(p)	(((struct regnode_2 *)p)->arg1)
+#define	ARG2_LOC(p)	(((struct regnode_2 *)p)->arg2)
+#define ARG2L_LOC(p)	(((struct regnode_2L *)p)->arg2)
 
 
 /* These should no longer be used directly in most cases. Please use
  * the REGNODE_AFTER() macros instead. */
-#define NODE_STEP_REGNODE       1       /* sizeof(regnode)/sizeof(regnode) */
-#define EXTRA_STEP_2ARGS        EXTRA_SIZE(struct regnode_2)
+#define NODE_STEP_REGNODE	1	/* sizeof(regnode)/sizeof(regnode) */
+#define EXTRA_STEP_2ARGS	EXTRA_SIZE(struct regnode_2)
 
 /* Core macros for computing "the regnode after this one". See also
  * Perl_regnode_after() in reginline.h
@@ -735,7 +735,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
 
 /* If this is set, the result of the match should be complemented.  regexec.c
  * is expecting this to be in the low bit.  Never in an SSC */
-#define ANYOF_INVERT                            0x01
+#define ANYOF_INVERT		                0x01
 
 /* For the SSC node only, which cannot be inverted, so is shared with that bit.
  * This is used only during regex compilation. */
@@ -799,7 +799,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
 #define ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared 0x80
 #define ANYOF_WARN_SUPER__shared                      0x80
 
-#define ANYOF_FLAGS_ALL         ((U8) ~(0x10|0x20))
+#define ANYOF_FLAGS_ALL		((U8) ~(0x10|0x20))
 
 #define ANYOF_LOCALE_FLAGS (  ANYOFL_FOLD               \
                             | ANYOF_MATCHES_POSIXL      \
@@ -868,44 +868,44 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
 #   error Problem with handy.h CC_foo_ #defines
 #endif
 
-#define ANYOF_HORIZWS   ((ANYOF_POSIXL_MAX)+2) /* = (ANYOF_NVERTWS + 1) */
-#define ANYOF_NHORIZWS  ((ANYOF_POSIXL_MAX)+3)
+#define ANYOF_HORIZWS	((ANYOF_POSIXL_MAX)+2) /* = (ANYOF_NVERTWS + 1) */
+#define ANYOF_NHORIZWS	((ANYOF_POSIXL_MAX)+3)
 
 #define ANYOF_UNIPROP   ((ANYOF_POSIXL_MAX)+4)  /* Used to indicate a Unicode
                                                    property: \p{} or \P{} */
 
 /* Backward source code compatibility. */
 
-#define ANYOF_ALNUML     ANYOF_ALNUM
-#define ANYOF_NALNUML    ANYOF_NALNUM
-#define ANYOF_SPACEL     ANYOF_SPACE
-#define ANYOF_NSPACEL    ANYOF_NSPACE
+#define ANYOF_ALNUML	 ANYOF_ALNUM
+#define ANYOF_NALNUML	 ANYOF_NALNUM
+#define ANYOF_SPACEL	 ANYOF_SPACE
+#define ANYOF_NSPACEL	 ANYOF_NSPACE
 #define ANYOF_ALNUM ANYOF_WORDCHAR
 #define ANYOF_NALNUM ANYOF_NWORDCHAR
 
 /* Utility macros for the bitmap and classes of ANYOF */
 
-#define BITMAP_BYTE(p, c)       (( (U8*) (p)) [ ( ( (UV) (c)) >> 3) ] )
-#define BITMAP_BIT(c)           (1U << ((c) & 7))
-#define BITMAP_TEST(p, c)       (BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c)))
+#define BITMAP_BYTE(p, c)	(( (U8*) (p)) [ ( ( (UV) (c)) >> 3) ] )
+#define BITMAP_BIT(c)	        (1U << ((c) & 7))
+#define BITMAP_TEST(p, c)	(BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c)))
 
-#define ANYOF_FLAGS(p)          ((p)->flags)
+#define ANYOF_FLAGS(p)		((p)->flags)
 
-#define ANYOF_BIT(c)            BITMAP_BIT(c)
+#define ANYOF_BIT(c)		BITMAP_BIT(c)
 
 #define ANYOF_POSIXL_BITMAP(p)  (((regnode_charclass_posixl*) (p))->classflags)
 
-#define POSIXL_SET(field, c)    ((field) |= (1U << (c)))
-#define ANYOF_POSIXL_SET(p, c)  POSIXL_SET(ANYOF_POSIXL_BITMAP(p), (c))
+#define POSIXL_SET(field, c)	((field) |= (1U << (c)))
+#define ANYOF_POSIXL_SET(p, c)	POSIXL_SET(ANYOF_POSIXL_BITMAP(p), (c))
 
 #define POSIXL_CLEAR(field, c) ((field) &= ~ (1U <<(c)))
 #define ANYOF_POSIXL_CLEAR(p, c) POSIXL_CLEAR(ANYOF_POSIXL_BITMAP(p), (c))
 
-#define POSIXL_TEST(field, c)   ((field) & (1U << (c)))
-#define ANYOF_POSIXL_TEST(p, c) POSIXL_TEST(ANYOF_POSIXL_BITMAP(p), (c))
+#define POSIXL_TEST(field, c)	((field) & (1U << (c)))
+#define ANYOF_POSIXL_TEST(p, c)	POSIXL_TEST(ANYOF_POSIXL_BITMAP(p), (c))
 
-#define POSIXL_ZERO(field)      STMT_START { (field) = 0; } STMT_END
-#define ANYOF_POSIXL_ZERO(ret)  POSIXL_ZERO(ANYOF_POSIXL_BITMAP(ret))
+#define POSIXL_ZERO(field)	STMT_START { (field) = 0; } STMT_END
+#define ANYOF_POSIXL_ZERO(ret)	POSIXL_ZERO(ANYOF_POSIXL_BITMAP(ret))
 
 #define ANYOF_POSIXL_SET_TO_BITMAP(p, bits)                                 \
                 STMT_START { ANYOF_POSIXL_BITMAP(p) = (bits); } STMT_END
@@ -938,25 +938,25 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
 
 #define ANYOF_POSIXL_AND(source, dest) STMT_START { (dest)->classflags &= (source)->classflags ; } STMT_END
 
-#define ANYOF_BITMAP_ZERO(ret)  Zero(((regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
-#define ANYOF_BITMAP(p)         ((regnode_charclass*)(p))->bitmap
-#define ANYOF_BITMAP_BYTE(p, c) BITMAP_BYTE(ANYOF_BITMAP(p), c)
-#define ANYOF_BITMAP_SET(p, c)  (ANYOF_BITMAP_BYTE(p, c) |=  ANYOF_BIT(c))
-#define ANYOF_BITMAP_CLEAR(p,c) (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
-#define ANYOF_BITMAP_TEST(p, c) cBOOL(ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c))
+#define ANYOF_BITMAP_ZERO(ret)	Zero(((regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
+#define ANYOF_BITMAP(p)		((regnode_charclass*)(p))->bitmap
+#define ANYOF_BITMAP_BYTE(p, c)	BITMAP_BYTE(ANYOF_BITMAP(p), c)
+#define ANYOF_BITMAP_SET(p, c)	(ANYOF_BITMAP_BYTE(p, c) |=  ANYOF_BIT(c))
+#define ANYOF_BITMAP_CLEAR(p,c)	(ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
+#define ANYOF_BITMAP_TEST(p, c)	cBOOL(ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c))
 
-#define ANYOF_BITMAP_SETALL(p)          \
+#define ANYOF_BITMAP_SETALL(p)		\
         memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
-#define ANYOF_BITMAP_CLEARALL(p)        \
+#define ANYOF_BITMAP_CLEARALL(p)	\
         Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
 
 /*
  * Utility definitions.
  */
 #ifndef CHARMASK
-#  define UCHARAT(p)    ((int)*(const U8*)(p))
+#  define UCHARAT(p)	((int)*(const U8*)(p))
 #else
-#  define UCHARAT(p)    ((int)*(p)&CHARMASK)
+#  define UCHARAT(p)	((int)*(p)&CHARMASK)
 #endif
 
 /* Number of regnode equivalents that 'guy' occupies beyond the size of the
@@ -991,11 +991,11 @@ START_EXTERN_C
 #ifndef DOINIT
 EXTCONST regexp_engine PL_core_reg_engine;
 #else /* DOINIT */
-EXTCONST regexp_engine PL_core_reg_engine = {
+EXTCONST regexp_engine PL_core_reg_engine = { 
         Perl_re_compile,
         Perl_regexec_flags,
         Perl_re_intuit_start,
-        Perl_re_intuit_string,
+        Perl_re_intuit_string, 
         Perl_regfree_internal,
         Perl_reg_numbered_buff_fetch,
         Perl_reg_numbered_buff_store,
@@ -1003,9 +1003,9 @@ EXTCONST regexp_engine PL_core_reg_engine = {
         Perl_reg_named_buff,
         Perl_reg_named_buff_iter,
         Perl_reg_qr_package,
-#if defined(USE_ITHREADS)
+#if defined(USE_ITHREADS)        
         Perl_regdupe_internal,
-#endif
+#endif        
         Perl_re_op_compile
 };
 #endif /* DOINIT */
@@ -1058,10 +1058,10 @@ struct reg_data {
 #define check_offset_max substrs->data[2].max_offset
 #define check_end_shift substrs->data[2].end_shift
 
-#define RX_ANCHORED_SUBSTR(rx)  (ReANY(rx)->anchored_substr)
-#define RX_ANCHORED_UTF8(rx)    (ReANY(rx)->anchored_utf8)
-#define RX_FLOAT_SUBSTR(rx)     (ReANY(rx)->float_substr)
-#define RX_FLOAT_UTF8(rx)       (ReANY(rx)->float_utf8)
+#define RX_ANCHORED_SUBSTR(rx)	(ReANY(rx)->anchored_substr)
+#define RX_ANCHORED_UTF8(rx)	(ReANY(rx)->anchored_utf8)
+#define RX_FLOAT_SUBSTR(rx)	(ReANY(rx)->float_substr)
+#define RX_FLOAT_UTF8(rx)	(ReANY(rx)->float_utf8)
 
 /* trie related stuff */
 
@@ -1099,12 +1099,12 @@ struct _reg_trie_state {
 
 /* info per word; indexed by wordnum */
 typedef struct {
-    U16  prev;  /* previous word in acceptance chain; eg in
+    U16  prev;	/* previous word in acceptance chain; eg in
                  * zzz|abc|ab/ after matching the chars abc, the
                  * accepted word is #2, and the previous accepted
                  * word is #3 */
-    U32 len;    /* how many chars long is this word? */
-    U32 accept; /* accept state for this word */
+    U32 len;	/* how many chars long is this word? */
+    U32 accept;	/* accept state for this word */
 } reg_trie_wordinfo;
 
 
@@ -1123,7 +1123,7 @@ struct _reg_trie_data {
     reg_trie_state  *states;         /* state data */
     reg_trie_trans  *trans;          /* array of transition elements */
     char            *bitmap;         /* stclass bitmap */
-    U16             *jump;           /* optional 1 indexed array of offsets before tail
+    U16 	    *jump;           /* optional 1 indexed array of offsets before tail 
                                         for the node following a given word. */
     reg_trie_wordinfo *wordinfo;     /* array of info per word */
     U16             uniquecharcount; /* unique chars in trie (width of trans table) */
@@ -1131,7 +1131,7 @@ struct _reg_trie_data {
     STRLEN          minlen;          /* minimum length of words in trie - build/opt only? */
     STRLEN          maxlen;          /* maximum length of words in trie - build/opt only? */
     U32             prefixlen;       /* #chars in common prefix */
-    U32             statecount;      /* Build only - number of states in the states array
+    U32             statecount;      /* Build only - number of states in the states array 
                                         (including the unused zero state) */
     U32             wordcount;       /* Build only */
 #ifdef DEBUGGING
@@ -1166,11 +1166,11 @@ typedef struct _reg_ac_data reg_ac_data;
    This is simpler than refactoring all of it as wed end up with
    three different sets... */
 
-#define TRIE_BITMAP(p)          (((reg_trie_data *)(p))->bitmap)
-#define TRIE_BITMAP_BYTE(p, c)  BITMAP_BYTE(TRIE_BITMAP(p), c)
-#define TRIE_BITMAP_SET(p, c)   (TRIE_BITMAP_BYTE(p, c) |=  ANYOF_BIT((U8)c))
-#define TRIE_BITMAP_CLEAR(p,c)  (TRIE_BITMAP_BYTE(p, c) &= ~ANYOF_BIT((U8)c))
-#define TRIE_BITMAP_TEST(p, c)  (TRIE_BITMAP_BYTE(p, c) &   ANYOF_BIT((U8)c))
+#define TRIE_BITMAP(p)		(((reg_trie_data *)(p))->bitmap)
+#define TRIE_BITMAP_BYTE(p, c)	BITMAP_BYTE(TRIE_BITMAP(p), c)
+#define TRIE_BITMAP_SET(p, c)	(TRIE_BITMAP_BYTE(p, c) |=  ANYOF_BIT((U8)c))
+#define TRIE_BITMAP_CLEAR(p,c)	(TRIE_BITMAP_BYTE(p, c) &= ~ANYOF_BIT((U8)c))
+#define TRIE_BITMAP_TEST(p, c)	(TRIE_BITMAP_BYTE(p, c) &   ANYOF_BIT((U8)c))
 
 #define IS_ANYOF_TRIE(op) ((op)==TRIEC || (op)==AHOCORASICKC)
 #define IS_TRIE_AC(op) ((op)>=AHOCORASICK)
@@ -1201,7 +1201,7 @@ The three groups are: Compile, Execute, Extra. There is room for a
 further group, as currently only the low three bytes are used.
 
     Compile Options:
-
+    
     PARSE
     PEEP
     TRIE
@@ -1363,7 +1363,7 @@ re.pm, especially to the documentation.
 
 #define RE_SV_DUMPLEN(ItEm) (SvCUR(ItEm) - (SvTAIL(ItEm)!=0))
 #define RE_SV_TAIL(ItEm) (SvTAIL(ItEm) ? "$" : "")
-
+    
 #else /* if not DEBUGGING */
 
 #define DECLARE_AND_GET_RE_DEBUG_FLAGS  dNOOP
@@ -1423,7 +1423,7 @@ typedef enum {
 #define REGNODE_ARG_LEN_VARIES(node)    (PL_regnode_info[(node)].arg_len_varies)
 #define REGNODE_NAME(node)              (PL_regnode_name[(node)])
 
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_REGEX_ENGINE)
 #include "reginline.h"
 #endif
 
diff --git a/regcomp.sym b/regcomp.sym
index a552cc3ea1..ddc8397daf 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -1,6 +1,6 @@
 # regcomp.sym
 #
-# File has two sections, divided by a line of dashes '-'.
+# File has two sections, divided by a line of dashes '-'. 
 #
 # Lines beginning with # are ignored, except for those that start with #*
 # which are included in pod/perldebguts.pod.  # within a line may be part
@@ -10,7 +10,7 @@
 #
 # Note that the order in this file is important.
 #
-# Format for first section:
+# Format for first section: 
 # NAME \s+ TYPE, arg-description [struct regnode suffix] [flags] [longjump] ; DESCRIPTION
 #   arg-description is currently unused
 #   suffix is appended to 'struct_regnode_' giving which one to use.  If empty,
@@ -278,8 +278,8 @@ GOSUB       GOSUB,      num/ofs 2L    ; recurse to paren arg1 at (signed) ofs ar
 
 #*Special conditionals
 GROUPPN     GROUPPN,    no-sv 1   ; Whether the group matched.
-INSUBP      INSUBP,     num 1     ; Whether we are in a specific recurse.
-DEFINEP     DEFINEP,    none 1    ; Never execute directly.
+INSUBP      INSUBP,     num 1     ; Whether we are in a specific recurse.  
+DEFINEP     DEFINEP,    none 1    ; Never execute directly.               
 
 #*Backtracking Verbs
 ENDLIKE     ENDLIKE,    none      ; Used only for the type field of verbs
@@ -288,7 +288,7 @@ ACCEPT      ENDLIKE,    no-sv/num 2L   ; Accepts the current matched string, wit
 
 #*Verbs With Arguments
 VERB        VERB,       no-sv 1   ; Used only for the type field of verbs
-PRUNE       VERB,       no-sv 1   ; Pattern fails at this startpoint if no-backtracking through this
+PRUNE       VERB,       no-sv 1   ; Pattern fails at this startpoint if no-backtracking through this 
 MARKPOINT   VERB,       no-sv 1   ; Push the current location for rollback by cut.
 SKIP        VERB,       no-sv 1   ; On failure skip forward (to the mark) before retrying
 COMMIT      VERB,       no-sv 1   ; Pattern fails outright if backtracking through this
diff --git a/regcomp_debug.c b/regcomp_debug.c
new file mode 100644
index 0000000000..c7c42f6941
--- /dev/null
+++ b/regcomp_debug.c
@@ -0,0 +1,1625 @@
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
+#include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
+#define PERL_IN_REGCOMP_DEBUG_C
+#include "perl.h"
+
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
+
+#include "invlist_inline.h"
+#include "unicode_constants.h"
+#include "regcomp_internal.h"
+
+#ifdef DEBUGGING
+
+int
+Perl_re_printf(pTHX_ const char *fmt, ...)
+{
+    va_list ap;
+    int result;
+    PerlIO *f= Perl_debug_log;
+    PERL_ARGS_ASSERT_RE_PRINTF;
+    va_start(ap, fmt);
+    result = PerlIO_vprintf(f, fmt, ap);
+    va_end(ap);
+    return result;
+}
+
+int
+Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
+{
+    va_list ap;
+    int result;
+    PerlIO *f= Perl_debug_log;
+    PERL_ARGS_ASSERT_RE_INDENTF;
+    va_start(ap, depth);
+    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
+    result = PerlIO_vprintf(f, fmt, ap);
+    va_end(ap);
+    return result;
+}
+
+void
+Perl_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
+                                    const char *close_str)
+{
+    PERL_ARGS_ASSERT_DEBUG_SHOW_STUDY_FLAGS;
+    if (!flags)
+        return;
+
+    Perl_re_printf( aTHX_  "%s", open_str);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
+    Perl_re_printf( aTHX_  "%s", close_str);
+}
+
+void
+Perl_debug_studydata(pTHX_ const char *where, scan_data_t *data,
+                    U32 depth, int is_inf,
+                    SSize_t min, SSize_t stopmin, SSize_t delta)
+{
+    PERL_ARGS_ASSERT_DEBUG_STUDYDATA;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    DEBUG_OPTIMISE_MORE_r({
+        if (!data)
+            return;
+        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+            depth,
+            where,
+            min, stopmin, delta,
+            (IV)data->pos_min,
+            (IV)data->pos_delta,
+            (UV)data->flags
+        );
+
+        Perl_debug_show_study_flags(aTHX_ data->flags," [","]");
+
+        Perl_re_printf( aTHX_
+            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
+            (IV)data->whilem_c,
+            (IV)(data->last_closep ? *((data)->last_closep) : -1),
+            is_inf ? "INF " : ""
+        );
+
+        if (data->last_found) {
+            int i;
+            Perl_re_printf(aTHX_
+                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
+                    SvPVX_const(data->last_found),
+                    (IV)data->last_end,
+                    (IV)data->last_start_min,
+                    (IV)data->last_start_max
+            );
+
+            for (i = 0; i < 2; i++) {
+                Perl_re_printf(aTHX_
+                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
+                    data->cur_is_floating == i ? "*" : "",
+                    i ? "Float" : "Fixed",
+                    SvPVX_const(data->substrs[i].str),
+                    (IV)data->substrs[i].min_offset,
+                    (IV)data->substrs[i].max_offset
+                );
+                Perl_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+            }
+        }
+
+        Perl_re_printf( aTHX_ "\n");
+    });
+}
+
+
+void
+Perl_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
+                regnode *scan, U32 depth, U32 flags)
+{
+    PERL_ARGS_ASSERT_DEBUG_PEEP;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    DEBUG_OPTIMISE_r({
+        regnode *Next;
+
+        if (!scan)
+            return;
+        Next = regnext(scan);
+        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
+        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
+            depth,
+            str,
+            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
+            Next ? (REG_NODE_NUM(Next)) : 0 );
+        Perl_debug_show_study_flags(aTHX_ flags," [ ","]");
+        Perl_re_printf( aTHX_  "\n");
+   });
+}
+
+#endif /* DEBUGGING */
+
+/*
+ - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
+ */
+#ifdef DEBUGGING
+
+static void
+S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
+{
+    int bit;
+    int set=0;
+
+    ASSUME(REG_INTFLAGS_NAME_SIZE <= sizeof(flags)*8);
+
+    for (bit=0; bit<REG_INTFLAGS_NAME_SIZE; bit++) {
+        if (flags & (1<<bit)) {
+            if (!set++ && lead)
+                Perl_re_printf( aTHX_  "%s", lead);
+            Perl_re_printf( aTHX_  "%s ", PL_reg_intflags_name[bit]);
+        }
+    }
+    if (lead)  {
+        if (set)
+            Perl_re_printf( aTHX_  "\n");
+        else
+            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
+    }
+}
+
+static void
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
+{
+    int bit;
+    int set=0;
+    regex_charset cs;
+
+    ASSUME(REG_EXTFLAGS_NAME_SIZE <= sizeof(flags)*8);
+
+    for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) {
+        if (flags & (1U<<bit)) {
+            if ((1U<<bit) & RXf_PMf_CHARSET) {  /* Output separately, below */
+                continue;
+            }
+            if (!set++ && lead)
+                Perl_re_printf( aTHX_  "%s", lead);
+            Perl_re_printf( aTHX_  "%s ", PL_reg_extflags_name[bit]);
+        }
+    }
+    if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
+            if (!set++ && lead) {
+                Perl_re_printf( aTHX_  "%s", lead);
+            }
+            switch (cs) {
+                case REGEX_UNICODE_CHARSET:
+                    Perl_re_printf( aTHX_  "UNICODE");
+                    break;
+                case REGEX_LOCALE_CHARSET:
+                    Perl_re_printf( aTHX_  "LOCALE");
+                    break;
+                case REGEX_ASCII_RESTRICTED_CHARSET:
+                    Perl_re_printf( aTHX_  "ASCII-RESTRICTED");
+                    break;
+                case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
+                    Perl_re_printf( aTHX_  "ASCII-MORE_RESTRICTED");
+                    break;
+                default:
+                    Perl_re_printf( aTHX_  "UNKNOWN CHARACTER SET");
+                    break;
+            }
+    }
+    if (lead)  {
+        if (set)
+            Perl_re_printf( aTHX_  "\n");
+        else
+            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
+    }
+}
+#endif
+
+void
+Perl_regdump(pTHX_ const regexp *r)
+{
+#ifdef DEBUGGING
+    int i;
+    SV * const sv = sv_newmortal();
+    SV *dsv= sv_newmortal();
+    RXi_GET_DECL(r, ri);
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_REGDUMP;
+
+    (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
+
+    /* Header fields of interest. */
+    for (i = 0; i < 2; i++) {
+        if (r->substrs->data[i].substr) {
+            RE_PV_QUOTED_DECL(s, 0, dsv,
+                            SvPVX_const(r->substrs->data[i].substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
+                            PL_dump_re_max_len);
+            Perl_re_printf( aTHX_
+                          "%s %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+        else if (r->substrs->data[i].utf8_substr) {
+            RE_PV_QUOTED_DECL(s, 1, dsv,
+                            SvPVX_const(r->substrs->data[i].utf8_substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+    }
+
+    if (r->check_substr || r->check_utf8)
+        Perl_re_printf( aTHX_
+                      (const char *)
+                      (   r->check_substr == r->substrs->data[1].substr
+                       && r->check_utf8   == r->substrs->data[1].utf8_substr
+                       ? "(checking floating" : "(checking anchored"));
+    if (r->intflags & PREGf_NOSCAN)
+        Perl_re_printf( aTHX_  " noscan");
+    if (r->extflags & RXf_CHECK_ALL)
+        Perl_re_printf( aTHX_  " isall");
+    if (r->check_substr || r->check_utf8)
+        Perl_re_printf( aTHX_  ") ");
+
+    if (ri->regstclass) {
+        regprop(r, sv, ri->regstclass, NULL, NULL);
+        Perl_re_printf( aTHX_  "stclass %s ", SvPVX_const(sv));
+    }
+    if (r->intflags & PREGf_ANCH) {
+        Perl_re_printf( aTHX_  "anchored");
+        if (r->intflags & PREGf_ANCH_MBOL)
+            Perl_re_printf( aTHX_  "(MBOL)");
+        if (r->intflags & PREGf_ANCH_SBOL)
+            Perl_re_printf( aTHX_  "(SBOL)");
+        if (r->intflags & PREGf_ANCH_GPOS)
+            Perl_re_printf( aTHX_  "(GPOS)");
+        Perl_re_printf( aTHX_ " ");
+    }
+    if (r->intflags & PREGf_GPOS_SEEN)
+        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
+    if (r->intflags & PREGf_SKIP)
+        Perl_re_printf( aTHX_  "plus ");
+    if (r->intflags & PREGf_IMPLICIT)
+        Perl_re_printf( aTHX_  "implicit ");
+    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
+    if (r->extflags & RXf_EVAL_SEEN)
+        Perl_re_printf( aTHX_  "with eval ");
+    Perl_re_printf( aTHX_  "\n");
+    DEBUG_FLAGS_r({
+        regdump_extflags("r->extflags: ", r->extflags);
+        regdump_intflags("r->intflags: ", r->intflags);
+    });
+#else
+    PERL_ARGS_ASSERT_REGDUMP;
+    PERL_UNUSED_CONTEXT;
+    PERL_UNUSED_ARG(r);
+#endif  /* DEBUGGING */
+}
+
+/* Should be synchronized with ANYOF_ #defines in regcomp.h */
+#ifdef DEBUGGING
+
+#  if   CC_WORDCHAR_ != 0 || CC_DIGIT_ != 1        || CC_ALPHA_ != 2    \
+     || CC_LOWER_ != 3    || CC_UPPER_ != 4        || CC_PUNCT_ != 5    \
+     || CC_PRINT_ != 6    || CC_ALPHANUMERIC_ != 7 || CC_GRAPH_ != 8    \
+     || CC_CASED_ != 9    || CC_SPACE_ != 10       || CC_BLANK_ != 11   \
+     || CC_XDIGIT_ != 12  || CC_CNTRL_ != 13       || CC_ASCII_ != 14   \
+     || CC_VERTSPACE_ != 15
+#   error Need to adjust order of anyofs[]
+#  endif
+static const char * const anyofs[] = {
+    "\\w",
+    "\\W",
+    "\\d",
+    "\\D",
+    "[:alpha:]",
+    "[:^alpha:]",
+    "[:lower:]",
+    "[:^lower:]",
+    "[:upper:]",
+    "[:^upper:]",
+    "[:punct:]",
+    "[:^punct:]",
+    "[:print:]",
+    "[:^print:]",
+    "[:alnum:]",
+    "[:^alnum:]",
+    "[:graph:]",
+    "[:^graph:]",
+    "[:cased:]",
+    "[:^cased:]",
+    "\\s",
+    "\\S",
+    "[:blank:]",
+    "[:^blank:]",
+    "[:xdigit:]",
+    "[:^xdigit:]",
+    "[:cntrl:]",
+    "[:^cntrl:]",
+    "[:ascii:]",
+    "[:^ascii:]",
+    "\\v",
+    "\\V"
+};
+#endif
+
+/*
+- regprop - printable representation of opcode, with run time support
+*/
+
+void
+Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
+{
+#ifdef DEBUGGING
+    U8 k;
+    const U8 op = OP(o);
+    RXi_GET_DECL(prog, progi);
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_REGPROP;
+
+    SvPVCLEAR(sv);
+
+    if (op > REGNODE_MAX) {          /* regnode.type is unsigned */
+        if (pRExC_state) {  /* This gives more info, if we have it */
+            FAIL3("panic: corrupted regexp opcode %d > %d",
+                  (int)op, (int)REGNODE_MAX);
+        }
+        else {
+            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
+                             (int)op, (int)REGNODE_MAX);
+        }
+    }
+    sv_catpv(sv, REGNODE_NAME(op)); /* Take off const! */
+
+    k = REGNODE_TYPE(op);
+
+    if (k == EXACT) {
+        sv_catpvs(sv, " ");
+        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
+         * is a crude hack but it may be the best for now since
+         * we have no flag "this EXACTish node was UTF-8"
+         * --jhi */
+        pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+                  PL_colors[0], PL_colors[1],
+                  PERL_PV_ESCAPE_UNI_DETECT |
+                  PERL_PV_ESCAPE_NONASCII   |
+                  PERL_PV_PRETTY_ELLIPSES   |
+                  PERL_PV_PRETTY_LTGT       |
+                  PERL_PV_PRETTY_NOCLEAR
+                  );
+    } else if (k == TRIE) {
+        /* print the details of the trie in dumpuntil instead, as
+         * progi->data isn't available here */
+        const U32 n = ARG(o);
+        const reg_ac_data * const ac = IS_TRIE_AC(op) ?
+               (reg_ac_data *)progi->data->data[n] :
+               NULL;
+        const reg_trie_data * const trie
+            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
+
+        Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags));
+        DEBUG_TRIE_COMPILE_r({
+          if (trie->jump)
+            sv_catpvs(sv, "(JUMP)");
+          Perl_sv_catpvf(aTHX_ sv,
+            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
+            (UV)trie->startstate,
+            (IV)trie->statecount-1, /* -1 because of the unused 0 element */
+            (UV)trie->wordcount,
+            (UV)trie->minlen,
+            (UV)trie->maxlen,
+            (UV)TRIE_CHARCOUNT(trie),
+            (UV)trie->uniquecharcount
+          );
+        });
+        if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
+            sv_catpvs(sv, "[");
+            (void) put_charclass_bitmap_innards(sv,
+                                                ((IS_ANYOF_TRIE(op))
+                                                 ? ANYOF_BITMAP(o)
+                                                 : TRIE_BITMAP(trie)),
+                                                NULL,
+                                                NULL,
+                                                NULL,
+                                                0,
+                                                FALSE
+                                               );
+            sv_catpvs(sv, "]");
+        }
+    } else if (k == CURLY) {
+        U32 lo = ARG1(o), hi = ARG2(o);
+        if (op == CURLYM || op == CURLYN || op == CURLYX)
+            Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
+        Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
+        if (hi == REG_INFTY)
+            sv_catpvs(sv, "INFTY");
+        else
+            Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi);
+        sv_catpvs(sv, "}");
+    }
+    else if (k == WHILEM && o->flags)                   /* Ordinal/of */
+        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
+    else if (k == REF || k == OPEN || k == CLOSE
+             || k == GROUPP || op == ACCEPT)
+    {
+        AV *name_list= NULL;
+        U32 parno= (op == ACCEPT)              ? (U32)ARG2L(o) :
+                   (op == OPEN || op == CLOSE) ? (U32)PARNO(o) :
+                                                 (U32)ARG(o);
+        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
+        if ( RXp_PAREN_NAMES(prog) ) {
+            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
+        } else if ( pRExC_state ) {
+            name_list= RExC_paren_name_list;
+        }
+        if ( name_list ) {
+            if ( k != REF || (op < REFN)) {
+                SV **name= av_fetch_simple(name_list, parno, 0 );
+                if (name)
+                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
+            }
+            else
+            if (parno > 0) {
+                /* parno must always be larger than 0 for this block
+                 * as it represents a slot into the data array, which
+                 * has the 0 slot reserved for a placeholder so any valid
+                 * index into it is always true, eg non-zero
+                 * see the '%' "what" type and the implementation of
+                 * S_reg_add_data()
+                 */
+                SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
+                I32 *nums=(I32*)SvPVX(sv_dat);
+                SV **name= av_fetch_simple(name_list, nums[0], 0 );
+                I32 n;
+                if (name) {
+                    for ( n=0; n<SvIVX(sv_dat); n++ ) {
+                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
+                                    (n ? "," : ""), (IV)nums[n]);
+                    }
+                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
+                }
+            }
+        }
+        if ( k == REF && reginfo) {
+            U32 n = ARG(o);  /* which paren pair */
+            I32 ln = prog->offs[n].start;
+            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
+                Perl_sv_catpvf(aTHX_ sv, ": FAIL");
+            else if (ln == prog->offs[n].end)
+                Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
+            else {
+                const char *s = reginfo->strbeg + ln;
+                Perl_sv_catpvf(aTHX_ sv, ": ");
+                Perl_pv_pretty( aTHX_ sv, s, prog->offs[n].end - prog->offs[n].start, 32, 0, 0,
+                    PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
+            }
+        }
+    } else if (k == GOSUB) {
+        AV *name_list= NULL;
+        if ( RXp_PAREN_NAMES(prog) ) {
+            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
+        } else if ( pRExC_state ) {
+            name_list= RExC_paren_name_list;
+        }
+
+        /* Paren and offset */
+        Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
+                (int)((o + (int)ARG2L(o)) - progi->program) );
+        if (name_list) {
+            SV **name= av_fetch_simple(name_list, ARG(o), 0 );
+            if (name)
+                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
+        }
+    }
+    else if (k == LOGICAL)
+        /* 2: embedded, otherwise 1 */
+        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
+    else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
+        U8 flags;
+        char * bitmap;
+        U8 do_sep = 0;    /* Do we need to separate various components of the
+                             output? */
+        /* Set if there is still an unresolved user-defined property */
+        SV *unresolved                = NULL;
+
+        /* Things that are ignored except when the runtime locale is UTF-8 */
+        SV *only_utf8_locale_invlist = NULL;
+
+        /* Code points that don't fit in the bitmap */
+        SV *nonbitmap_invlist = NULL;
+
+        /* And things that aren't in the bitmap, but are small enough to be */
+        SV* bitmap_range_not_in_bitmap = NULL;
+
+        bool inverted;
+
+        if (k != ANYOF) {
+            flags = 0;
+            bitmap = NULL;
+        }
+        else {
+            flags = ANYOF_FLAGS(o);
+            bitmap = ANYOF_BITMAP(o);
+        }
+
+        if (op == ANYOFL || op == ANYOFPOSIXL) {
+            if ((flags & ANYOFL_UTF8_LOCALE_REQD)) {
+                sv_catpvs(sv, "{utf8-locale-reqd}");
+            }
+            if (flags & ANYOFL_FOLD) {
+                sv_catpvs(sv, "{i}");
+            }
+        }
+
+        inverted = flags & ANYOF_INVERT;
+
+        /* If there is stuff outside the bitmap, get it */
+        if (k == ANYOFR) {
+
+            /* For a single range, split into the parts inside vs outside the
+             * bitmap. */
+            UV start = ANYOFRbase(o);
+            UV end   = ANYOFRbase(o) + ANYOFRdelta(o);
+
+            if (start < NUM_ANYOF_CODE_POINTS) {
+                if (end < NUM_ANYOF_CODE_POINTS) {
+                    bitmap_range_not_in_bitmap
+                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
+                                                  start, end);
+                }
+                else {
+                    bitmap_range_not_in_bitmap
+                          = _add_range_to_invlist(bitmap_range_not_in_bitmap,
+                                                  start, NUM_ANYOF_CODE_POINTS);
+                    start = NUM_ANYOF_CODE_POINTS;
+                }
+            }
+
+            if (start >= NUM_ANYOF_CODE_POINTS) {
+                nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
+                                                ANYOFRbase(o),
+                                                ANYOFRbase(o) + ANYOFRdelta(o));
+            }
+        }
+        else if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(o)) {
+            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
+                                                      NUM_ANYOF_CODE_POINTS,
+                                                      UV_MAX);
+        }
+        else if (ANYOF_HAS_AUX(o)) {
+                (void) GET_REGCLASS_AUX_DATA(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+
+            /* The aux data may contain stuff that could fit in the bitmap.
+             * This could come from a user-defined property being finally
+             * resolved when this call was done; or much more likely because
+             * there are matches that require UTF-8 to be valid, and so aren't
+             * in the bitmap (or ANYOFR).  This is teased apart later */
+            _invlist_intersection(nonbitmap_invlist,
+                                  PL_InBitmap,
+                                  &bitmap_range_not_in_bitmap);
+            /* Leave just the things that don't fit into the bitmap */
+            _invlist_subtract(nonbitmap_invlist,
+                              PL_InBitmap,
+                              &nonbitmap_invlist);
+        }
+
+        /* Ready to start outputting.  First, the initial left bracket */
+        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+
+        if (   bitmap
+            || bitmap_range_not_in_bitmap
+            || only_utf8_locale_invlist
+            || unresolved)
+        {
+            /* Then all the things that could fit in the bitmap */
+            do_sep = put_charclass_bitmap_innards(
+                                    sv,
+                                    bitmap,
+                                    bitmap_range_not_in_bitmap,
+                                    only_utf8_locale_invlist,
+                                    o,
+                                    flags,
+
+                                    /* Can't try inverting for a
+                                                   * better display if there
+                                                   * are things that haven't
+                                                   * been resolved */
+                                    (unresolved != NULL || k == ANYOFR));
+            SvREFCNT_dec(bitmap_range_not_in_bitmap);
+
+            /* If there are user-defined properties which haven't been defined
+             * yet, output them.  If the result is not to be inverted, it is
+             * clearest to output them in a separate [] from the bitmap range
+             * stuff.  If the result is to be complemented, we have to show
+             * everything in one [], as the inversion applies to the whole
+             * thing.  Use {braces} to separate them from anything in the
+             * bitmap and anything above the bitmap. */
+            if (unresolved) {
+                if (inverted) {
+                    if (! do_sep) { /* If didn't output anything in the bitmap
+                                     */
+                        sv_catpvs(sv, "^");
+                    }
+                    sv_catpvs(sv, "{");
+                }
+                else if (do_sep) {
+                    Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1],
+                                                      PL_colors[0]);
+                }
+                sv_catsv(sv, unresolved);
+                if (inverted) {
+                    sv_catpvs(sv, "}");
+                }
+                do_sep = ! inverted;
+            }
+            else if (     do_sep == 2
+                     && ! nonbitmap_invlist
+                     &&   ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(o))
+            {
+                /* Here, the display shows the class as inverted, and
+                 * everything above the lower display should also match, but
+                 * there is no indication of that.  Add this range so the code
+                 * below will add it to the display */
+                _invlist_union_complement_2nd(nonbitmap_invlist,
+                                              PL_InBitmap,
+                                              &nonbitmap_invlist);
+            }
+        }
+
+        /* And, finally, add the above-the-bitmap stuff */
+        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
+            SV* contents;
+
+            /* See if truncation size is overridden */
+            const STRLEN dump_len = (PL_dump_re_max_len > 256)
+                                    ? PL_dump_re_max_len
+                                    : 256;
+
+            /* This is output in a separate [] */
+            if (do_sep) {
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
+            }
+
+            /* And, for easy of understanding, it is shown in the
+             * uncomplemented form if possible.  The one exception being if
+             * there are unresolved items, where the inversion has to be
+             * delayed until runtime */
+            if (inverted && ! unresolved) {
+                _invlist_invert(nonbitmap_invlist);
+                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
+            }
+
+            contents = invlist_contents(nonbitmap_invlist,
+                                        FALSE /* output suitable for catsv */
+                                       );
+
+            /* If the output is shorter than the permissible maximum, just do it. */
+            if (SvCUR(contents) <= dump_len) {
+                sv_catsv(sv, contents);
+            }
+            else {
+                const char * contents_string = SvPVX(contents);
+                STRLEN i = dump_len;
+
+                /* Otherwise, start at the permissible max and work back to the
+                 * first break possibility */
+                while (i > 0 && contents_string[i] != ' ') {
+                    i--;
+                }
+                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
+                                       find a legal break */
+                    i = dump_len;
+                }
+
+                sv_catpvn(sv, contents_string, i);
+                sv_catpvs(sv, "...");
+            }
+
+            SvREFCNT_dec_NN(contents);
+            SvREFCNT_dec_NN(nonbitmap_invlist);
+        }
+
+        /* And finally the matching, closing ']' */
+        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        if (op == ANYOFHs) {
+            Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1));
+        }
+        else if (REGNODE_TYPE(op) != ANYOF) {
+            U8 lowest = (op != ANYOFHr)
+                         ? FLAGS(o)
+                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
+            U8 highest = (op == ANYOFHr)
+                         ? HIGHEST_ANYOF_HRx_BYTE(FLAGS(o))
+                         : (op == ANYOFH || op == ANYOFR)
+                           ? 0xFF
+                           : lowest;
+#ifndef EBCDIC
+            if (op != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
+#endif
+            {
+                Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+                if (lowest != highest) {
+                    Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+                }
+                Perl_sv_catpvf(aTHX_ sv, ")");
+            }
+        }
+
+        SvREFCNT_dec(unresolved);
+    }
+    else if (k == ANYOFM) {
+        SV * cp_list = get_ANYOFM_contents(o);
+
+        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        if (op == NANYOFM) {
+            _invlist_invert(cp_list);
+        }
+
+        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE);
+        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(cp_list);
+    }
+    else if (k == ANYOFHbbm) {
+        SV * cp_list = get_ANYOFHbbm_contents(o);
+        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+
+        sv_catsv(sv, invlist_contents(cp_list,
+                                      FALSE /* output suitable for catsv */
+                                     ));
+        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(cp_list);
+    }
+    else if (k == POSIXD || k == NPOSIXD) {
+        U8 index = FLAGS(o) * 2;
+        if (index < C_ARRAY_LENGTH(anyofs)) {
+            if (*anyofs[index] != '[')  {
+                sv_catpvs(sv, "[");
+            }
+            sv_catpv(sv, anyofs[index]);
+            if (*anyofs[index] != '[')  {
+                sv_catpvs(sv, "]");
+            }
+        }
+        else {
+            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+        }
+    }
+    else if (k == BOUND || k == NBOUND) {
+        /* Must be synced with order of 'bound_type' in regcomp.h */
+        const char * const bounds[] = {
+            "",      /* Traditional */
+            "{gcb}",
+            "{lb}",
+            "{sb}",
+            "{wb}"
+        };
+        assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
+        sv_catpv(sv, bounds[FLAGS(o)]);
+    }
+    else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
+        Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
+        if (o->next_off) {
+            Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+        }
+        Perl_sv_catpvf(aTHX_ sv, "]");
+    }
+    else if (op == SBOL)
+        Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
+
+    /* add on the verb argument if there is one */
+    if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) {
+        if ( ARG(o) )
+            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
+                       SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+        else
+            sv_catpvs(sv, ":NULL");
+    }
+#else
+    PERL_UNUSED_CONTEXT;
+    PERL_UNUSED_ARG(sv);
+    PERL_UNUSED_ARG(o);
+    PERL_UNUSED_ARG(prog);
+    PERL_UNUSED_ARG(reginfo);
+    PERL_UNUSED_ARG(pRExC_state);
+#endif  /* DEBUGGING */
+}
+
+#ifdef DEBUGGING
+
+STATIC void
+S_put_code_point(pTHX_ SV *sv, UV c)
+{
+    PERL_ARGS_ASSERT_PUT_CODE_POINT;
+
+    if (c > 255) {
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
+    }
+    else if (isPRINT(c)) {
+        const char string = (char) c;
+
+        /* We use {phrase} as metanotation in the class, so also escape literal
+         * braces */
+        if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
+            sv_catpvs(sv, "\\");
+        sv_catpvn(sv, &string, 1);
+    }
+    else if (isMNEMONIC_CNTRL(c)) {
+        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
+    }
+    else {
+        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
+    }
+}
+
+STATIC void
+S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
+{
+    /* Appends to 'sv' a displayable version of the range of code points from
+     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
+     * that have them, when they occur at the beginning or end of the range.
+     * It uses hex to output the remaining code points, unless 'allow_literals'
+     * is true, in which case the printable ASCII ones are output as-is (though
+     * some of these will be escaped by put_code_point()).
+     *
+     * NOTE:  This is designed only for printing ranges of code points that fit
+     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
+     */
+
+    const unsigned int min_range_count = 3;
+
+    assert(start <= end);
+
+    PERL_ARGS_ASSERT_PUT_RANGE;
+
+    while (start <= end) {
+        UV this_end;
+        const char * format;
+
+        if (    end - start < min_range_count
+            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
+        {
+            /* Output a range of 1 or 2 chars individually, or longer ranges
+             * when printable */
+            for (; start <= end; start++) {
+                put_code_point(sv, start);
+            }
+            break;
+        }
+
+        /* If permitted by the input options, and there is a possibility that
+         * this range contains a printable literal, look to see if there is
+         * one. */
+        if (allow_literals && start <= MAX_PRINT_A) {
+
+            /* If the character at the beginning of the range isn't an ASCII
+             * printable, effectively split the range into two parts:
+             *  1) the portion before the first such printable,
+             *  2) the rest
+             * and output them separately. */
+            if (! isPRINT_A(start)) {
+                UV temp_end = start + 1;
+
+                /* There is no point looking beyond the final possible
+                 * printable, in MAX_PRINT_A */
+                UV max = MIN(end, MAX_PRINT_A);
+
+                while (temp_end <= max && ! isPRINT_A(temp_end)) {
+                    temp_end++;
+                }
+
+                /* Here, temp_end points to one beyond the first printable if
+                 * found, or to one beyond 'max' if not.  If none found, make
+                 * sure that we use the entire range */
+                if (temp_end > MAX_PRINT_A) {
+                    temp_end = end + 1;
+                }
+
+                /* Output the first part of the split range: the part that
+                 * doesn't have printables, with the parameter set to not look
+                 * for literals (otherwise we would infinitely recurse) */
+                put_range(sv, start, temp_end - 1, FALSE);
+
+                /* The 2nd part of the range (if any) starts here. */
+                start = temp_end;
+
+                /* We do a continue, instead of dropping down, because even if
+                 * the 2nd part is non-empty, it could be so short that we want
+                 * to output it as individual characters, as tested for at the
+                 * top of this loop.  */
+                continue;
+            }
+
+            /* Here, 'start' is a printable ASCII.  If it is an alphanumeric,
+             * output a sub-range of just the digits or letters, then process
+             * the remaining portion as usual. */
+            if (isALPHANUMERIC_A(start)) {
+                UV mask = (isDIGIT_A(start))
+                           ? CC_DIGIT_
+                             : isUPPER_A(start)
+                               ? CC_UPPER_
+                               : CC_LOWER_;
+                UV temp_end = start + 1;
+
+                /* Find the end of the sub-range that includes just the
+                 * characters in the same class as the first character in it */
+                while (temp_end <= end && generic_isCC_A_(temp_end, mask)) {
+                    temp_end++;
+                }
+                temp_end--;
+
+                /* For short ranges, don't duplicate the code above to output
+                 * them; just call recursively */
+                if (temp_end - start < min_range_count) {
+                    put_range(sv, start, temp_end, FALSE);
+                }
+                else {  /* Output as a range */
+                    put_code_point(sv, start);
+                    sv_catpvs(sv, "-");
+                    put_code_point(sv, temp_end);
+                }
+                start = temp_end + 1;
+                continue;
+            }
+
+            /* We output any other printables as individual characters */
+            if (isPUNCT_A(start) || isSPACE_A(start)) {
+                while (start <= end && (isPUNCT_A(start)
+                                        || isSPACE_A(start)))
+                {
+                    put_code_point(sv, start);
+                    start++;
+                }
+                continue;
+            }
+        } /* End of looking for literals */
+
+        /* Here is not to output as a literal.  Some control characters have
+         * mnemonic names.  Split off any of those at the beginning and end of
+         * the range to print mnemonically.  It isn't possible for many of
+         * these to be in a row, so this won't overwhelm with output */
+        if (   start <= end
+            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
+        {
+            while (isMNEMONIC_CNTRL(start) && start <= end) {
+                put_code_point(sv, start);
+                start++;
+            }
+
+            /* If this didn't take care of the whole range ... */
+            if (start <= end) {
+
+                /* Look backwards from the end to find the final non-mnemonic
+                 * */
+                UV temp_end = end;
+                while (isMNEMONIC_CNTRL(temp_end)) {
+                    temp_end--;
+                }
+
+                /* And separately output the interior range that doesn't start
+                 * or end with mnemonics */
+                put_range(sv, start, temp_end, FALSE);
+
+                /* Then output the mnemonic trailing controls */
+                start = temp_end + 1;
+                while (start <= end) {
+                    put_code_point(sv, start);
+                    start++;
+                }
+                break;
+            }
+        }
+
+        /* As a final resort, output the range or subrange as hex. */
+
+        if (start >= NUM_ANYOF_CODE_POINTS) {
+            this_end = end;
+        }
+        else {  /* Have to split range at the bitmap boundary */
+            this_end = (end < NUM_ANYOF_CODE_POINTS)
+                        ? end
+                        : NUM_ANYOF_CODE_POINTS - 1;
+        }
+#if NUM_ANYOF_CODE_POINTS > 256
+        format = (this_end < 256)
+                 ? "\\x%02" UVXf "-\\x%02" UVXf
+                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
+#else
+        format = "\\x%02" UVXf "-\\x%02" UVXf;
+#endif
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
+        Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
+        GCC_DIAG_RESTORE_STMT;
+        break;
+    }
+}
+
+STATIC void
+S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
+{
+    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
+     * 'invlist' */
+
+    UV start, end;
+    bool allow_literals = TRUE;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;
+
+    /* Generally, it is more readable if printable characters are output as
+     * literals, but if a range (nearly) spans all of them, it's best to output
+     * it as a single range.  This code will use a single range if all but 2
+     * ASCII printables are in it */
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+
+        /* If the range starts beyond the final printable, it doesn't have any
+         * in it */
+        if (start > MAX_PRINT_A) {
+            break;
+        }
+
+        /* In both ASCII and EBCDIC, a SPACE is the lowest printable.  To span
+         * all but two, the range must start and end no later than 2 from
+         * either end */
+        if (start < ' ' + 2 && end > MAX_PRINT_A - 2) {
+            if (end > MAX_PRINT_A) {
+                end = MAX_PRINT_A;
+            }
+            if (start < ' ') {
+                start = ' ';
+            }
+            if (end - start >= MAX_PRINT_A - ' ' - 2) {
+                allow_literals = FALSE;
+            }
+            break;
+        }
+    }
+    invlist_iterfinish(invlist);
+
+    /* Here we have figured things out.  Output each range */
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+        if (start >= NUM_ANYOF_CODE_POINTS) {
+            break;
+        }
+        put_range(sv, start, end, allow_literals);
+    }
+    invlist_iterfinish(invlist);
+
+    return;
+}
+
+STATIC SV*
+S_put_charclass_bitmap_innards_common(pTHX_
+        SV* invlist,            /* The bitmap */
+        SV* posixes,            /* Under /l, things like [:word:], \S */
+        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
+        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
+        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
+        const bool invert       /* Is the result to be inverted? */
+)
+{
+    /* Create and return an SV containing a displayable version of the bitmap
+     * and associated information determined by the input parameters.  If the
+     * output would have been only the inversion indicator '^', NULL is instead
+     * returned. */
+
+    SV * output;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
+
+    if (invert) {
+        output = newSVpvs("^");
+    }
+    else {
+        output = newSVpvs("");
+    }
+
+    /* First, the code points in the bitmap that are unconditionally there */
+    put_charclass_bitmap_innards_invlist(output, invlist);
+
+    /* Traditionally, these have been placed after the main code points */
+    if (posixes) {
+        sv_catsv(output, posixes);
+    }
+
+    if (only_utf8 && _invlist_len(only_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8);
+    }
+
+    if (not_utf8 && _invlist_len(not_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, not_utf8);
+    }
+
+    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);
+
+        /* This is the only list in this routine that can legally contain code
+         * points outside the bitmap range.  The call just above to
+         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
+         * output them here.  There's about a half-dozen possible, and none in
+         * contiguous ranges longer than 2 */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            UV start, end;
+            SV* above_bitmap = NULL;
+
+            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);
+
+            invlist_iterinit(above_bitmap);
+            while (invlist_iternext(above_bitmap, &start, &end)) {
+                UV i;
+
+                for (i = start; i <= end; i++) {
+                    put_code_point(output, i);
+                }
+            }
+            invlist_iterfinish(above_bitmap);
+            SvREFCNT_dec_NN(above_bitmap);
+        }
+    }
+
+    if (invert && SvCUR(output) == 1) {
+        return NULL;
+    }
+
+    return output;
+}
+
+STATIC U8
+S_put_charclass_bitmap_innards(pTHX_ SV *sv,
+                                     char *bitmap,
+                                     SV *nonbitmap_invlist,
+                                     SV *only_utf8_locale_invlist,
+                                     const regnode * const node,
+                                     const U8 flags,
+                                     const bool force_as_is_display)
+{
+    /* Appends to 'sv' a displayable version of the innards of the bracketed
+     * character class defined by the other arguments:
+     *  'bitmap' points to the bitmap, or NULL if to ignore that.
+     *  'nonbitmap_invlist' is an inversion list of the code points that are in
+     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
+     *      none.  The reasons for this could be that they require some
+     *      condition such as the target string being or not being in UTF-8
+     *      (under /d), or because they came from a user-defined property that
+     *      was not resolved at the time of the regex compilation (under /u)
+     *  'only_utf8_locale_invlist' is an inversion list of the code points that
+     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
+     *  'node' is the regex pattern ANYOF node.  It is needed only when the
+     *      above two parameters are not null, and is passed so that this
+     *      routine can tease apart the various reasons for them.
+     *  'flags' is the flags field of 'node'
+     *  'force_as_is_display' is TRUE if this routine should definitely NOT try
+     *      to invert things to see if that leads to a cleaner display.  If
+     *      FALSE, this routine is free to use its judgment about doing this.
+     *
+     * It returns 0 if nothing was actually output.  (It may be that
+     *              the bitmap, etc is empty.)
+     *            1 if the output wasn't inverted (didn't begin with a '^')
+     *            2 if the output was inverted (did begin with a '^')
+     *
+     * When called for outputting the bitmap of a non-ANYOF node, just pass the
+     * bitmap, with the succeeding parameters set to NULL, and the final one to
+     * FALSE.
+     */
+
+    /* In general, it tries to display the 'cleanest' representation of the
+     * innards, choosing whether to display them inverted or not, regardless of
+     * whether the class itself is to be inverted.  However,  there are some
+     * cases where it can't try inverting, as what actually matches isn't known
+     * until runtime, and hence the inversion isn't either. */
+
+    bool inverting_allowed = ! force_as_is_display;
+
+    int i;
+    STRLEN orig_sv_cur = SvCUR(sv);
+
+    SV* invlist;            /* Inversion list we accumulate of code points that
+                               are unconditionally matched */
+    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
+                               UTF-8 */
+    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
+                             */
+    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
+    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
+                                       is UTF-8 */
+
+    SV* as_is_display;      /* The output string when we take the inputs
+                               literally */
+    SV* inverted_display;   /* The output string when we invert the inputs */
+
+    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
+                                                   to match? */
+    /* We are biased in favor of displaying things without them being inverted,
+     * as that is generally easier to understand */
+    const int bias = 5;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
+
+    /* Start off with whatever code points are passed in.  (We clone, so we
+     * don't change the caller's list) */
+    if (nonbitmap_invlist) {
+        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
+        invlist = invlist_clone(nonbitmap_invlist, NULL);
+    }
+    else {  /* Worst case size is every other code point is matched */
+        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
+    }
+
+    if (flags) {
+        if (OP(node) == ANYOFD) {
+
+            /* This flag indicates that the code points below 0x100 in the
+             * nonbitmap list are precisely the ones that match only when the
+             * target is UTF-8 (they should all be non-ASCII). */
+            if (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES) {
+                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
+                _invlist_subtract(invlist, only_utf8, &invlist);
+            }
+
+            /* And this flag for matching all non-ASCII 0xFF and below */
+            if (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared) {
+                not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
+            }
+        }
+        else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
+
+            /* If either of these flags are set, what matches isn't
+             * determinable except during execution, so don't know enough here
+             * to invert */
+            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
+                inverting_allowed = FALSE;
+            }
+
+            /* What the posix classes match also varies at runtime, so these
+             * will be output symbolically. */
+            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
+                int i;
+
+                posixes = newSVpvs("");
+                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
+                    if (ANYOF_POSIXL_TEST(node, i)) {
+                        sv_catpv(posixes, anyofs[i]);
+                    }
+                }
+            }
+        }
+    }
+
+    /* Accumulate the bit map into the unconditional match list */
+    if (bitmap) {
+        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (BITMAP_TEST(bitmap, i)) {
+                int start = i++;
+                for (;
+                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
+                     i++)
+                { /* empty */ }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
+            }
+        }
+    }
+
+    /* Make sure that the conditional match lists don't have anything in them
+     * that match unconditionally; otherwise the output is quite confusing.
+     * This could happen if the code that populates these misses some
+     * duplication. */
+    if (only_utf8) {
+        _invlist_subtract(only_utf8, invlist, &only_utf8);
+    }
+    if (not_utf8) {
+        _invlist_subtract(not_utf8, invlist, &not_utf8);
+    }
+
+    if (only_utf8_locale_invlist) {
+
+        /* Since this list is passed in, we have to make a copy before
+         * modifying it */
+        only_utf8_locale = invlist_clone(only_utf8_locale_invlist, NULL);
+
+        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
+
+        /* And, it can get really weird for us to try outputting an inverted
+         * form of this list when it has things above the bitmap, so don't even
+         * try */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            inverting_allowed = FALSE;
+        }
+    }
+
+    /* Calculate what the output would be if we take the input as-is */
+    as_is_display = put_charclass_bitmap_innards_common(invlist,
+                                                    posixes,
+                                                    only_utf8,
+                                                    not_utf8,
+                                                    only_utf8_locale,
+                                                    invert);
+
+    /* If have to take the output as-is, just do that */
+    if (! inverting_allowed) {
+        if (as_is_display) {
+            sv_catsv(sv, as_is_display);
+            SvREFCNT_dec_NN(as_is_display);
+        }
+    }
+    else { /* But otherwise, create the output again on the inverted input, and
+              use whichever version is shorter */
+
+        int inverted_bias, as_is_bias;
+
+        /* We will apply our bias to whichever of the results doesn't have
+         * the '^' */
+        bool trial_invert;
+        if (invert) {
+            trial_invert = FALSE;
+            as_is_bias = bias;
+            inverted_bias = 0;
+        }
+        else {
+            trial_invert = TRUE;
+            as_is_bias = 0;
+            inverted_bias = bias;
+        }
+
+        /* Now invert each of the lists that contribute to the output,
+         * excluding from the result things outside the possible range */
+
+        /* For the unconditional inversion list, we have to add in all the
+         * conditional code points, so that when inverted, they will be gone
+         * from it */
+        _invlist_union(only_utf8, invlist, &invlist);
+        _invlist_union(not_utf8, invlist, &invlist);
+        _invlist_union(only_utf8_locale, invlist, &invlist);
+        _invlist_invert(invlist);
+        _invlist_intersection(invlist, PL_InBitmap, &invlist);
+
+        if (only_utf8) {
+            _invlist_invert(only_utf8);
+            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
+        }
+        else if (not_utf8) {
+
+            /* If a code point matches iff the target string is not in UTF-8,
+             * then complementing the result has it not match iff not in UTF-8,
+             * which is the same thing as matching iff it is UTF-8. */
+            only_utf8 = not_utf8;
+            not_utf8 = NULL;
+        }
+
+        if (only_utf8_locale) {
+            _invlist_invert(only_utf8_locale);
+            _invlist_intersection(only_utf8_locale,
+                                  PL_InBitmap,
+                                  &only_utf8_locale);
+        }
+
+        inverted_display = put_charclass_bitmap_innards_common(
+                                            invlist,
+                                            posixes,
+                                            only_utf8,
+                                            not_utf8,
+                                            only_utf8_locale, trial_invert);
+
+        /* Use the shortest representation, taking into account our bias
+         * against showing it inverted */
+        if (   inverted_display
+            && (   ! as_is_display
+                || (  SvCUR(inverted_display) + inverted_bias
+                    < SvCUR(as_is_display)    + as_is_bias)))
+        {
+            sv_catsv(sv, inverted_display);
+            invert = ! invert;
+        }
+        else if (as_is_display) {
+            sv_catsv(sv, as_is_display);
+        }
+
+        SvREFCNT_dec(as_is_display);
+        SvREFCNT_dec(inverted_display);
+    }
+
+    SvREFCNT_dec_NN(invlist);
+    SvREFCNT_dec(only_utf8);
+    SvREFCNT_dec(not_utf8);
+    SvREFCNT_dec(posixes);
+    SvREFCNT_dec(only_utf8_locale);
+
+    U8 did_output_something = (bool) (SvCUR(sv) > orig_sv_cur);
+    if (did_output_something) {
+        /* Distinguish between non and inverted cases */
+        did_output_something += invert;
+    }
+
+    return did_output_something;
+}
+
+
+const regnode *
+Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
+            const regnode *last, const regnode *plast,
+            SV* sv, I32 indent, U32 depth)
+{
+    const regnode *next;
+    const regnode *optstart= NULL;
+
+    RXi_GET_DECL(r, ri);
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_DUMPUNTIL;
+
+#ifdef DEBUG_DUMPUNTIL
+    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n", indent, node-start,
+        last ? last-start : 0, plast ? plast-start : 0);
+#endif
+
+    if (plast && plast < last)
+        last= plast;
+
+    while (node && (!last || node < last)) {
+        const U8 op = OP(node);
+
+        if (op == CLOSE || op == SRCLOSE || op == WHILEM)
+            indent--;
+        next = regnext((regnode *)node);
+        const regnode *after = regnode_after((regnode *)node,0);
+
+        /* Where, what. */
+        if (op == OPTIMIZED) {
+            if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
+                optstart = node;
+            else
+                goto after_print;
+        } else
+            CLEAR_OPTSTART;
+
+        regprop(r, sv, node, NULL, NULL);
+        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
+                      (int)(2*indent + 1), "", SvPVX_const(sv));
+
+        if (op != OPTIMIZED) {
+            if (next == NULL)           /* Next ptr. */
+                Perl_re_printf( aTHX_  " (0)");
+            else if (REGNODE_TYPE(op) == BRANCH
+                     && REGNODE_TYPE(OP(next)) != BRANCH )
+                Perl_re_printf( aTHX_  " (FAIL)");
+            else
+                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
+            Perl_re_printf( aTHX_ "\n");
+        }
+
+      after_print:
+        if (REGNODE_TYPE(op) == BRANCHJ) {
+            assert(next);
+            const regnode *nnode = (OP(next) == LONGJMP
+                                   ? regnext((regnode *)next)
+                                   : next);
+            if (last && nnode > last)
+                nnode = last;
+            DUMPUNTIL(after, nnode);
+        }
+        else if (REGNODE_TYPE(op) == BRANCH) {
+            assert(next);
+            DUMPUNTIL(after, next);
+        }
+        else if ( REGNODE_TYPE(op)  == TRIE ) {
+            const regnode *this_trie = node;
+            const U32 n = ARG(node);
+            const reg_ac_data * const ac = op>=AHOCORASICK ?
+               (reg_ac_data *)ri->data->data[n] :
+               NULL;
+            const reg_trie_data * const trie =
+                (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
+#ifdef DEBUGGING
+            AV *const trie_words
+                           = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
+#endif
+            const regnode *nextbranch= NULL;
+            I32 word_idx;
+            SvPVCLEAR(sv);
+            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
+                SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);
+
+                Perl_re_indentf( aTHX_  "%s ",
+                    indent+3,
+                    elem_ptr
+                    ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
+                                SvCUR(*elem_ptr), PL_dump_re_max_len,
+                                PL_colors[0], PL_colors[1],
+                                (SvUTF8(*elem_ptr)
+                                 ? PERL_PV_ESCAPE_UNI
+                                 : 0)
+                                | PERL_PV_PRETTY_ELLIPSES
+                                | PERL_PV_PRETTY_LTGT
+                            )
+                    : "???"
+                );
+                if (trie->jump) {
+                    U16 dist= trie->jump[word_idx+1];
+                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
+                               (UV)((dist ? this_trie + dist : next) - start));
+                    if (dist) {
+                        if (!nextbranch)
+                            nextbranch= this_trie + trie->jump[0];
+                        DUMPUNTIL(this_trie + dist, nextbranch);
+                    }
+                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
+                        nextbranch= regnext((regnode *)nextbranch);
+                } else {
+                    Perl_re_printf( aTHX_  "\n");
+                }
+            }
+            if (last && next > last)
+                node= last;
+            else
+                node= next;
+        }
+        else if ( op == CURLY ) {   /* "next" might be very big: optimizer */
+            DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
+        }
+        else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
+            assert(next);
+            DUMPUNTIL(after, next);
+        }
+        else if ( op == PLUS || op == STAR) {
+            DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
+        }
+        else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
+            /* Literal string, where present. */
+            node = (const regnode *)REGNODE_AFTER_varies(node);
+        }
+        else {
+            node = REGNODE_AFTER_opcode(node,op);
+        }
+        if (op == CURLYX || op == OPEN || op == SROPEN)
+            indent++;
+        if (REGNODE_TYPE(op) == END)
+            break;
+    }
+    CLEAR_OPTSTART;
+#ifdef DEBUG_DUMPUNTIL
+    Perl_re_printf( aTHX_  "--- %d\n", (int)indent);
+#endif
+    return node;
+}
+
+#endif  /* DEBUGGING */
diff --git a/regcomp_internal.h b/regcomp_internal.h
new file mode 100644
index 0000000000..f8682f823d
--- /dev/null
+++ b/regcomp_internal.h
@@ -0,0 +1,1196 @@
+#ifndef REGCOMP_INTERNAL_H
+#define REGCOMP_INTERNAL_H
+#ifndef STATIC
+#define STATIC  static
+#endif
+
+/* this is a chain of data about sub patterns we are processing that
+   need to be handled separately/specially in study_chunk. Its so
+   we can simulate recursion without losing state.  */
+struct scan_frame;
+typedef struct scan_frame {
+    regnode *last_regnode;      /* last node to process in this frame */
+    regnode *next_regnode;      /* next node to process when last is reached */
+    U32 prev_recursed_depth;
+    I32 stopparen;              /* what stopparen do we use */
+    bool in_gosub;              /* this or an outer frame is for GOSUB */
+
+    struct scan_frame *this_prev_frame; /* this previous frame */
+    struct scan_frame *prev_frame;      /* previous frame */
+    struct scan_frame *next_frame;      /* next frame */
+} scan_frame;
+
+/* Certain characters are output as a sequence with the first being a
+ * backslash. */
+#define isBACKSLASHED_PUNCT(c)  memCHRs("-[]\\^", c)
+
+
+struct RExC_state_t {
+    U32         flags;                  /* RXf_* are we folding, multilining? */
+    U32         pm_flags;               /* PMf_* stuff from the calling PMOP */
+    char        *precomp;               /* uncompiled string. */
+    char        *precomp_end;           /* pointer to end of uncompiled string. */
+    REGEXP      *rx_sv;                 /* The SV that is the regexp. */
+    regexp      *rx;                    /* perl core regexp structure */
+    regexp_internal     *rxi;           /* internal data for regexp object
+                                           pprivate field */
+    char        *start;                 /* Start of input for compile */
+    char        *end;                   /* End of input for compile */
+    char        *parse;                 /* Input-scan pointer. */
+    char        *copy_start;            /* start of copy of input within
+                                           constructed parse string */
+    char        *save_copy_start;       /* Provides one level of saving
+                                           and restoring 'copy_start' */
+    char        *copy_start_in_input;   /* Position in input string
+                                           corresponding to copy_start */
+    SSize_t     whilem_seen;            /* number of WHILEM in this expr */
+    regnode     *emit_start;            /* Start of emitted-code area */
+    regnode_offset emit;                /* Code-emit pointer */
+    I32         naughty;                /* How bad is this pattern? */
+    I32         sawback;                /* Did we see \1, ...? */
+    SSize_t     size;                   /* Number of regnode equivalents in
+                                           pattern */
+    Size_t      sets_depth;              /* Counts recursion depth of already-
+                                           compiled regex set patterns */
+    U32         seen;
+
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
+    regnode_offset *open_parens;        /* offsets to open parens */
+    regnode_offset *close_parens;       /* offsets to close parens */
+    HV          *paren_names;           /* Paren names */
+
+    /* position beyond 'precomp' of the warning message furthest away from
+     * 'precomp'.  During the parse, no warnings are raised for any problems
+     * earlier in the parse than this position.  This works if warnings are
+     * raised the first time a given spot is parsed, and if only one
+     * independent warning is raised for any given spot */
+    Size_t      latest_warn_offset;
+
+    I32         npar;                   /* Capture buffer count so far in the
+                                           parse, (OPEN) plus one. ("par" 0 is
+                                           the whole pattern)*/
+    I32         total_par;              /* During initial parse, is either 0,
+                                           or -1; the latter indicating a
+                                           reparse is needed.  After that pass,
+                                           it is what 'npar' became after the
+                                           pass.  Hence, it being > 0 indicates
+                                           we are in a reparse situation */
+    I32         nestroot;               /* root parens we are in - used by
+                                           accept */
+    I32         seen_zerolen;
+    regnode     *end_op;                /* END node in program */
+    I32         utf8;           /* whether the pattern is utf8 or not */
+    I32         orig_utf8;      /* whether the pattern was originally in utf8 */
+                                /* XXX use this for future optimisation of case
+                                 * where pattern must be upgraded to utf8. */
+    I32         uni_semantics;  /* If a d charset modifier should use unicode
+                                   rules, even if the pattern is not in
+                                   utf8 */
+
+    I32         recurse_count;          /* Number of recurse regops we have generated */
+    regnode     **recurse;              /* Recurse regops */
+    U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
+                                           through */
+    U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
+    I32         in_lookaround;
+    I32         contains_locale;
+    I32         override_recoding;
+    I32         recode_x_to_native;
+    I32         in_multi_char_class;
+    int         code_index;             /* next code_blocks[] slot */
+    struct reg_code_blocks *code_blocks;/* positions of literal (?{})
+                                            within pattern */
+    SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
+    scan_frame *frame_head;
+    scan_frame *frame_last;
+    U32         frame_count;
+    AV         *warn_text;
+    HV         *unlexed_names;
+    SV          *runtime_code_qr;       /* qr with the runtime code blocks */
+#ifdef DEBUGGING
+    const char  *lastparse;
+    I32         lastnum;
+    U32         study_chunk_recursed_count;
+    AV          *paren_name_list;       /* idx -> name */
+    SV          *mysv1;
+    SV          *mysv2;
+
+#define RExC_lastparse  (pRExC_state->lastparse)
+#define RExC_lastnum    (pRExC_state->lastnum)
+#define RExC_paren_name_list    (pRExC_state->paren_name_list)
+#define RExC_study_chunk_recursed_count    (pRExC_state->study_chunk_recursed_count)
+#define RExC_mysv       (pRExC_state->mysv1)
+#define RExC_mysv1      (pRExC_state->mysv1)
+#define RExC_mysv2      (pRExC_state->mysv2)
+
+#endif
+    bool        seen_d_op;
+    bool        strict;
+    bool        study_started;
+    bool        in_script_run;
+    bool        use_BRANCHJ;
+    bool        sWARN_EXPERIMENTAL__VLB;
+    bool        sWARN_EXPERIMENTAL__REGEX_SETS;
+};
+
+#define RExC_flags      (pRExC_state->flags)
+#define RExC_pm_flags   (pRExC_state->pm_flags)
+#define RExC_precomp    (pRExC_state->precomp)
+#define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
+#define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
+#define RExC_precomp_end (pRExC_state->precomp_end)
+#define RExC_rx_sv      (pRExC_state->rx_sv)
+#define RExC_rx         (pRExC_state->rx)
+#define RExC_rxi        (pRExC_state->rxi)
+#define RExC_start      (pRExC_state->start)
+#define RExC_end        (pRExC_state->end)
+#define RExC_parse      (pRExC_state->parse)
+#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
+#define RExC_whilem_seen        (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+                                                   under /d from /u ? */
+
+#define RExC_emit       (pRExC_state->emit)
+#define RExC_emit_start (pRExC_state->emit_start)
+#define RExC_sawback    (pRExC_state->sawback)
+#define RExC_seen       (pRExC_state->seen)
+#define RExC_size       (pRExC_state->size)
+#define RExC_maxlen        (pRExC_state->maxlen)
+#define RExC_npar       (pRExC_state->npar)
+#define RExC_total_parens       (pRExC_state->total_par)
+#define RExC_parens_buf_size    (pRExC_state->parens_buf_size)
+#define RExC_nestroot   (pRExC_state->nestroot)
+#define RExC_seen_zerolen       (pRExC_state->seen_zerolen)
+#define RExC_utf8       (pRExC_state->utf8)
+#define RExC_uni_semantics      (pRExC_state->uni_semantics)
+#define RExC_orig_utf8  (pRExC_state->orig_utf8)
+#define RExC_open_parens        (pRExC_state->open_parens)
+#define RExC_close_parens       (pRExC_state->close_parens)
+#define RExC_end_op     (pRExC_state->end_op)
+#define RExC_paren_names        (pRExC_state->paren_names)
+#define RExC_recurse    (pRExC_state->recurse)
+#define RExC_recurse_count      (pRExC_state->recurse_count)
+#define RExC_sets_depth         (pRExC_state->sets_depth)
+#define RExC_study_chunk_recursed        (pRExC_state->study_chunk_recursed)
+#define RExC_study_chunk_recursed_bytes  \
+                                   (pRExC_state->study_chunk_recursed_bytes)
+#define RExC_in_lookaround      (pRExC_state->in_lookaround)
+#define RExC_contains_locale    (pRExC_state->contains_locale)
+#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+
+#ifdef EBCDIC
+#  define SET_recode_x_to_native(x)                                         \
+                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
+#else
+#  define SET_recode_x_to_native(x) NOOP
+#endif
+
+#define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
+#define RExC_frame_head (pRExC_state->frame_head)
+#define RExC_frame_last (pRExC_state->frame_last)
+#define RExC_frame_count (pRExC_state->frame_count)
+#define RExC_strict (pRExC_state->strict)
+#define RExC_study_started      (pRExC_state->study_started)
+#define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
+#define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
+#define RExC_warned_WARN_EXPERIMENTAL__VLB (pRExC_state->sWARN_EXPERIMENTAL__VLB)
+#define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
+#define RExC_unlexed_names (pRExC_state->unlexed_names)
+
+
+/***********************************************************************/
+/* UTILITY MACROS FOR ADVANCING OR SETTING THE PARSE "CURSOR" RExC_parse
+ *
+ * All of these macros depend on the above RExC_ accessor macros, which
+ * in turns depend on a variable pRExC_state being in scope where they
+ * are used. This is the standard regexp parser context variable which is
+ * passed into every non-trivial parse function in this file.
+ *
+ * Note that the UTF macro is itself a wrapper around RExC_utf8, so all
+ * of the macros which do not take an argument will operate on the
+ * pRExC_state structure *only*.
+ *
+ * Please do NOT modify RExC_parse without using these macros. In the
+ * future these macros will be extended for enhanced debugging and trace
+ * output during the parse process.
+ */
+
+/* RExC_parse_incf(flag)
+ *
+ * Increment RExC_parse to point at the next codepoint, while doing
+ * the right thing depending on whether we are parsing UTF-8 strings
+ * or not. The 'flag' argument determines if content is UTF-8 or not,
+ * intended for cases where this is NOT governed by the UTF macro.
+ *
+ * Use RExC_parse_inc() if UTF-8ness is controlled by the UTF macro.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the callers
+ * responsibility to make sure there are enough octets left in
+ * RExC_parse to ensure that when processing UTF-8 we would not read
+ * past the end of the string.
+ */
+#define RExC_parse_incf(flag) STMT_START {              \
+    RExC_parse += (flag) ? UTF8SKIP(RExC_parse) : 1;    \
+} STMT_END
+
+/* RExC_parse_inc_safef(flag)
+ *
+ * Safely increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not and NOT reading past the end of the buffer.
+ * The 'flag' argument determines if content is UTF-8 or not,
+ * intended for cases where this is NOT governed by the UTF macro.
+ *
+ * Use RExC_parse_safe() if UTF-8ness is controlled by the UTF macro.
+ *
+ * NOTE: Will NOT read past RExC_end when content is UTF-8.
+ */
+#define RExC_parse_inc_safef(flag) STMT_START {                     \
+    RExC_parse += (flag) ? UTF8_SAFE_SKIP(RExC_parse,RExC_end) : 1; \
+} STMT_END
+
+/* RExC_parse_inc()
+ *
+ * Increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not.
+ *
+ * WARNING: Does NOT take into account RExC_end, it is the callers
+ * responsibility to make sure there are enough octets left in
+ * RExC_parse to ensure that when processing UTF-8 we would not read
+ * past the end of the string.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc() RExC_parse_incf(UTF)
+
+/* RExC_parse_inc_safe()
+ *
+ * Safely increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not and NOT reading past the end of the buffer.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc_safe() RExC_parse_inc_safef(UTF)
+
+/* RExC_parse_inc_utf8()
+ *
+ * Increment RExC_parse to point at the next utf8 codepoint,
+ * assumes content is UTF-8.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the callers
+ * responsibility to make sure there are enough octets left in RExC_parse
+ * to ensure that when processing UTF-8 we would not read past the end
+ * of the string.
+ */
+#define RExC_parse_inc_utf8() STMT_START {  \
+    RExC_parse += UTF8SKIP(RExC_parse);     \
+} STMT_END
+
+/* RExC_parse_inc_if_char()
+ *
+ * Increment RExC_parse to point at the next codepoint, if and only
+ * if the current parse point is NOT a NULL, while doing the right thing
+ * depending on whether we are parsing UTF-8 strings or not.
+ *
+ * WARNING: Does NOT take into account RExC_end, it is the callers
+ * responsibility to make sure there are enough octets left in RExC_parse
+ * to ensure that when processing UTF-8 we would not read past the end
+ * of the string.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_parse_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc_if_char() STMT_START {         \
+    RExC_parse += SKIP_IF_CHAR(RExC_parse,RExC_end);  \
+} STMT_END
+
+/* RExC_parse_inc_by(n_octets)
+ *
+ * Increment the parse cursor by the number of octets specified by
+ * the 'n_octets' argument.
+ *
+ * NOTE: Does NOT check ANY constraints. It is the callers responsibility
+ * that this will not move past the end of the string, or leave the
+ * pointer in the middle of a UTF-8 sequence.
+ *
+ * Typically used to advanced past previously analyzed content.
+ */
+#define RExC_parse_inc_by(n_octets) STMT_START {  \
+    RExC_parse += (n_octets);                     \
+} STMT_END
+
+/* RExC_parse_set(to_ptr)
+ *
+ * Sets the RExC_parse pointer to the pointer specified by the 'to'
+ * argument. No validation whatsoever is performed on the to pointer.
+ */
+#define RExC_parse_set(to_ptr) STMT_START { \
+    RExC_parse = (to_ptr);                  \
+} STMT_END
+
+/**********************************************************************/
+
+/* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
+ * a flag to disable back-off on the fixed/floating substrings - if it's
+ * a high complexity pattern we assume the benefit of avoiding a full match
+ * is worth the cost of checking for the substrings even if they rarely help.
+ */
+#define RExC_naughty    (pRExC_state->naughty)
+#define TOO_NAUGHTY (10)
+#define MARK_NAUGHTY(add) \
+    if (RExC_naughty < TOO_NAUGHTY) \
+        RExC_naughty += (add)
+#define MARK_NAUGHTY_EXP(exp, add) \
+    if (RExC_naughty < TOO_NAUGHTY) \
+        RExC_naughty += RExC_naughty / (exp) + (add)
+
+#define isNON_BRACE_QUANTIFIER(c)   ((c) == '*' || (c) == '+' || (c) == '?')
+#define isQUANTIFIER(s,e)  (   isNON_BRACE_QUANTIFIER(*s)                      \
+                            || ((*s) == '{' && regcurly(s, e, NULL)))
+
+/*
+ * Flags to be passed up.
+ */
+#define HASWIDTH        0x01    /* Known to not match null strings, could match
+                                   non-null ones. */
+#define SIMPLE          0x02    /* Exactly one character wide */
+                                /* (or LNBREAK as a special case) */
+#define POSTPONED       0x08    /* (?1),(?&name), (??{...}) or similar */
+#define TRYAGAIN        0x10    /* Weeded out a declaration. */
+#define RESTART_PARSE   0x20    /* Need to redo the parse */
+#define NEED_UTF8       0x40    /* In conjunction with RESTART_PARSE, need to
+                                   calcuate sizes as UTF-8 */
+
+#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
+
+/* whether trie related optimizations are enabled */
+#if PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION
+#define TRIE_STUDY_OPT
+#define FULL_TRIE_STUDY
+#define TRIE_STCLASS
+#endif
+
+/* About the term "restudy" and the var "restudied" and the defines
+ * "SCF_TRIE_RESTUDY" and "SCF_TRIE_DOING_RESTUDY": All of these relate to
+ * doing multiple study_chunk() calls over the same set of opcodes for* the
+ * purpose of enhanced TRIE optimizations.
+ *
+ * Specifically, when TRIE_STUDY_OPT is defined, and it is defined in normal
+ * builds, (see above), during compilation SCF_TRIE_RESTUDY may be enabled
+ * which then causes the Perl_re_op_compile() to then call the optimizer
+ * S_study_chunk() a second time to perform additional optimizations,
+ * including the aho_corasick startclass optimization.
+ * This additional pass will only happen once, which is managed by the
+ * 'restudied' variable in Perl_re_op_compile().
+ *
+ * When this second pass is under way the flags passed into study_chunk() will
+ * include SCF_TRIE_DOING_RESTUDY and this flag is and must be cascaded down
+ * to any recursive calls to S_study_chunk().
+ *
+ * IMPORTANT: Any logic in study_chunk() that emits warnings should check that
+ * the SCF_TRIE_DOING_RESTUDY flag is NOT set in 'flags', or the warning may
+ * be produced twice.
+ *
+ * See commit 07be1b83a6b2d24b492356181ddf70e1c7917ae3 and
+ * 688e03912e3bff2d2419c457d8b0e1bab3eb7112 for more details.
+ */
+
+
+#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
+#define PBITVAL(paren) (1 << ((paren) & 7))
+#define PAREN_OFFSET(depth) \
+    (RExC_study_chunk_recursed + (depth) * RExC_study_chunk_recursed_bytes)
+#define PAREN_TEST(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) & PBITVAL(paren))
+#define PAREN_SET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) |= PBITVAL(paren))
+#define PAREN_UNSET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) &= ~PBITVAL(paren))
+
+#define REQUIRE_UTF8(flagp) STMT_START {                                   \
+                                     if (!UTF) {                           \
+                                         *flagp = RESTART_PARSE|NEED_UTF8; \
+                                         return 0;                         \
+                                     }                                     \
+                             } STMT_END
+
+/* /u is to be chosen if we are supposed to use Unicode rules, or if the
+ * pattern is in UTF-8.  This latter condition is in case the outermost rules
+ * are locale.  See GH #17278 */
+#define toUSE_UNI_CHARSET_NOT_DEPENDS (RExC_uni_semantics || UTF)
+
+/* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
+ * a flag that indicates we need to override /d with /u as a result of
+ * something in the pattern.  It should only be used in regards to calling
+ * set_regex_charset() or get_regex_charset() */
+#define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
+    STMT_START {                                                            \
+            if (DEPENDS_SEMANTICS) {                                        \
+                set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
+                RExC_uni_semantics = 1;                                     \
+                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
+                    /* No need to restart the parse if we haven't seen      \
+                     * anything that differs between /u and /d, and no need \
+                     * to restart immediately if we're going to reparse     \
+                     * anyway to count parens */                            \
+                    *flagp |= RESTART_PARSE;                                \
+                    return restart_retval;                                  \
+                }                                                           \
+            }                                                               \
+    } STMT_END
+
+#define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
+    STMT_START {                                                            \
+                RExC_use_BRANCHJ = 1;                                       \
+                *flagp |= RESTART_PARSE;                                    \
+                return restart_retval;                                      \
+    } STMT_END
+
+/* Until we have completed the parse, we leave RExC_total_parens at 0 or
+ * less.  After that, it must always be positive, because the whole re is
+ * considered to be surrounded by virtual parens.  Setting it to negative
+ * indicates there is some construct that needs to know the actual number of
+ * parens to be properly handled.  And that means an extra pass will be
+ * required after we've counted them all */
+#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
+#define REQUIRE_PARENS_PASS                                                 \
+    STMT_START {  /* No-op if have completed a pass */                      \
+                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
+    } STMT_END
+#define IN_PARENS_PASS (RExC_total_parens < 0)
+
+
+/* This is used to return failure (zero) early from the calling function if
+ * various flags in 'flags' are set.  Two flags always cause a return:
+ * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
+ * additional flags that should cause a return; 0 if none.  If the return will
+ * be done, '*flagp' is first set to be all of the flags that caused the
+ * return. */
+#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
+    STMT_START {                                                            \
+            if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
+                *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
+                return 0;                                                   \
+            }                                                               \
+    } STMT_END
+
+#define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
+
+#define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
+                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
+#define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
+                                    if (MUST_RESTART(*(flagp))) return 0
+
+/* This converts the named class defined in regcomp.h to its equivalent class
+ * number defined in handy.h. */
+#define namedclass_to_classnum(class)  ((int) ((class) / 2))
+#define classnum_to_namedclass(classnum)  ((classnum) * 2)
+
+#define _invlist_union_complement_2nd(a, b, output) \
+                        _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
+#define _invlist_intersection_complement_2nd(a, b, output) \
+                 _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
+
+/* We add a marker if we are deferring expansion of a property that is both
+ * 1) potentiallly user-defined; and
+ * 2) could also be an official Unicode property.
+ *
+ * Without this marker, any deferred expansion can only be for a user-defined
+ * one.  This marker shouldn't conflict with any that could be in a legal name,
+ * and is appended to its name to indicate this.  There is a string and
+ * character form */
+#define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
+#define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
+
+/* What is infinity for optimization purposes */
+#define OPTIMIZE_INFTY  SSize_t_MAX
+
+/* About scan_data_t.
+
+  During optimisation we recurse through the regexp program performing
+  various inplace (keyhole style) optimisations. In addition study_chunk
+  and scan_commit populate this data structure with information about
+  what strings MUST appear in the pattern. We look for the longest
+  string that must appear at a fixed location, and we look for the
+  longest string that may appear at a floating location. So for instance
+  in the pattern:
+
+    /FOO[xX]A.*B[xX]BAR/
+
+  Both 'FOO' and 'A' are fixed strings. Both 'B' and 'BAR' are floating
+  strings (because they follow a .* construct). study_chunk will identify
+  both FOO and BAR as being the longest fixed and floating strings respectively.
+
+  The strings can be composites, for instance
+
+     /(f)(o)(o)/
+
+  will result in a composite fixed substring 'foo'.
+
+  For each string some basic information is maintained:
+
+  - min_offset
+    This is the position the string must appear at, or not before.
+    It also implicitly (when combined with minlenp) tells us how many
+    characters must match before the string we are searching for.
+    Likewise when combined with minlenp and the length of the string it
+    tells us how many characters must appear after the string we have
+    found.
+
+  - max_offset
+    Only used for floating strings. This is the rightmost point that
+    the string can appear at. If set to OPTIMIZE_INFTY it indicates that the
+    string can occur infinitely far to the right.
+    For fixed strings, it is equal to min_offset.
+
+  - minlenp
+    A pointer to the minimum number of characters of the pattern that the
+    string was found inside. This is important as in the case of positive
+    lookahead or positive lookbehind we can have multiple patterns
+    involved. Consider
+
+    /(?=FOO).*F/
+
+    The minimum length of the pattern overall is 3, the minimum length
+    of the lookahead part is 3, but the minimum length of the part that
+    will actually match is 1. So 'FOO's minimum length is 3, but the
+    minimum length for the F is 1. This is important as the minimum length
+    is used to determine offsets in front of and behind the string being
+    looked for.  Since strings can be composites this is the length of the
+    pattern at the time it was committed with a scan_commit. Note that
+    the length is calculated by study_chunk, so that the minimum lengths
+    are not known until the full pattern has been compiled, thus the
+    pointer to the value.
+
+  - lookbehind
+
+    In the case of lookbehind the string being searched for can be
+    offset past the start point of the final matching string.
+    If this value was just blithely removed from the min_offset it would
+    invalidate some of the calculations for how many chars must match
+    before or after (as they are derived from min_offset and minlen and
+    the length of the string being searched for).
+    When the final pattern is compiled and the data is moved from the
+    scan_data_t structure into the regexp structure the information
+    about lookbehind is factored in, with the information that would
+    have been lost precalculated in the end_shift field for the
+    associated string.
+
+  The fields pos_min and pos_delta are used to store the minimum offset
+  and the delta to the maximum offset at the current point in the pattern.
+
+*/
+
+struct scan_data_substrs {
+    SV      *str;       /* longest substring found in pattern */
+    SSize_t min_offset; /* earliest point in string it can appear */
+    SSize_t max_offset; /* latest point in string it can appear */
+    SSize_t *minlenp;   /* pointer to the minlen relevant to the string */
+    SSize_t lookbehind; /* is the pos of the string modified by LB */
+    I32 flags;          /* per substring SF_* and SCF_* flags */
+};
+
+typedef struct scan_data_t {
+    /*I32 len_min;      unused */
+    /*I32 len_delta;    unused */
+    SSize_t pos_min;
+    SSize_t pos_delta;
+    SV *last_found;
+    SSize_t last_end;       /* min value, <0 unless valid. */
+    SSize_t last_start_min;
+    SSize_t last_start_max;
+    U8      cur_is_floating; /* whether the last_* values should be set as
+                              * the next fixed (0) or floating (1)
+                              * substring */
+
+    /* [0] is longest fixed substring so far, [1] is longest float so far */
+    struct scan_data_substrs  substrs[2];
+
+    I32 flags;             /* common SF_* and SCF_* flags */
+    I32 whilem_c;
+    SSize_t *last_closep;
+    regnode **last_close_opp; /* pointer to pointer to last CLOSE regop
+                                 seen. DO NOT DEREFERENCE the regnode
+                                 pointer - the op may have been optimized
+                                 away */
+    regnode_ssc *start_class;
+} scan_data_t;
+
+/*
+ * Forward declarations for pregcomp()'s friends.
+ */
+
+static const scan_data_t zero_scan_data = {
+    0, 0, NULL, 0, 0, 0, 0,
+    {
+        { NULL, 0, 0, 0, 0, 0 },
+        { NULL, 0, 0, 0, 0, 0 },
+    },
+    0, 0, NULL, NULL, NULL
+};
+
+/* study flags */
+
+#define SF_BEFORE_SEOL          0x0001
+#define SF_BEFORE_MEOL          0x0002
+#define SF_BEFORE_EOL           (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
+
+#define SF_IS_INF               0x0040
+#define SF_HAS_PAR              0x0080
+#define SF_IN_PAR               0x0100
+#define SF_HAS_EVAL             0x0200
+
+
+/* SCF_DO_SUBSTR is the flag that tells the regexp analyzer to track the
+ * longest substring in the pattern. When it is not set the optimiser keeps
+ * track of position, but does not keep track of the actual strings seen,
+ *
+ * So for instance /foo/ will be parsed with SCF_DO_SUBSTR being true, but
+ * /foo/i will not.
+ *
+ * Similarly, /foo.*(blah|erm|huh).*fnorble/ will have "foo" and "fnorble"
+ * parsed with SCF_DO_SUBSTR on, but while processing the (...) it will be
+ * turned off because of the alternation (BRANCH). */
+#define SCF_DO_SUBSTR           0x0400
+
+#define SCF_DO_STCLASS_AND      0x0800
+#define SCF_DO_STCLASS_OR       0x1000
+#define SCF_DO_STCLASS          (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
+#define SCF_WHILEM_VISITED_POS  0x2000
+
+#define SCF_TRIE_RESTUDY        0x4000 /* Need to do restudy in study_chunk()?
+                                          Search for "restudy" in this file
+                                          to find a detailed explanation.*/
+#define SCF_SEEN_ACCEPT         0x8000
+#define SCF_TRIE_DOING_RESTUDY 0x10000 /* Are we in restudy right now?
+                                          Search for "restudy" in this file
+                                          to find a detailed explanation. */
+#define SCF_IN_DEFINE          0x20000
+
+
+
+#define UTF cBOOL(RExC_utf8)
+
+/* The enums for all these are ordered so things work out correctly */
+#define LOC (get_regex_charset(RExC_flags) == REGEX_LOCALE_CHARSET)
+#define DEPENDS_SEMANTICS (get_regex_charset(RExC_flags)                    \
+                                                     == REGEX_DEPENDS_CHARSET)
+#define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
+#define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags)                \
+                                                     >= REGEX_UNICODE_CHARSET)
+#define ASCII_RESTRICTED (get_regex_charset(RExC_flags)                      \
+                                            == REGEX_ASCII_RESTRICTED_CHARSET)
+#define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags)             \
+                                            >= REGEX_ASCII_RESTRICTED_CHARSET)
+#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags)                 \
+                                        == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
+
+#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
+
+/* For programs that want to be strictly Unicode compatible by dying if any
+ * attempt is made to match a non-Unicode code point against a Unicode
+ * property.  */
+#define ALWAYS_WARN_SUPER  ckDEAD(packWARN(WARN_NON_UNICODE))
+
+#define OOB_NAMEDCLASS          -1
+
+/* There is no code point that is out-of-bounds, so this is problematic.  But
+ * its only current use is to initialize a variable that is always set before
+ * looked at. */
+#define OOB_UNICODE             0xDEADBEEF
+
+#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
+
+
+/* length of regex to show in messages that don't mark a position within */
+#define RegexLengthToShowInErrorMessages 127
+
+/*
+ * If MARKER[12] are adjusted, be sure to adjust the constants at the top
+ * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
+ * op/pragma/warn/regcomp.
+ */
+#define MARKER1 "<-- HERE"    /* marker as it appears in the description */
+#define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
+
+#define REPORT_LOCATION " in regex; marked by " MARKER1    \
+                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
+
+/* The code in this file in places uses one level of recursion with parsing
+ * rebased to an alternate string constructed by us in memory.  This can take
+ * the form of something that is completely different from the input, or
+ * something that uses the input as part of the alternate.  In the first case,
+ * there should be no possibility of an error, as we are in complete control of
+ * the alternate string.  But in the second case we don't completely control
+ * the input portion, so there may be errors in that.  Here's an example:
+ *      /[abc\x{DF}def]/ui
+ * is handled specially because \x{df} folds to a sequence of more than one
+ * character: 'ss'.  What is done is to create and parse an alternate string,
+ * which looks like this:
+ *      /(?:\x{DF}|[abc\x{DF}def])/ui
+ * where it uses the input unchanged in the middle of something it constructs,
+ * which is a branch for the DF outside the character class, and clustering
+ * parens around the whole thing. (It knows enough to skip the DF inside the
+ * class while in this substitute parse.) 'abc' and 'def' may have errors that
+ * need to be reported.  The general situation looks like this:
+ *
+ *                                       |<------- identical ------>|
+ *              sI                       tI               xI       eI
+ * Input:       ---------------------------------------------------------------
+ * Constructed:         ---------------------------------------------------
+ *                      sC               tC               xC       eC     EC
+ *                                       |<------- identical ------>|
+ *
+ * sI..eI   is the portion of the input pattern we are concerned with here.
+ * sC..EC   is the constructed substitute parse string.
+ *  sC..tC  is constructed by us
+ *  tC..eC  is an exact duplicate of the portion of the input pattern tI..eI.
+ *          In the diagram, these are vertically aligned.
+ *  eC..EC  is also constructed by us.
+ * xC       is the position in the substitute parse string where we found a
+ *          problem.
+ * xI       is the position in the original pattern corresponding to xC.
+ *
+ * We want to display a message showing the real input string.  Thus we need to
+ * translate from xC to xI.  We know that xC >= tC, since the portion of the
+ * string sC..tC has been constructed by us, and so shouldn't have errors.  We
+ * get:
+ *      xI = tI + (xC - tC)
+ *
+ * When the substitute parse is constructed, the code needs to set:
+ *      RExC_start (sC)
+ *      RExC_end (eC)
+ *      RExC_copy_start_in_input  (tI)
+ *      RExC_copy_start_in_constructed (tC)
+ * and restore them when done.
+ *
+ * During normal processing of the input pattern, both
+ * 'RExC_copy_start_in_input' and 'RExC_copy_start_in_constructed' are set to
+ * sI, so that xC equals xI.
+ */
+
+#define sI              RExC_precomp
+#define eI              RExC_precomp_end
+#define sC              RExC_start
+#define eC              RExC_end
+#define tI              RExC_copy_start_in_input
+#define tC              RExC_copy_start_in_constructed
+#define xI(xC)          (tI + (xC - tC))
+#define xI_offset(xC)   (xI(xC) - sI)
+
+#define REPORT_LOCATION_ARGS(xC)                                            \
+    UTF8fARG(UTF,                                                           \
+             (xI(xC) > eI) /* Don't run off end */                          \
+              ? eI - sI   /* Length before the <--HERE */                   \
+              : ((xI_offset(xC) >= 0)                                       \
+                 ? xI_offset(xC)                                            \
+                 : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
+                                    IVdf " trying to output message for "   \
+                                    " pattern %.*s",                        \
+                                    __FILE__, __LINE__, (IV) xI_offset(xC), \
+                                    ((int) (eC - sC)), sC), 0)),            \
+             sI),         /* The input pattern printed up to the <--HERE */ \
+    UTF8fARG(UTF,                                                           \
+             (xI(xC) > eI) ? 0 : eI - xI(xC), /* Length after <--HERE */    \
+             (xI(xC) > eI) ? eI : xI(xC))     /* pattern after <--HERE */
+
+/* Used to point after bad bytes for an error message, but avoid skipping
+ * past a nul byte. */
+#define SKIP_IF_CHAR(s, e) (!*(s) ? 0 : UTF ? UTF8_SAFE_SKIP(s, e) : 1)
+
+/* Set up to clean up after our imminent demise */
+#define PREPARE_TO_DIE                                                      \
+    STMT_START {                                                            \
+        if (RExC_rx_sv)                                                     \
+            SAVEFREESV(RExC_rx_sv);                                         \
+        if (RExC_open_parens)                                               \
+            SAVEFREEPV(RExC_open_parens);                                   \
+        if (RExC_close_parens)                                              \
+            SAVEFREEPV(RExC_close_parens);                                  \
+    } STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
+ * arg. Show regex, up to a maximum length. If it's too long, chop and add
+ * "...".
+ */
+#define _FAIL(code) STMT_START {                                        \
+    const char *ellipses = "";                                          \
+    IV len = RExC_precomp_end - RExC_precomp;                           \
+                                                                        \
+    PREPARE_TO_DIE;                                                     \
+    if (len > RegexLengthToShowInErrorMessages) {                       \
+        /* chop 10 shorter than the max, to ensure meaning of "..." */  \
+        len = RegexLengthToShowInErrorMessages - 10;                    \
+        ellipses = "...";                                               \
+    }                                                                   \
+    code;                                                               \
+} STMT_END
+
+#define FAIL(msg) _FAIL(                            \
+    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",         \
+            msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+
+#define FAIL2(msg,arg) _FAIL(                       \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
+            arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+
+#define FAIL3(msg,arg1,arg2) _FAIL(                         \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",       \
+     arg1, arg2, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+
+/*
+ * Simple_vFAIL -- like FAIL, but marks the current location in the scan
+ */
+#define Simple_vFAIL(m) STMT_START {                                    \
+    Perl_croak(aTHX_ "%s" REPORT_LOCATION,                              \
+            m, REPORT_LOCATION_ARGS(RExC_parse));                       \
+} STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
+ */
+#define vFAIL(m) STMT_START {                           \
+    PREPARE_TO_DIE;                                     \
+    Simple_vFAIL(m);                                    \
+} STMT_END
+
+/*
+ * Like Simple_vFAIL(), but accepts two arguments.
+ */
+#define Simple_vFAIL2(m,a1) STMT_START {                        \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,                \
+                      REPORT_LOCATION_ARGS(RExC_parse));        \
+} STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
+ */
+#define vFAIL2(m,a1) STMT_START {                       \
+    PREPARE_TO_DIE;                                     \
+    Simple_vFAIL2(m, a1);                               \
+} STMT_END
+
+
+/*
+ * Like Simple_vFAIL(), but accepts three arguments.
+ */
+#define Simple_vFAIL3(m, a1, a2) STMT_START {                   \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,            \
+            REPORT_LOCATION_ARGS(RExC_parse));                  \
+} STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
+ */
+#define vFAIL3(m,a1,a2) STMT_START {                    \
+    PREPARE_TO_DIE;                                     \
+    Simple_vFAIL3(m, a1, a2);                           \
+} STMT_END
+
+/*
+ * Like Simple_vFAIL(), but accepts four arguments.
+ */
+#define Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3,        \
+            REPORT_LOCATION_ARGS(RExC_parse));                  \
+} STMT_END
+
+#define vFAIL4(m,a1,a2,a3) STMT_START {                 \
+    PREPARE_TO_DIE;                                     \
+    Simple_vFAIL4(m, a1, a2, a3);                       \
+} STMT_END
+
+/* A specialized version of vFAIL2 that works with UTF8f */
+#define vFAIL2utf8f(m, a1) STMT_START {             \
+    PREPARE_TO_DIE;                                 \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
+            REPORT_LOCATION_ARGS(RExC_parse));      \
+} STMT_END
+
+#define vFAIL3utf8f(m, a1, a2) STMT_START {             \
+    PREPARE_TO_DIE;                                     \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
+            REPORT_LOCATION_ARGS(RExC_parse));          \
+} STMT_END
+
+/* Setting this to NULL is a signal to not output warnings */
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
+    STMT_START {                                                            \
+      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
+      RExC_copy_start_in_constructed = NULL;                                \
+    } STMT_END
+#define RESTORE_WARNINGS                                                    \
+    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
+
+/* Since a warning can be generated multiple times as the input is reparsed, we
+ * output it the first time we come to that point in the parse, but suppress it
+ * otherwise.  'RExC_copy_start_in_constructed' being NULL is a flag to not
+ * generate any warnings */
+#define TO_OUTPUT_WARNINGS(loc)                                         \
+  (   RExC_copy_start_in_constructed                                    \
+   && ((xI(loc)) - RExC_precomp) > (Ptrdiff_t) RExC_latest_warn_offset)
+
+/* After we've emitted a warning, we save the position in the input so we don't
+ * output it again */
+#define UPDATE_WARNINGS_LOC(loc)                                        \
+    STMT_START {                                                        \
+        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
+            RExC_latest_warn_offset = MAX(sI, MIN(eI, xI(loc)))         \
+                                                       - RExC_precomp;  \
+        }                                                               \
+    } STMT_END
+
+/* 'warns' is the output of the packWARNx macro used in 'code' */
+#define _WARN_HELPER(loc, warns, code)                                  \
+    STMT_START {                                                        \
+        if (! RExC_copy_start_in_constructed) {                         \
+            Perl_croak( aTHX_ "panic! %s: %d: Tried to warn when none"  \
+                              " expected at '%s'",                      \
+                              __FILE__, __LINE__, loc);                 \
+        }                                                               \
+        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
+            if (ckDEAD(warns))                                          \
+                PREPARE_TO_DIE;                                         \
+            code;                                                       \
+            UPDATE_WARNINGS_LOC(loc);                                   \
+        }                                                               \
+    } STMT_END
+
+/* m is not necessarily a "literal string", in this macro */
+#define warn_non_literal_string(loc, packed_warn, m)                    \
+    _WARN_HELPER(loc, packed_warn,                                      \
+                      Perl_warner(aTHX_ packed_warn,                    \
+                                       "%s" REPORT_LOCATION,            \
+                                  m, REPORT_LOCATION_ARGS(loc)))
+#define reg_warn_non_literal_string(loc, m)                             \
+                warn_non_literal_string(loc, packWARN(WARN_REGEXP), m)
+
+#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
+    STMT_START {                                                            \
+                char * format;                                              \
+                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
+                Newx(format, format_size, char);                            \
+                my_strlcpy(format, m, format_size);                         \
+                my_strlcat(format, REPORT_LOCATION, format_size);           \
+                SAVEFREEPV(format);                                         \
+                _WARN_HELPER(loc, packwarn,                                 \
+                      Perl_ck_warner(aTHX_ packwarn,                        \
+                                        format,                             \
+                                        a1, REPORT_LOCATION_ARGS(loc)));    \
+    } STMT_END
+
+#define ckWARNreg(loc,m)                                                \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
+                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
+                                          m REPORT_LOCATION,            \
+                                          REPORT_LOCATION_ARGS(loc)))
+
+#define vWARN(loc, m)                                                   \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
+                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
+                                       m REPORT_LOCATION,               \
+                                       REPORT_LOCATION_ARGS(loc)))      \
+
+#define vWARN_dep(loc, m)                                               \
+    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
+                      Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),      \
+                                       m REPORT_LOCATION,               \
+                                       REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARNdep(loc,m)                                                \
+    _WARN_HELPER(loc, packWARN(WARN_DEPRECATED),                        \
+                      Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
+                                            m REPORT_LOCATION,          \
+                                            REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARNregdep(loc,m)                                                 \
+    _WARN_HELPER(loc, packWARN2(WARN_DEPRECATED, WARN_REGEXP),              \
+                      Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED,     \
+                                                      WARN_REGEXP),         \
+                                             m REPORT_LOCATION,             \
+                                             REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARN2reg_d(loc,m, a1)                                             \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
+                      Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),         \
+                                            m REPORT_LOCATION,              \
+                                            a1, REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARN2reg(loc, m, a1)                                              \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
+                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                          m REPORT_LOCATION,                \
+                                          a1, REPORT_LOCATION_ARGS(loc)))
+
+#define vWARN3(loc, m, a1, a2)                                              \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
+                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),              \
+                                       m REPORT_LOCATION,                   \
+                                       a1, a2, REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARN3reg(loc, m, a1, a2)                                          \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                                \
+                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                          m REPORT_LOCATION,                \
+                                          a1, a2,                           \
+                                          REPORT_LOCATION_ARGS(loc)))
+
+#define vWARN4(loc, m, a1, a2, a3)                                      \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
+                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
+                                       m REPORT_LOCATION,               \
+                                       a1, a2, a3,                      \
+                                       REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARN4reg(loc, m, a1, a2, a3)                                  \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
+                      Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
+                                          m REPORT_LOCATION,            \
+                                          a1, a2, a3,                   \
+                                          REPORT_LOCATION_ARGS(loc)))
+
+#define vWARN5(loc, m, a1, a2, a3, a4)                                  \
+    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
+                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
+                                       m REPORT_LOCATION,               \
+                                       a1, a2, a3, a4,                  \
+                                       REPORT_LOCATION_ARGS(loc)))
+
+#define ckWARNexperimental(loc, class, m)                               \
+    STMT_START {                                                        \
+        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
+            RExC_warned_ ## class = 1;                                  \
+            _WARN_HELPER(loc, packWARN(class),                          \
+                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
+                                            m REPORT_LOCATION,          \
+                                            REPORT_LOCATION_ARGS(loc)));\
+        }                                                               \
+    } STMT_END
+
+#define ckWARNexperimental_with_arg(loc, class, m, arg)                 \
+    STMT_START {                                                        \
+        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
+            RExC_warned_ ## class = 1;                                  \
+            _WARN_HELPER(loc, packWARN(class),                          \
+                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
+                                       m REPORT_LOCATION,               \
+                                       arg, REPORT_LOCATION_ARGS(loc)));\
+        }                                                               \
+    } STMT_END
+
+/* Convert between a pointer to a node and its offset from the beginning of the
+ * program */
+#define REGNODE_p(offset)    (RExC_emit_start + (offset))
+#define REGNODE_OFFSET(node) (__ASSERT_((node) >= RExC_emit_start)      \
+                              (SSize_t) ((node) - RExC_emit_start))
+
+#define ProgLen(ri) ri->proglen
+#define SetProgLen(ri,x) ri->proglen = x
+
+#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
+#define EXPERIMENTAL_INPLACESCAN
+#endif /*PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS*/
+
+#define DEBUG_RExC_seen()                                                   \
+        DEBUG_OPTIMISE_MORE_r({                                             \
+            Perl_re_printf( aTHX_ "RExC_seen: ");                           \
+                                                                            \
+            if (RExC_seen & REG_ZERO_LEN_SEEN)                              \
+                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                \
+                                                                            \
+            if (RExC_seen & REG_LOOKBEHIND_SEEN)                            \
+                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");              \
+                                                                            \
+            if (RExC_seen & REG_GPOS_SEEN)                                  \
+                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                    \
+                                                                            \
+            if (RExC_seen & REG_RECURSE_SEEN)                               \
+                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                 \
+                                                                            \
+            if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                    \
+                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");      \
+                                                                            \
+            if (RExC_seen & REG_VERBARG_SEEN)                               \
+                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                 \
+                                                                            \
+            if (RExC_seen & REG_CUTGROUP_SEEN)                              \
+                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                \
+                                                                            \
+            if (RExC_seen & REG_RUN_ON_COMMENT_SEEN)                        \
+                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");          \
+                                                                            \
+            if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
+                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");          \
+                                                                            \
+            if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                  \
+                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");    \
+                                                                            \
+            Perl_re_printf( aTHX_ "\n");                                    \
+        });
+
+#define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
+  if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
+
+
+#ifdef DEBUGGING
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
+                    debug_studydata(where, data, depth, is_inf, min, stopmin, delta)
+
+#  define DEBUG_PEEP(str, scan, depth, flags)   \
+                    debug_peep(str, pRExC_state, scan, depth, flags)
+#else
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
+#  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
+#endif
+
+#define REGTAIL(x,y,z) regtail((x),(y),(z),depth+1)
+#ifdef DEBUGGING
+#define REGTAIL_STUDY(x,y,z) regtail_study((x),(y),(z),depth+1)
+#else
+#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
+#endif
+
+#define MADE_TRIE       1
+#define MADE_JUMP_TRIE  2
+#define MADE_EXACT_TRIE 4
+
+#define INVLIST_INDEX                   0
+#define ONLY_LOCALE_MATCHES_INDEX       1
+#define DEFERRED_USER_DEFINED_INDEX     2
+
+/* These two functions currently do the exact same thing */
+#define ssc_init_zero           ssc_init
+
+#define ssc_add_cp(ssc, cp)   ssc_add_range((ssc), (cp), (cp))
+#define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
+
+#ifdef DEBUGGING
+#define REGNODE_GUTS(state,op,extra_size) \
+    regnode_guts_debug(state,op,extra_size)
+#else
+#define REGNODE_GUTS(state,op,extra_size) \
+    regnode_guts(state,extra_size)
+#endif
+
+#define CLEAR_OPTSTART                                                          \
+    if (optstart) STMT_START {                                                  \
+        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                  \
+                              " (%" IVdf " nodes)\n", (IV)(node - optstart)));  \
+        optstart=NULL;                                                          \
+    } STMT_END
+
+#define DUMPUNTIL(b,e)                                          \
+    CLEAR_OPTSTART;                                             \
+    node = dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
+
+#endif /* REGCOMP_INTERNAL_H */
diff --git a/regcomp_invlist.c b/regcomp_invlist.c
new file mode 100644
index 0000000000..9ea3f43181
--- /dev/null
+++ b/regcomp_invlist.c
@@ -0,0 +1,1540 @@
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
+#include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
+#define PERL_IN_REGCOMP_INVLIST_C
+#include "perl.h"
+
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
+
+#include "invlist_inline.h"
+#include "unicode_constants.h"
+#include "regcomp_internal.h"
+
+
+void
+Perl_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len)
+{
+    PERL_ARGS_ASSERT_POPULATE_BITMAP_FROM_INVLIST;
+
+    /* As the name says.  The zeroth bit corresponds to the code point given by
+     * 'offset' */
+
+    UV start, end;
+
+    Zero(bitmap, len, U8);
+
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+        assert(start >= offset);
+
+        for (UV i = start; i <= end; i++) {
+            UV adjusted = i - offset;
+
+            BITMAP_BYTE(bitmap, adjusted) |= BITMAP_BIT(adjusted);
+        }
+    }
+    invlist_iterfinish(invlist);
+}
+
+void
+Perl_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_len, SV ** invlist, const UV offset)
+{
+    PERL_ARGS_ASSERT_POPULATE_INVLIST_FROM_BITMAP;
+
+    /* As the name says.  The zeroth bit corresponds to the code point given by
+     * 'offset' */
+
+    Size_t i;
+
+    for (i = 0; i < bitmap_len; i++) {
+        if (BITMAP_TEST(bitmap, i)) {
+            int start = i++;
+
+            /* Save a little work by adding a range all at once instead of bit
+             * by bit */
+            while (i < bitmap_len && BITMAP_TEST(bitmap, i)) {
+                i++;
+            }
+
+            *invlist = _add_range_to_invlist(*invlist,
+                                             start + offset,
+                                             i + offset - 1);
+        }
+    }
+}
+
+/* This section of code defines the inversion list object and its methods.  The
+ * interfaces are highly subject to change, so as much as possible is static to
+ * this file.  An inversion list is here implemented as a malloc'd C UV array
+ * as an SVt_INVLIST scalar.
+ *
+ * An inversion list for Unicode is an array of code points, sorted by ordinal
+ * number.  Each element gives the code point that begins a range that extends
+ * up-to but not including the code point given by the next element.  The final
+ * element gives the first code point of a range that extends to the platform's
+ * infinity.  The even-numbered elements (invlist[0], invlist[2], invlist[4],
+ * ...) give ranges whose code points are all in the inversion list.  We say
+ * that those ranges are in the set.  The odd-numbered elements give ranges
+ * whose code points are not in the inversion list, and hence not in the set.
+ * Thus, element [0] is the first code point in the list.  Element [1]
+ * is the first code point beyond that not in the list; and element [2] is the
+ * first code point beyond that that is in the list.  In other words, the first
+ * range is invlist[0]..(invlist[1]-1), and all code points in that range are
+ * in the inversion list.  The second range is invlist[1]..(invlist[2]-1), and
+ * all code points in that range are not in the inversion list.  The third
+ * range invlist[2]..(invlist[3]-1) gives code points that are in the inversion
+ * list, and so forth.  Thus every element whose index is divisible by two
+ * gives the beginning of a range that is in the list, and every element whose
+ * index is not divisible by two gives the beginning of a range not in the
+ * list.  If the final element's index is divisible by two, the inversion list
+ * extends to the platform's infinity; otherwise the highest code point in the
+ * inversion list is the contents of that element minus 1.
+ *
+ * A range that contains just a single code point N will look like
+ *  invlist[i]   == N
+ *  invlist[i+1] == N+1
+ *
+ * If N is UV_MAX (the highest representable code point on the machine), N+1 is
+ * impossible to represent, so element [i+1] is omitted.  The single element
+ * inversion list
+ *  invlist[0] == UV_MAX
+ * contains just UV_MAX, but is interpreted as matching to infinity.
+ *
+ * Taking the complement (inverting) an inversion list is quite simple, if the
+ * first element is 0, remove it; otherwise add a 0 element at the beginning.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to always contain 0; there is an additional flag in the header which
+ * indicates if the list begins at the 0, or is offset to begin at the next
+ * element.  This means that the inversion list can be inverted without any
+ * copying; just flip the flag.
+ *
+ * More about inversion lists can be found in "Unicode Demystified"
+ * Chapter 13 by Richard Gillam, published by Addison-Wesley.
+ *
+ * The inversion list data structure is currently implemented as an SV pointing
+ * to an array of UVs that the SV thinks are bytes.  This allows us to have an
+ * array of UV whose memory management is automatically handled by the existing
+ * facilities for SV's.
+ *
+ * Some of the methods should always be private to the implementation, and some
+ * should eventually be made public */
+
+/* The header definitions are in F<invlist_inline.h> */
+
+#ifndef PERL_IN_XSUB_RE
+
+PERL_STATIC_INLINE UV*
+S__invlist_array_init(SV* const invlist, const bool will_have_0)
+{
+    /* Returns a pointer to the first element in the inversion list's array.
+     * This is called upon initialization of an inversion list.  Where the
+     * array begins depends on whether the list has the code point U+0000 in it
+     * or not.  The other parameter tells it whether the code that follows this
+     * call is about to put a 0 in the inversion list or not.  The first
+     * element is either the element reserved for 0, if TRUE, or the element
+     * after it, if FALSE */
+
+    bool* offset = get_invlist_offset_addr(invlist);
+    UV* zero_addr = (UV *) SvPVX(invlist);
+
+    PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
+
+    /* Must be empty */
+    assert(! _invlist_len(invlist));
+
+    *zero_addr = 0;
+
+    /* 1^1 = 0; 1^0 = 1 */
+    *offset = 1 ^ will_have_0;
+    return zero_addr + *offset;
+}
+
+STATIC void
+S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
+{
+    /* Replaces the inversion list in 'dest' with the one from 'src'.  It
+     * steals the list from 'src', so 'src' is made to have a NULL list.  This
+     * is similar to what SvSetMagicSV() would do, if it were implemented on
+     * inversion lists, though this routine avoids a copy */
+
+    const UV src_len          = _invlist_len(src);
+    const bool src_offset     = *get_invlist_offset_addr(src);
+    const STRLEN src_byte_len = SvLEN(src);
+    char * array              = SvPVX(src);
+
+#ifndef NO_TAINT_SUPPORT
+    const int oldtainted = TAINT_get;
+#endif
+
+    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
+
+    assert(is_invlist(src));
+    assert(is_invlist(dest));
+    assert(! invlist_is_iterating(src));
+    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
+
+    /* Make sure it ends in the right place with a NUL, as our inversion list
+     * manipulations aren't careful to keep this true, but sv_usepvn_flags()
+     * asserts it */
+    array[src_byte_len - 1] = '\0';
+
+    TAINT_NOT;      /* Otherwise it breaks */
+    sv_usepvn_flags(dest,
+                    (char *) array,
+                    src_byte_len - 1,
+
+                    /* This flag is documented to cause a copy to be avoided */
+                    SV_HAS_TRAILING_NUL);
+    TAINT_set(oldtainted);
+    SvPV_set(src, 0);
+    SvLEN_set(src, 0);
+    SvCUR_set(src, 0);
+
+    /* Finish up copying over the other fields in an inversion list */
+    *get_invlist_offset_addr(dest) = src_offset;
+    invlist_set_len(dest, src_len, src_offset);
+    *get_invlist_previous_index_addr(dest) = 0;
+    invlist_iterfinish(dest);
+}
+
+PERL_STATIC_INLINE IV*
+S_get_invlist_previous_index_addr(SV* invlist)
+{
+    /* Return the address of the IV that is reserved to hold the cached index
+     * */
+    PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
+
+    assert(is_invlist(invlist));
+
+    return &(((XINVLIST*) SvANY(invlist))->prev_index);
+}
+
+PERL_STATIC_INLINE IV
+S_invlist_previous_index(SV* const invlist)
+{
+    /* Returns cached index of previous search */
+
+    PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
+
+    return *get_invlist_previous_index_addr(invlist);
+}
+
+PERL_STATIC_INLINE void
+S_invlist_set_previous_index(SV* const invlist, const IV index)
+{
+    /* Caches <index> for later retrieval */
+
+    PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
+
+    assert(index == 0 || index < (int) _invlist_len(invlist));
+
+    *get_invlist_previous_index_addr(invlist) = index;
+}
+
+PERL_STATIC_INLINE void
+S_invlist_trim(SV* invlist)
+{
+    /* Free the not currently-being-used space in an inversion list */
+
+    /* But don't free up the space needed for the 0 UV that is always at the
+     * beginning of the list, nor the trailing NUL */
+    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
+
+    PERL_ARGS_ASSERT_INVLIST_TRIM;
+
+    assert(is_invlist(invlist));
+
+    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
+}
+
+PERL_STATIC_INLINE void
+S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
+{
+    PERL_ARGS_ASSERT_INVLIST_CLEAR;
+
+    assert(is_invlist(invlist));
+
+    invlist_set_len(invlist, 0, 0);
+    invlist_trim(invlist);
+}
+
+PERL_STATIC_INLINE UV
+S_invlist_max(const SV* const invlist)
+{
+    /* Returns the maximum number of elements storable in the inversion list's
+     * array, without having to realloc() */
+
+    PERL_ARGS_ASSERT_INVLIST_MAX;
+
+    assert(is_invlist(invlist));
+
+    /* Assumes worst case, in which the 0 element is not counted in the
+     * inversion list, so subtracts 1 for that */
+    return SvLEN(invlist) == 0  /* This happens under _new_invlist_C_array */
+           ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
+           : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
+}
+
+STATIC void
+S_initialize_invlist_guts(pTHX_ SV* invlist, const Size_t initial_size)
+{
+    PERL_ARGS_ASSERT_INITIALIZE_INVLIST_GUTS;
+
+    /* First 1 is in case the zero element isn't in the list; second 1 is for
+     * trailing NUL */
+    SvGROW(invlist, TO_INTERNAL_SIZE(initial_size + 1) + 1);
+    invlist_set_len(invlist, 0, 0);
+
+    /* Force iterinit() to be used to get iteration to work */
+    invlist_iterfinish(invlist);
+
+    *get_invlist_previous_index_addr(invlist) = 0;
+    SvPOK_on(invlist);  /* This allows B to extract the PV */
+}
+
+SV*
+Perl__new_invlist(pTHX_ IV initial_size)
+{
+
+    /* Return a pointer to a newly constructed inversion list, with enough
+     * space to store 'initial_size' elements.  If that number is negative, a
+     * system default is used instead */
+
+    SV* new_list;
+
+    if (initial_size < 0) {
+        initial_size = 10;
+    }
+
+    new_list = newSV_type(SVt_INVLIST);
+    initialize_invlist_guts(new_list, initial_size);
+
+    return new_list;
+}
+
+SV*
+Perl__new_invlist_C_array(pTHX_ const UV* const list)
+{
+    /* Return a pointer to a newly constructed inversion list, initialized to
+     * point to <list>, which has to be in the exact correct inversion list
+     * form, including internal fields.  Thus this is a dangerous routine that
+     * should not be used in the wrong hands.  The passed in 'list' contains
+     * several header fields at the beginning that are not part of the
+     * inversion list body proper */
+
+    const STRLEN length = (STRLEN) list[0];
+    const UV version_id =          list[1];
+    const bool offset   =    cBOOL(list[2]);
+#define HEADER_LENGTH 3
+    /* If any of the above changes in any way, you must change HEADER_LENGTH
+     * (if appropriate) and regenerate INVLIST_VERSION_ID by running
+     *      perl -E 'say int(rand 2**31-1)'
+     */
+#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
+                                        data structure type, so that one being
+                                        passed in can be validated to be an
+                                        inversion list of the correct vintage.
+                                       */
+
+    SV* invlist = newSV_type(SVt_INVLIST);
+
+    PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
+
+    if (version_id != INVLIST_VERSION_ID) {
+        Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
+    }
+
+    /* The generated array passed in includes header elements that aren't part
+     * of the list proper, so start it just after them */
+    SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
+
+    SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
+                               shouldn't touch it */
+
+    *(get_invlist_offset_addr(invlist)) = offset;
+
+    /* The 'length' passed to us is the physical number of elements in the
+     * inversion list.  But if there is an offset the logical number is one
+     * less than that */
+    invlist_set_len(invlist, length  - offset, offset);
+
+    invlist_set_previous_index(invlist, 0);
+
+    /* Initialize the iteration pointer. */
+    invlist_iterfinish(invlist);
+
+    SvREADONLY_on(invlist);
+    SvPOK_on(invlist);
+
+    return invlist;
+}
+
+STATIC void
+S__append_range_to_invlist(pTHX_ SV* const invlist,
+                                 const UV start, const UV end)
+{
+   /* Subject to change or removal.  Append the range from 'start' to 'end' at
+    * the end of the inversion list.  The range must be above any existing
+    * ones. */
+
+    UV* array;
+    UV max = invlist_max(invlist);
+    UV len = _invlist_len(invlist);
+    bool offset;
+
+    PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
+
+    if (len == 0) { /* Empty lists must be initialized */
+        offset = start != 0;
+        array = _invlist_array_init(invlist, ! offset);
+    }
+    else {
+        /* Here, the existing list is non-empty. The current max entry in the
+         * list is generally the first value not in the set, except when the
+         * set extends to the end of permissible values, in which case it is
+         * the first entry in that final set, and so this call is an attempt to
+         * append out-of-order */
+
+        UV final_element = len - 1;
+        array = invlist_array(invlist);
+        if (   array[final_element] > start
+            || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
+        {
+            Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
+                     array[final_element], start,
+                     ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
+        }
+
+        /* Here, it is a legal append.  If the new range begins 1 above the end
+         * of the range below it, it is extending the range below it, so the
+         * new first value not in the set is one greater than the newly
+         * extended range.  */
+        offset = *get_invlist_offset_addr(invlist);
+        if (array[final_element] == start) {
+            if (end != UV_MAX) {
+                array[final_element] = end + 1;
+            }
+            else {
+                /* But if the end is the maximum representable on the machine,
+                 * assume that infinity was actually what was meant.  Just let
+                 * the range that this would extend to have no end */
+                invlist_set_len(invlist, len - 1, offset);
+            }
+            return;
+        }
+    }
+
+    /* Here the new range doesn't extend any existing set.  Add it */
+
+    len += 2;   /* Includes an element each for the start and end of range */
+
+    /* If wll overflow the existing space, extend, which may cause the array to
+     * be moved */
+    if (max < len) {
+        invlist_extend(invlist, len);
+
+        /* Have to set len here to avoid assert failure in invlist_array() */
+        invlist_set_len(invlist, len, offset);
+
+        array = invlist_array(invlist);
+    }
+    else {
+        invlist_set_len(invlist, len, offset);
+    }
+
+    /* The next item on the list starts the range, the one after that is
+     * one past the new range.  */
+    array[len - 2] = start;
+    if (end != UV_MAX) {
+        array[len - 1] = end + 1;
+    }
+    else {
+        /* But if the end is the maximum representable on the machine, just let
+         * the range have no end */
+        invlist_set_len(invlist, len - 1, offset);
+    }
+}
+
+SSize_t
+Perl__invlist_search(SV* const invlist, const UV cp)
+{
+    /* Searches the inversion list for the entry that contains the input code
+     * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
+     * return value is the index into the list's array of the range that
+     * contains <cp>, that is, 'i' such that
+     *  array[i] <= cp < array[i+1]
+     */
+
+    IV low = 0;
+    IV mid;
+    IV high = _invlist_len(invlist);
+    const IV highest_element = high - 1;
+    const UV* array;
+
+    PERL_ARGS_ASSERT__INVLIST_SEARCH;
+
+    /* If list is empty, return failure. */
+    if (UNLIKELY(high == 0)) {
+        return -1;
+    }
+
+    /* (We can't get the array unless we know the list is non-empty) */
+    array = invlist_array(invlist);
+
+    mid = invlist_previous_index(invlist);
+    assert(mid >=0);
+    if (UNLIKELY(mid > highest_element)) {
+        mid = highest_element;
+    }
+
+    /* <mid> contains the cache of the result of the previous call to this
+     * function (0 the first time).  See if this call is for the same result,
+     * or if it is for mid-1.  This is under the theory that calls to this
+     * function will often be for related code points that are near each other.
+     * And benchmarks show that caching gives better results.  We also test
+     * here if the code point is within the bounds of the list.  These tests
+     * replace others that would have had to be made anyway to make sure that
+     * the array bounds were not exceeded, and these give us extra information
+     * at the same time */
+    if (cp >= array[mid]) {
+        if (cp >= array[highest_element]) {
+            return highest_element;
+        }
+
+        /* Here, array[mid] <= cp < array[highest_element].  This means that
+         * the final element is not the answer, so can exclude it; it also
+         * means that <mid> is not the final element, so can refer to 'mid + 1'
+         * safely */
+        if (cp < array[mid + 1]) {
+            return mid;
+        }
+        high--;
+        low = mid + 1;
+    }
+    else { /* cp < aray[mid] */
+        if (cp < array[0]) { /* Fail if outside the array */
+            return -1;
+        }
+        high = mid;
+        if (cp >= array[mid - 1]) {
+            goto found_entry;
+        }
+    }
+
+    /* Binary search.  What we are looking for is <i> such that
+     *  array[i] <= cp < array[i+1]
+     * The loop below converges on the i+1.  Note that there may not be an
+     * (i+1)th element in the array, and things work nonetheless */
+    while (low < high) {
+        mid = (low + high) / 2;
+        assert(mid <= highest_element);
+        if (array[mid] <= cp) { /* cp >= array[mid] */
+            low = mid + 1;
+
+            /* We could do this extra test to exit the loop early.
+            if (cp < array[low]) {
+                return mid;
+            }
+            */
+        }
+        else { /* cp < array[mid] */
+            high = mid;
+        }
+    }
+
+  found_entry:
+    high--;
+    invlist_set_previous_index(invlist, high);
+    return high;
+}
+
+void
+Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
+                                         const bool complement_b, SV** output)
+{
+    /* Take the union of two inversion lists and point '*output' to it.  On
+     * input, '*output' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the union.  The first list, 'a', may be
+     * NULL, in which case a copy of the second list is placed in '*output'.
+     * If 'complement_b' is TRUE, the union is taken of the complement
+     * (inversion) of 'b' instead of b itself.
+     *
+     * The basis for this comes from "Unicode Demystified" Chapter 13 by
+     * Richard Gillam, published by Addison-Wesley, and explained at some
+     * length there.  The preface says to incorporate its examples into your
+     * code at your own risk.
+     *
+     * The algorithm is like a merge sort. */
+
+    const UV* array_a;    /* a's array */
+    const UV* array_b;
+    UV len_a;       /* length of a's array */
+    UV len_b;
+
+    SV* u;                      /* the resulting union */
+    UV* array_u;
+    UV len_u = 0;
+
+    UV i_a = 0;             /* current index into a's array */
+    UV i_b = 0;
+    UV i_u = 0;
+
+    /* running count, as explained in the algorithm source book; items are
+     * stopped accumulating and are output when the count changes to/from 0.
+     * The count is incremented when we start a range that's in an input's set,
+     * and decremented when we start a range that's not in a set.  So this
+     * variable can be 0, 1, or 2.  When it is 0 neither input is in their set,
+     * and hence nothing goes into the union; 1, just one of the inputs is in
+     * its set (and its current range gets added to the union); and 2 when both
+     * inputs are in their sets.  */
+    UV count = 0;
+
+    PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
+    assert(a != b);
+    assert(*output == NULL || is_invlist(*output));
+
+    len_b = _invlist_len(b);
+    if (len_b == 0) {
+
+        /* Here, 'b' is empty, hence it's complement is all possible code
+         * points.  So if the union includes the complement of 'b', it includes
+         * everything, and we need not even look at 'a'.  It's easiest to
+         * create a new inversion list that matches everything.  */
+        if (complement_b) {
+            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
+
+            if (*output == NULL) { /* If the output didn't exist, just point it
+                                      at the new list */
+                *output = everything;
+            }
+            else { /* Otherwise, replace its contents with the new list */
+                invlist_replace_list_destroys_src(*output, everything);
+                SvREFCNT_dec_NN(everything);
+            }
+
+            return;
+        }
+
+        /* Here, we don't want the complement of 'b', and since 'b' is empty,
+         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
+         * output will be empty */
+
+        if (a == NULL || _invlist_len(a) == 0) {
+            if (*output == NULL) {
+                *output = _new_invlist(0);
+            }
+            else {
+                invlist_clear(*output);
+            }
+            return;
+        }
+
+        /* Here, 'a' is not empty, but 'b' is, so 'a' entirely determines the
+         * union.  We can just return a copy of 'a' if '*output' doesn't point
+         * to an existing list */
+        if (*output == NULL) {
+            *output = invlist_clone(a, NULL);
+            return;
+        }
+
+        /* If the output is to overwrite 'a', we have a no-op, as it's
+         * already in 'a' */
+        if (*output == a) {
+            return;
+        }
+
+        /* Here, '*output' is to be overwritten by 'a' */
+        u = invlist_clone(a, NULL);
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
+
+        return;
+    }
+
+    /* Here 'b' is not empty.  See about 'a' */
+
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
+
+        /* Here, 'a' is empty (and b is not).  That means the union will come
+         * entirely from 'b'.  If '*output' is NULL, we can directly return a
+         * clone of 'b'.  Otherwise, we replace the contents of '*output' with
+         * the clone */
+
+        SV ** dest = (*output == NULL) ? output : &u;
+        *dest = invlist_clone(b, NULL);
+        if (complement_b) {
+            _invlist_invert(*dest);
+        }
+
+        if (dest == &u) {
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
+        }
+
+        return;
+    }
+
+    /* Here both lists exist and are non-empty */
+    array_a = invlist_array(a);
+    array_b = invlist_array(b);
+
+    /* If are to take the union of 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* To complement, we invert: if the first element is 0, remove it.  To
+         * do this, we just pretend the array starts one later */
+        if (array_b[0] == 0) {
+            array_b++;
+            len_b--;
+        }
+        else {
+
+            /* But if the first element is not zero, we pretend the list starts
+             * at the 0 that is always stored immediately before the array. */
+            array_b--;
+            len_b++;
+        }
+    }
+
+    /* Size the union for the worst case: that the sets are completely
+     * disjoint */
+    u = _new_invlist(len_a + len_b);
+
+    /* Will contain U+0000 if either component does */
+    array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
+                                      || (len_b > 0 && array_b[0] == 0));
+
+    /* Go through each input list item by item, stopping when have exhausted
+     * one of them */
+    while (i_a < len_a && i_b < len_b) {
+        UV cp;      /* The element to potentially add to the union's array */
+        bool cp_in_set;   /* is it in the input list's set or not */
+
+        /* We need to take one or the other of the two inputs for the union.
+         * Since we are merging two sorted lists, we take the smaller of the
+         * next items.  In case of a tie, we take first the one that is in its
+         * set.  If we first took the one not in its set, it would decrement
+         * the count, possibly to 0 which would cause it to be output as ending
+         * the range, and the next time through we would take the same number,
+         * and output it again as beginning the next range.  By doing it the
+         * opposite way, there is no possibility that the count will be
+         * momentarily decremented to 0, and thus the two adjoining ranges will
+         * be seamlessly merged.  (In a tie and both are in the set or both not
+         * in the set, it doesn't matter which we take first.) */
+        if (       array_a[i_a] < array_b[i_b]
+            || (   array_a[i_a] == array_b[i_b]
+                && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
+        {
+            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
+            cp = array_a[i_a++];
+        }
+        else {
+            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
+            cp = array_b[i_b++];
+        }
+
+        /* Here, have chosen which of the two inputs to look at.  Only output
+         * if the running count changes to/from 0, which marks the
+         * beginning/end of a range that's in the set */
+        if (cp_in_set) {
+            if (count == 0) {
+                array_u[i_u++] = cp;
+            }
+            count++;
+        }
+        else {
+            count--;
+            if (count == 0) {
+                array_u[i_u++] = cp;
+            }
+        }
+    }
+
+
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We decrement 'count', as explained below, if
+     * that list is in its set.  (i_a and i_b each currently index the element
+     * beyond the one we care about.) */
+    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+        || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
+    {
+        count--;
+    }
+
+    /* Above we decremented 'count' if the list that had unexamined elements in
+     * it was in its set.  This has made it so that 'count' being non-zero
+     * means there isn't anything left to output; and 'count' equal to 0 means
+     * that what is left to output is precisely that which is left in the
+     * non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to add to the union.  If it was in its set at its end, that means
+     * the set extends from here to the platform's infinity, and hence so does
+     * the union and the non-exhausted set is irrelevant.  The exhausted set
+     * also contributed 1 to 'count'.  If 'count' was 2, it got decremented to
+     * 1, but if it was 1, the non-exhausted set wasn't in its set, and so
+     * 'count' remains at 1.  This is consistent with the decremented 'count'
+     * != 0 meaning there's nothing left to add to the union.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the rest of the union will be whatever the other input is.
+     * If 'count' was 0, neither list was in its set, and 'count' remains 0;
+     * otherwise it gets decremented to 0.  This is consistent with 'count'
+     * == 0 meaning the remainder of the union is whatever is left in the
+     * non-exhausted list. */
+    if (count != 0) {
+        len_u = i_u;
+    }
+    else {
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* The non-exhausted input is 'a' */
+            Copy(array_a + i_a, array_u + i_u, copy_count, UV);
+        }
+        else { /* The non-exhausted input is b */
+            copy_count = len_b - i_b;
+            Copy(array_b + i_b, array_u + i_u, copy_count, UV);
+        }
+        len_u = i_u + copy_count;
+    }
+
+    /* Set the result to the final length, which can change the pointer to
+     * array_u, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
+    if (len_u != _invlist_len(u)) {
+        invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
+        invlist_trim(u);
+        array_u = invlist_array(u);
+    }
+
+    if (*output == NULL) {  /* Simply return the new inversion list */
+        *output = u;
+    }
+    else {
+        /* Otherwise, overwrite the inversion list that was in '*output'.  We
+         * could instead free '*output', and then set it to 'u', but experience
+         * has shown [perl #127392] that if the input is a mortal, we can get a
+         * huge build-up of these during regex compilation before they get
+         * freed. */
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
+    }
+
+    return;
+}
+
+void
+Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
+                                               const bool complement_b, SV** i)
+{
+    /* Take the intersection of two inversion lists and point '*i' to it.  On
+     * input, '*i' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the intersection.  The first list, 'a', may be
+     * NULL, in which case '*i' will be an empty list.  If 'complement_b' is
+     * TRUE, the result will be the intersection of 'a' and the complement (or
+     * inversion) of 'b' instead of 'b' directly.
+     *
+     * The basis for this comes from "Unicode Demystified" Chapter 13 by
+     * Richard Gillam, published by Addison-Wesley, and explained at some
+     * length there.  The preface says to incorporate its examples into your
+     * code at your own risk.  In fact, it had bugs
+     *
+     * The algorithm is like a merge sort, and is essentially the same as the
+     * union above
+     */
+
+    const UV* array_a;          /* a's array */
+    const UV* array_b;
+    UV len_a;   /* length of a's array */
+    UV len_b;
+
+    SV* r;                   /* the resulting intersection */
+    UV* array_r;
+    UV len_r = 0;
+
+    UV i_a = 0;             /* current index into a's array */
+    UV i_b = 0;
+    UV i_r = 0;
+
+    /* running count of how many of the two inputs are postitioned at ranges
+     * that are in their sets.  As explained in the algorithm source book,
+     * items are stopped accumulating and are output when the count changes
+     * to/from 2.  The count is incremented when we start a range that's in an
+     * input's set, and decremented when we start a range that's not in a set.
+     * Only when it is 2 are we in the intersection. */
+    UV count = 0;
+
+    PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
+    assert(a != b);
+    assert(*i == NULL || is_invlist(*i));
+
+    /* Special case if either one is empty */
+    len_a = (a == NULL) ? 0 : _invlist_len(a);
+    if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
+        if (len_a != 0 && complement_b) {
+
+            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
+             * must be empty.  Here, also we are using 'b's complement, which
+             * hence must be every possible code point.  Thus the intersection
+             * is simply 'a'. */
+
+            if (*i == a) {  /* No-op */
+                return;
+            }
+
+            if (*i == NULL) {
+                *i = invlist_clone(a, NULL);
+                return;
+            }
+
+            r = invlist_clone(a, NULL);
+            invlist_replace_list_destroys_src(*i, r);
+            SvREFCNT_dec_NN(r);
+            return;
+        }
+
+        /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
+         * intersection must be empty */
+        if (*i == NULL) {
+            *i = _new_invlist(0);
+            return;
+        }
+
+        invlist_clear(*i);
+        return;
+    }
+
+    /* Here both lists exist and are non-empty */
+    array_a = invlist_array(a);
+    array_b = invlist_array(b);
+
+    /* If are to take the intersection of 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* To complement, we invert: if the first element is 0, remove it.  To
+         * do this, we just pretend the array starts one later */
+        if (array_b[0] == 0) {
+            array_b++;
+            len_b--;
+        }
+        else {
+
+            /* But if the first element is not zero, we pretend the list starts
+             * at the 0 that is always stored immediately before the array. */
+            array_b--;
+            len_b++;
+        }
+    }
+
+    /* Size the intersection for the worst case: that the intersection ends up
+     * fragmenting everything to be completely disjoint */
+    r= _new_invlist(len_a + len_b);
+
+    /* Will contain U+0000 iff both components do */
+    array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
+                                     && len_b > 0 && array_b[0] == 0);
+
+    /* Go through each list item by item, stopping when have exhausted one of
+     * them */
+    while (i_a < len_a && i_b < len_b) {
+        UV cp;      /* The element to potentially add to the intersection's
+                       array */
+        bool cp_in_set; /* Is it in the input list's set or not */
+
+        /* We need to take one or the other of the two inputs for the
+         * intersection.  Since we are merging two sorted lists, we take the
+         * smaller of the next items.  In case of a tie, we take first the one
+         * that is not in its set (a difference from the union algorithm).  If
+         * we first took the one in its set, it would increment the count,
+         * possibly to 2 which would cause it to be output as starting a range
+         * in the intersection, and the next time through we would take that
+         * same number, and output it again as ending the set.  By doing the
+         * opposite of this, there is no possibility that the count will be
+         * momentarily incremented to 2.  (In a tie and both are in the set or
+         * both not in the set, it doesn't matter which we take first.) */
+        if (       array_a[i_a] < array_b[i_b]
+            || (   array_a[i_a] == array_b[i_b]
+                && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
+        {
+            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
+            cp = array_a[i_a++];
+        }
+        else {
+            cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
+            cp= array_b[i_b++];
+        }
+
+        /* Here, have chosen which of the two inputs to look at.  Only output
+         * if the running count changes to/from 2, which marks the
+         * beginning/end of a range that's in the intersection */
+        if (cp_in_set) {
+            count++;
+            if (count == 2) {
+                array_r[i_r++] = cp;
+            }
+        }
+        else {
+            if (count == 2) {
+                array_r[i_r++] = cp;
+            }
+            count--;
+        }
+
+    }
+
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We increment 'count', as explained below, if the
+     * exhausted list was in its set.  (i_a and i_b each currently index the
+     * element beyond the one we care about.) */
+    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+        || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
+    {
+        count++;
+    }
+
+    /* Above we incremented 'count' if the exhausted list was in its set.  This
+     * has made it so that 'count' being below 2 means there is nothing left to
+     * output; otheriwse what's left to add to the intersection is precisely
+     * that which is left in the non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to affect the intersection.  If it was in its set at its end, that
+     * means the set extends from here to the platform's infinity, and hence
+     * anything in the non-exhausted's list will be in the intersection, and
+     * anything not in it won't be.  Hence, the rest of the intersection is
+     * precisely what's in the non-exhausted list  The exhausted set also
+     * contributed 1 to 'count', meaning 'count' was at least 1.  Incrementing
+     * it means 'count' is now at least 2.  This is consistent with the
+     * incremented 'count' being >= 2 means to add the non-exhausted list to
+     * the intersection.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the intersection can't include anything further; the
+     * non-exhausted set is irrelevant.  'count' was at most 1, and doesn't get
+     * incremented.  This is consistent with 'count' being < 2 meaning nothing
+     * further to add to the intersection. */
+    if (count < 2) { /* Nothing left to put in the intersection. */
+        len_r = i_r;
+    }
+    else { /* copy the non-exhausted list, unchanged. */
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* a is the one with stuff left */
+            Copy(array_a + i_a, array_r + i_r, copy_count, UV);
+        }
+        else {  /* b is the one with stuff left */
+            copy_count = len_b - i_b;
+            Copy(array_b + i_b, array_r + i_r, copy_count, UV);
+        }
+        len_r = i_r + copy_count;
+    }
+
+    /* Set the result to the final length, which can change the pointer to
+     * array_r, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
+    if (len_r != _invlist_len(r)) {
+        invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
+        invlist_trim(r);
+        array_r = invlist_array(r);
+    }
+
+    if (*i == NULL) { /* Simply return the calculated intersection */
+        *i = r;
+    }
+    else { /* Otherwise, replace the existing inversion list in '*i'.  We could
+              instead free '*i', and then set it to 'r', but experience has
+              shown [perl #127392] that if the input is a mortal, we can get a
+              huge build-up of these during regex compilation before they get
+              freed. */
+        if (len_r) {
+            invlist_replace_list_destroys_src(*i, r);
+        }
+        else {
+            invlist_clear(*i);
+        }
+        SvREFCNT_dec_NN(r);
+    }
+
+    return;
+}
+
+SV*
+Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
+{
+    /* Add the range from 'start' to 'end' inclusive to the inversion list's
+     * set.  A pointer to the inversion list is returned.  This may actually be
+     * a new list, in which case the passed in one has been destroyed.  The
+     * passed-in inversion list can be NULL, in which case a new one is created
+     * with just the one range in it.  The new list is not necessarily
+     * NUL-terminated.  Space is not freed if the inversion list shrinks as a
+     * result of this function.  The gain would not be large, and in many
+     * cases, this is called multiple times on a single inversion list, so
+     * anything freed may almost immediately be needed again.
+     *
+     * This used to mostly call the 'union' routine, but that is much more
+     * heavyweight than really needed for a single range addition */
+
+    UV* array;              /* The array implementing the inversion list */
+    UV len;                 /* How many elements in 'array' */
+    SSize_t i_s;            /* index into the invlist array where 'start'
+                               should go */
+    SSize_t i_e = 0;        /* And the index where 'end' should go */
+    UV cur_highest;         /* The highest code point in the inversion list
+                               upon entry to this function */
+
+    /* This range becomes the whole inversion list if none already existed */
+    if (invlist == NULL) {
+        invlist = _new_invlist(2);
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
+    }
+
+    /* Likewise, if the inversion list is currently empty */
+    len = _invlist_len(invlist);
+    if (len == 0) {
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
+    }
+
+    /* Starting here, we have to know the internals of the list */
+    array = invlist_array(invlist);
+
+    /* If the new range ends higher than the current highest ... */
+    cur_highest = invlist_highest(invlist);
+    if (end > cur_highest) {
+
+        /* If the whole range is higher, we can just append it */
+        if (start > cur_highest) {
+            _append_range_to_invlist(invlist, start, end);
+            return invlist;
+        }
+
+        /* Otherwise, add the portion that is higher ... */
+        _append_range_to_invlist(invlist, cur_highest + 1, end);
+
+        /* ... and continue on below to handle the rest.  As a result of the
+         * above append, we know that the index of the end of the range is the
+         * final even numbered one of the array.  Recall that the final element
+         * always starts a range that extends to infinity.  If that range is in
+         * the set (meaning the set goes from here to infinity), it will be an
+         * even index, but if it isn't in the set, it's odd, and the final
+         * range in the set is one less, which is even. */
+        if (end == UV_MAX) {
+            i_e = len;
+        }
+        else {
+            i_e = len - 2;
+        }
+    }
+
+    /* We have dealt with appending, now see about prepending.  If the new
+     * range starts lower than the current lowest ... */
+    if (start < array[0]) {
+
+        /* Adding something which has 0 in it is somewhat tricky, and uncommon.
+         * Let the union code handle it, rather than having to know the
+         * trickiness in two code places.  */
+        if (UNLIKELY(start == 0)) {
+            SV* range_invlist;
+
+            range_invlist = _new_invlist(2);
+            _append_range_to_invlist(range_invlist, start, end);
+
+            _invlist_union(invlist, range_invlist, &invlist);
+
+            SvREFCNT_dec_NN(range_invlist);
+
+            return invlist;
+        }
+
+        /* If the whole new range comes before the first entry, and doesn't
+         * extend it, we have to insert it as an additional range */
+        if (end < array[0] - 1) {
+            i_s = i_e = -1;
+            goto splice_in_new_range;
+        }
+
+        /* Here the new range adjoins the existing first range, extending it
+         * downwards. */
+        array[0] = start;
+
+        /* And continue on below to handle the rest.  We know that the index of
+         * the beginning of the range is the first one of the array */
+        i_s = 0;
+    }
+    else { /* Not prepending any part of the new range to the existing list.
+            * Find where in the list it should go.  This finds i_s, such that:
+            *     invlist[i_s] <= start < array[i_s+1]
+            */
+        i_s = _invlist_search(invlist, start);
+    }
+
+    /* At this point, any extending before the beginning of the inversion list
+     * and/or after the end has been done.  This has made it so that, in the
+     * code below, each endpoint of the new range is either in a range that is
+     * in the set, or is in a gap between two ranges that are.  This means we
+     * don't have to worry about exceeding the array bounds.
+     *
+     * Find where in the list the new range ends (but we can skip this if we
+     * have already determined what it is, or if it will be the same as i_s,
+     * which we already have computed) */
+    if (i_e == 0) {
+        i_e = (start == end)
+              ? i_s
+              : _invlist_search(invlist, end);
+    }
+
+    /* Here generally invlist[i_e] <= end < array[i_e+1].  But if invlist[i_e]
+     * is a range that goes to infinity there is no element at invlist[i_e+1],
+     * so only the first relation holds. */
+
+    if ( ! ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+
+        /* Here, the ranges on either side of the beginning of the new range
+         * are in the set, and this range starts in the gap between them.
+         *
+         * The new range extends the range above it downwards if the new range
+         * ends at or above that range's start */
+        const bool extends_the_range_above = (   end == UV_MAX
+                                              || end + 1 >= array[i_s+1]);
+
+        /* The new range extends the range below it upwards if it begins just
+         * after where that range ends */
+        if (start == array[i_s]) {
+
+            /* If the new range fills the entire gap between the other ranges,
+             * they will get merged together.  Other ranges may also get
+             * merged, depending on how many of them the new range spans.  In
+             * the general case, we do the merge later, just once, after we
+             * figure out how many to merge.  But in the case where the new
+             * range exactly spans just this one gap (possibly extending into
+             * the one above), we do the merge here, and an early exit.  This
+             * is done here to avoid having to special case later. */
+            if (i_e - i_s <= 1) {
+
+                /* If i_e - i_s == 1, it means that the new range terminates
+                 * within the range above, and hence 'extends_the_range_above'
+                 * must be true.  (If the range above it extends to infinity,
+                 * 'i_s+2' will be above the array's limit, but 'len-i_s-2'
+                 * will be 0, so no harm done.) */
+                if (extends_the_range_above) {
+                    Move(array + i_s + 2, array + i_s, len - i_s - 2, UV);
+                    invlist_set_len(invlist,
+                                    len - 2,
+                                    *(get_invlist_offset_addr(invlist)));
+                    return invlist;
+                }
+
+                /* Here, i_e must == i_s.  We keep them in sync, as they apply
+                 * to the same range, and below we are about to decrement i_s
+                 * */
+                i_e--;
+            }
+
+            /* Here, the new range is adjacent to the one below.  (It may also
+             * span beyond the range above, but that will get resolved later.)
+             * Extend the range below to include this one. */
+            array[i_s] = (end == UV_MAX) ? UV_MAX : end + 1;
+            i_s--;
+            start = array[i_s];
+        }
+        else if (extends_the_range_above) {
+
+            /* Here the new range only extends the range above it, but not the
+             * one below.  It merges with the one above.  Again, we keep i_e
+             * and i_s in sync if they point to the same range */
+            if (i_e == i_s) {
+                i_e++;
+            }
+            i_s++;
+            array[i_s] = start;
+        }
+    }
+
+    /* Here, we've dealt with the new range start extending any adjoining
+     * existing ranges.
+     *
+     * If the new range extends to infinity, it is now the final one,
+     * regardless of what was there before */
+    if (UNLIKELY(end == UV_MAX)) {
+        invlist_set_len(invlist, i_s + 1, *(get_invlist_offset_addr(invlist)));
+        return invlist;
+    }
+
+    /* If i_e started as == i_s, it has also been dealt with,
+     * and been updated to the new i_s, which will fail the following if */
+    if (! ELEMENT_RANGE_MATCHES_INVLIST(i_e)) {
+
+        /* Here, the ranges on either side of the end of the new range are in
+         * the set, and this range ends in the gap between them.
+         *
+         * If this range is adjacent to (hence extends) the range above it, it
+         * becomes part of that range; likewise if it extends the range below,
+         * it becomes part of that range */
+        if (end + 1 == array[i_e+1]) {
+            i_e++;
+            array[i_e] = start;
+        }
+        else if (start <= array[i_e]) {
+            array[i_e] = end + 1;
+            i_e--;
+        }
+    }
+
+    if (i_s == i_e) {
+
+        /* If the range fits entirely in an existing range (as possibly already
+         * extended above), it doesn't add anything new */
+        if (ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+            return invlist;
+        }
+
+        /* Here, no part of the range is in the list.  Must add it.  It will
+         * occupy 2 more slots */
+      splice_in_new_range:
+
+        invlist_extend(invlist, len + 2);
+        array = invlist_array(invlist);
+        /* Move the rest of the array down two slots. Don't include any
+         * trailing NUL */
+        Move(array + i_e + 1, array + i_e + 3, len - i_e - 1, UV);
+
+        /* Do the actual splice */
+        array[i_e+1] = start;
+        array[i_e+2] = end + 1;
+        invlist_set_len(invlist, len + 2, *(get_invlist_offset_addr(invlist)));
+        return invlist;
+    }
+
+    /* Here the new range crossed the boundaries of a pre-existing range.  The
+     * code above has adjusted things so that both ends are in ranges that are
+     * in the set.  This means everything in between must also be in the set.
+     * Just squash things together */
+    Move(array + i_e + 1, array + i_s + 1, len - i_e - 1, UV);
+    invlist_set_len(invlist,
+                    len - i_e + i_s,
+                    *(get_invlist_offset_addr(invlist)));
+
+    return invlist;
+}
+
+SV*
+Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
+                                 UV** other_elements_ptr)
+{
+    /* Create and return an inversion list whose contents are to be populated
+     * by the caller.  The caller gives the number of elements (in 'size') and
+     * the very first element ('element0').  This function will set
+     * '*other_elements_ptr' to an array of UVs, where the remaining elements
+     * are to be placed.
+     *
+     * Obviously there is some trust involved that the caller will properly
+     * fill in the other elements of the array.
+     *
+     * (The first element needs to be passed in, as the underlying code does
+     * things differently depending on whether it is zero or non-zero) */
+
+    SV* invlist = _new_invlist(size);
+    bool offset;
+
+    PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
+
+    invlist = add_cp_to_invlist(invlist, element0);
+    offset = *get_invlist_offset_addr(invlist);
+
+    invlist_set_len(invlist, size, offset);
+    *other_elements_ptr = invlist_array(invlist) + 1;
+    return invlist;
+}
+
+#endif
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_invert(pTHX_ SV* const invlist)
+{
+    /* Complement the input inversion list.  This adds a 0 if the list didn't
+     * have a zero; removes it otherwise.  As described above, the data
+     * structure is set up so that this is very efficient */
+
+    PERL_ARGS_ASSERT__INVLIST_INVERT;
+
+    assert(! invlist_is_iterating(invlist));
+
+    /* The inverse of matching nothing is matching everything */
+    if (_invlist_len(invlist) == 0) {
+        _append_range_to_invlist(invlist, 0, UV_MAX);
+        return;
+    }
+
+    *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
+}
+
+SV*
+Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
+{
+    /* Return a new inversion list that is a copy of the input one, which is
+     * unchanged.  The new list will not be mortal even if the old one was. */
+
+    const STRLEN nominal_length = _invlist_len(invlist);
+    const STRLEN physical_length = SvCUR(invlist);
+    const bool offset = *(get_invlist_offset_addr(invlist));
+
+    PERL_ARGS_ASSERT_INVLIST_CLONE;
+
+    if (new_invlist == NULL) {
+        new_invlist = _new_invlist(nominal_length);
+    }
+    else {
+        sv_upgrade(new_invlist, SVt_INVLIST);
+        initialize_invlist_guts(new_invlist, nominal_length);
+    }
+
+    *(get_invlist_offset_addr(new_invlist)) = offset;
+    invlist_set_len(new_invlist, nominal_length, offset);
+    Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
+
+    return new_invlist;
+}
+
+#endif
+
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
+                         const char * const indent, SV* const invlist)
+{
+    /* Designed to be called only by do_sv_dump().  Dumps out the ranges of the
+     * inversion list 'invlist' to 'file' at 'level'  Each line is prefixed by
+     * the string 'indent'.  The output looks like this:
+         [0] 0x000A .. 0x000D
+         [2] 0x0085
+         [4] 0x2028 .. 0x2029
+         [6] 0x3104 .. INFTY
+     * This means that the first range of code points matched by the list are
+     * 0xA through 0xD; the second range contains only the single code point
+     * 0x85, etc.  An inversion list is an array of UVs.  Two array elements
+     * are used to define each range (except if the final range extends to
+     * infinity, only a single element is needed).  The array index of the
+     * first element for the corresponding range is given in brackets. */
+
+    UV start, end;
+    STRLEN count = 0;
+
+    PERL_ARGS_ASSERT__INVLIST_DUMP;
+
+    if (invlist_is_iterating(invlist)) {
+        Perl_dump_indent(aTHX_ level, file,
+             "%sCan't dump inversion list because is in middle of iterating\n",
+             indent);
+        return;
+    }
+
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+        if (end == UV_MAX) {
+            Perl_dump_indent(aTHX_ level, file,
+                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n",
+                                   indent, (UV)count, start);
+        }
+        else if (end != start) {
+            Perl_dump_indent(aTHX_ level, file,
+                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
+                                indent, (UV)count, start,         end);
+        }
+        else {
+            Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
+                                            indent, (UV)count, start);
+        }
+        count += 2;
+    }
+}
+
+#endif
+
+#if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
+bool
+Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
+{
+    /* Return a boolean as to if the two passed in inversion lists are
+     * identical.  The final argument, if TRUE, says to take the complement of
+     * the second inversion list before doing the comparison */
+
+    const UV len_a = _invlist_len(a);
+    UV len_b = _invlist_len(b);
+
+    const UV* array_a = NULL;
+    const UV* array_b = NULL;
+
+    PERL_ARGS_ASSERT__INVLISTEQ;
+
+    /* This code avoids accessing the arrays unless it knows the length is
+     * non-zero */
+
+    if (len_a == 0) {
+        if (len_b == 0) {
+            return ! complement_b;
+        }
+    }
+    else {
+        array_a = invlist_array(a);
+    }
+
+    if (len_b != 0) {
+        array_b = invlist_array(b);
+    }
+
+    /* If are to compare 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* The complement of nothing is everything, so <a> would have to have
+         * just one element, starting at zero (ending at infinity) */
+        if (len_b == 0) {
+            return (len_a == 1 && array_a[0] == 0);
+        }
+        if (array_b[0] == 0) {
+
+            /* Otherwise, to complement, we invert.  Here, the first element is
+             * 0, just remove it.  To do this, we just pretend the array starts
+             * one later */
+
+            array_b++;
+            len_b--;
+        }
+        else {
+
+            /* But if the first element is not zero, we pretend the list starts
+             * at the 0 that is always stored immediately before the array. */
+            array_b--;
+            len_b++;
+        }
+    }
+
+    return    len_a == len_b
+           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
+
+}
+#endif
+
+#undef HEADER_LENGTH
+#undef TO_INTERNAL_SIZE
+#undef FROM_INTERNAL_SIZE
+#undef INVLIST_VERSION_ID
+
+/* End of inversion list object */
diff --git a/regcomp_study.c b/regcomp_study.c
new file mode 100644
index 0000000000..bf61d34f48
--- /dev/null
+++ b/regcomp_study.c
@@ -0,0 +1,3808 @@
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
+#include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
+#define PERL_IN_REGCOMP_STUDY_C
+#include "perl.h"
+
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
+
+#include "invlist_inline.h"
+#include "unicode_constants.h"
+#include "regcomp_internal.h"
+
+#define INIT_AND_WITHP \
+    assert(!and_withp); \
+    Newx(and_withp, 1, regnode_ssc); \
+    SAVEFREEPV(and_withp)
+
+
+STATIC void
+S_unwind_scan_frames(pTHX_ const void *p)
+{
+    PERL_ARGS_ASSERT_UNWIND_SCAN_FRAMES;
+    scan_frame *f= (scan_frame *)p;
+    do {
+        scan_frame *n= f->next_frame;
+        Safefree(f);
+        f= n;
+    } while (f);
+}
+
+/* Follow the next-chain of the current node and optimize away
+   all the NOTHINGs from it.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+    if (OP(node) != CURLYX) {
+        const int max = (REGNODE_OFF_BY_ARG(OP(node))
+                        ? I32_MAX
+                          /* I32 may be smaller than U16 on CRAYs! */
+                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+        int off = (REGNODE_OFF_BY_ARG(OP(node)) ? ARG(node) : NEXT_OFF(node));
+        int noff;
+        regnode *n = node;
+
+        /* Skip NOTHING and LONGJMP. */
+        while (
+            (n = regnext(n))
+            && (
+                (REGNODE_TYPE(OP(n)) == NOTHING && (noff = NEXT_OFF(n)))
+                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+            )
+            && off + noff < max
+        ) {
+            off += noff;
+        }
+        if (REGNODE_OFF_BY_ARG(OP(node)))
+            ARG(node) = off;
+        else
+            NEXT_OFF(node) = off;
+    }
+    return;
+}
+
+
+/*
+ * As best we can, determine the characters that can match the start of
+ * the given EXACTF-ish node.  This is for use in creating ssc nodes, so there
+ * can be false positive matches
+ *
+ * Returns the invlist as a new SV*; it is the caller's responsibility to
+ * call SvREFCNT_dec() when done with it.
+ */
+STATIC SV*
+S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
+{
+    const U8 * s = (U8*)STRING(node);
+    SSize_t bytelen = STR_LEN(node);
+    UV uc;
+    /* Start out big enough for 2 separate code points */
+    SV* invlist = _new_invlist(4);
+
+    PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST;
+
+    if (! UTF) {
+        uc = *s;
+
+        /* We punt and assume can match anything if the node begins
+         * with a multi-character fold.  Things are complicated.  For
+         * example, /ffi/i could match any of:
+         *  "\N{LATIN SMALL LIGATURE FFI}"
+         *  "\N{LATIN SMALL LIGATURE FF}I"
+         *  "F\N{LATIN SMALL LIGATURE FI}"
+         *  plus several other things; and making sure we have all the
+         *  possibilities is hard. */
+        if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + bytelen)) {
+            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
+        }
+        else {
+            /* Any Latin1 range character can potentially match any
+             * other depending on the locale, and in Turkic locales, 'I' and
+             * 'i' can match U+130 and U+131 */
+            if (OP(node) == EXACTFL) {
+                _invlist_union(invlist, PL_Latin1, &invlist);
+                if (isALPHA_FOLD_EQ(uc, 'I')) {
+                    invlist = add_cp_to_invlist(invlist,
+                                                LATIN_SMALL_LETTER_DOTLESS_I);
+                    invlist = add_cp_to_invlist(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                }
+            }
+            else {
+                /* But otherwise, it matches at least itself.  We can
+                 * quickly tell if it has a distinct fold, and if so,
+                 * it matches that as well */
+                invlist = add_cp_to_invlist(invlist, uc);
+                if (IS_IN_SOME_FOLD_L1(uc))
+                    invlist = add_cp_to_invlist(invlist, PL_fold_latin1[uc]);
+            }
+
+            /* Some characters match above-Latin1 ones under /i.  This
+             * is true of EXACTFL ones when the locale is UTF-8 */
+            if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
+                && (! isASCII(uc) || ! inRANGE(OP(node), EXACTFAA,
+                                                         EXACTFAA_NO_TRIE)))
+            {
+                add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
+            }
+        }
+    }
+    else {  /* Pattern is UTF-8 */
+        U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+        const U8* e = s + bytelen;
+        IV fc;
+
+        fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+
+        /* The only code points that aren't folded in a UTF EXACTFish
+         * node are the problematic ones in EXACTFL nodes */
+        if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
+            /* We need to check for the possibility that this EXACTFL
+             * node begins with a multi-char fold.  Therefore we fold
+             * the first few characters of it so that we can make that
+             * check */
+            U8 *d = folded;
+            int i;
+
+            fc = -1;
+            for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
+                if (isASCII(*s)) {
+                    *(d++) = (U8) toFOLD(*s);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = *(d-1);
+                    }
+                    s++;
+                }
+                else {
+                    STRLEN len;
+                    UV fold = toFOLD_utf8_safe(s, e, d, &len);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = fold;
+                    }
+                    d += len;
+                    s += UTF8SKIP(s);
+                }
+            }
+
+            /* And set up so the code below that looks in this folded
+             * buffer instead of the node's string */
+            e = d;
+            s = folded;
+        }
+
+        /* When we reach here 's' points to the fold of the first
+         * character(s) of the node; and 'e' points to far enough along
+         * the folded string to be just past any possible multi-char
+         * fold.
+         *
+         * Like the non-UTF case above, we punt if the node begins with a
+         * multi-char fold  */
+
+        if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
+            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
+        }
+        else {  /* Single char fold */
+            unsigned int k;
+            U32 first_fold;
+            const U32 * remaining_folds;
+            Size_t folds_count;
+
+            /* It matches itself */
+            invlist = add_cp_to_invlist(invlist, fc);
+
+            /* ... plus all the things that fold to it, which are found in
+             * PL_utf8_foldclosures */
+            folds_count = _inverse_folds(fc, &first_fold,
+                                                &remaining_folds);
+            for (k = 0; k < folds_count; k++) {
+                UV c = (k == 0) ? first_fold : remaining_folds[k-1];
+
+                /* /aa doesn't allow folds between ASCII and non- */
+                if (   inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)
+                    && isASCII(c) != isASCII(fc))
+                {
+                    continue;
+                }
+
+                invlist = add_cp_to_invlist(invlist, c);
+            }
+
+            if (OP(node) == EXACTFL) {
+
+                /* If either [iI] are present in an EXACTFL node the above code
+                 * should have added its normal case pair, but under a Turkish
+                 * locale they could match instead the case pairs from it.  Add
+                 * those as potential matches as well */
+                if (isALPHA_FOLD_EQ(fc, 'I')) {
+                    invlist = add_cp_to_invlist(invlist,
+                                                LATIN_SMALL_LETTER_DOTLESS_I);
+                    invlist = add_cp_to_invlist(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                }
+                else if (fc == LATIN_SMALL_LETTER_DOTLESS_I) {
+                    invlist = add_cp_to_invlist(invlist, 'I');
+                }
+                else if (fc == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+                    invlist = add_cp_to_invlist(invlist, 'i');
+                }
+            }
+        }
+    }
+
+    return invlist;
+}
+
+
+/* Mark that we cannot extend a found fixed substring at this point.
+   Update the longest found anchored substring or the longest found
+   floating substrings if needed. */
+
+void
+Perl_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
+                    SSize_t *minlenp, int is_inf)
+{
+    const STRLEN l = CHR_SVLEN(data->last_found);
+    SV * const longest_sv = data->substrs[data->cur_is_floating].str;
+    const STRLEN old_l = CHR_SVLEN(longest_sv);
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_SCAN_COMMIT;
+
+    if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
+        const U8 i = data->cur_is_floating;
+        SvSetMagicSV(longest_sv, data->last_found);
+        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+        if (!i) /* fixed */
+            data->substrs[0].max_offset = data->substrs[0].min_offset;
+        else { /* float */
+            data->substrs[1].max_offset =
+                      (is_inf)
+                       ? OPTIMIZE_INFTY
+                       : (l
+                          ? data->last_start_max
+                          : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
+                                         ? OPTIMIZE_INFTY
+                                         : data->pos_min + data->pos_delta));
+        }
+
+        data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].flags |= data->flags & SF_BEFORE_EOL;
+        data->substrs[i].minlenp = minlenp;
+        data->substrs[i].lookbehind = 0;
+    }
+
+    SvCUR_set(data->last_found, 0);
+    {
+        SV * const sv = data->last_found;
+        if (SvUTF8(sv) && SvMAGICAL(sv)) {
+            MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+            if (mg)
+                mg->mg_len = 0;
+        }
+    }
+    data->last_end = -1;
+    data->flags &= ~SF_BEFORE_EOL;
+    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
+}
+
+/* An SSC is just a regnode_charclass_posix with an extra field: the inversion
+ * list that describes which code points it matches */
+
+STATIC void
+S_ssc_anything(pTHX_ regnode_ssc *ssc)
+{
+    /* Set the SSC 'ssc' to match an empty string or any code point */
+
+    PERL_ARGS_ASSERT_SSC_ANYTHING;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    /* mortalize so won't leak */
+    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
+    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
+}
+
+STATIC int
+S_ssc_is_anything(const regnode_ssc *ssc)
+{
+    /* Returns TRUE if the SSC 'ssc' can match the empty string and any code
+     * point; FALSE otherwise.  Thus, this is used to see if using 'ssc' buys
+     * us anything: if the function returns TRUE, 'ssc' hasn't been restricted
+     * in any way, so there's no point in using it */
+
+    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
+    bool ret;
+
+    PERL_ARGS_ASSERT_SSC_IS_ANYTHING;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING)) {
+        return FALSE;
+    }
+
+    /* See if the list consists solely of the range 0 - Infinity */
+    invlist_iterinit(ssc->invlist);
+    ret = invlist_iternext(ssc->invlist, &start, &end)
+          && start == 0
+          && end == UV_MAX;
+
+    invlist_iterfinish(ssc->invlist);
+
+    if (ret) {
+        return TRUE;
+    }
+
+    /* If e.g., both \w and \W are set, matches everything */
+    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
+        int i;
+        for (i = 0; i < ANYOF_POSIXL_MAX; i += 2) {
+            if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i+1)) {
+                return TRUE;
+            }
+        }
+    }
+
+    return FALSE;
+}
+
+void
+Perl_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
+{
+    /* Initializes the SSC 'ssc'.  This includes setting it to match an empty
+     * string, any code point, or any posix class under locale */
+
+    PERL_ARGS_ASSERT_SSC_INIT;
+
+    Zero(ssc, 1, regnode_ssc);
+    set_ANYOF_SYNTHETIC(ssc);
+    ARG_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
+    ssc_anything(ssc);
+
+    /* If any portion of the regex is to operate under locale rules that aren't
+     * fully known at compile time, initialization includes it.  The reason
+     * this isn't done for all regexes is that the optimizer was written under
+     * the assumption that locale was all-or-nothing.  Given the complexity and
+     * lack of documentation in the optimizer, and that there are inadequate
+     * test cases for locale, many parts of it may not work properly, it is
+     * safest to avoid locale unless necessary. */
+    if (RExC_contains_locale) {
+        ANYOF_POSIXL_SETALL(ssc);
+    }
+    else {
+        ANYOF_POSIXL_ZERO(ssc);
+    }
+}
+
+STATIC int
+S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
+                        const regnode_ssc *ssc)
+{
+    /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
+     * to the list of code points matched, and locale posix classes; hence does
+     * not check its flags) */
+
+    UV start = 0, end = 0;  /* Initialize due to messages from dumb compiler */
+    bool ret;
+
+    PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    invlist_iterinit(ssc->invlist);
+    ret = invlist_iternext(ssc->invlist, &start, &end)
+          && start == 0
+          && end == UV_MAX;
+
+    invlist_iterfinish(ssc->invlist);
+
+    if (! ret) {
+        return FALSE;
+    }
+
+    if (RExC_contains_locale && ! ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc)) {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+STATIC SV*
+S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
+                               const regnode_charclass* const node)
+{
+    /* Returns a mortal inversion list defining which code points are matched
+     * by 'node', which is of ANYOF-ish type .  Handles complementing the
+     * result if appropriate.  If some code points aren't knowable at this
+     * time, the returned list must, and will, contain every code point that is
+     * a possibility. */
+
+    SV* invlist = NULL;
+    SV* only_utf8_locale_invlist = NULL;
+    bool new_node_has_latin1 = FALSE;
+    const U8 flags = (REGNODE_TYPE(OP(node)) == ANYOF)
+                      ? ANYOF_FLAGS(node)
+                      : 0;
+
+    PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
+
+    /* Look at the data structure created by S_set_ANYOF_arg() */
+    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
+        invlist = sv_2mortal(_new_invlist(1));
+        invlist = _add_range_to_invlist(invlist, NUM_ANYOF_CODE_POINTS, UV_MAX);
+    }
+    else if (ANYOF_HAS_AUX(node)) {
+        const U32 n = ARG(node);
+        SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
+        AV * const av = MUTABLE_AV(SvRV(rv));
+        SV **const ary = AvARRAY(av);
+
+        if (av_tindex_skip_len_mg(av) >= DEFERRED_USER_DEFINED_INDEX) {
+
+            /* Here there are things that won't be known until runtime -- we
+             * have to assume it could be anything */
+            invlist = sv_2mortal(_new_invlist(1));
+            return _add_range_to_invlist(invlist, 0, UV_MAX);
+        }
+        else if (ary[INVLIST_INDEX]) {
+
+            /* Use the node's inversion list */
+            invlist = sv_2mortal(invlist_clone(ary[INVLIST_INDEX], NULL));
+        }
+
+        /* Get the code points valid only under UTF-8 locales */
+        if (   (flags & ANYOFL_FOLD)
+            &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
+        {
+            only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
+        }
+    }
+
+    if (! invlist) {
+        invlist = sv_2mortal(_new_invlist(0));
+    }
+
+    /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
+     * code points, and an inversion list for the others, but if there are code
+     * points that should match only conditionally on the target string being
+     * UTF-8, those are placed in the inversion list, and not the bitmap.
+     * Since there are circumstances under which they could match, they are
+     * included in the SSC.  But if the ANYOF node is to be inverted, we have
+     * to exclude them here, so that when we invert below, the end result
+     * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
+     * have to do this here before we add the unconditionally matched code
+     * points */
+    if (flags & ANYOF_INVERT) {
+        _invlist_intersection_complement_2nd(invlist,
+                                             PL_UpperLatin1,
+                                             &invlist);
+    }
+
+    /* Add in the points from the bit map */
+    if (REGNODE_TYPE(OP(node)) == ANYOF){
+        for (unsigned i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (ANYOF_BITMAP_TEST(node, i)) {
+                unsigned int start = i++;
+
+                for (;    i < NUM_ANYOF_CODE_POINTS
+                       && ANYOF_BITMAP_TEST(node, i); ++i)
+                {
+                    /* empty */
+                }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
+                new_node_has_latin1 = TRUE;
+            }
+        }
+    }
+
+    /* If this can match all upper Latin1 code points, have to add them
+     * as well.  But don't add them if inverting, as when that gets done below,
+     * it would exclude all these characters, including the ones it shouldn't
+     * that were added just above */
+    if ( ! (flags & ANYOF_INVERT)
+        &&  OP(node) == ANYOFD
+        && (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared))
+    {
+        _invlist_union(invlist, PL_UpperLatin1, &invlist);
+    }
+
+    /* Similarly for these */
+    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
+        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
+    }
+
+    if (flags & ANYOF_INVERT) {
+        _invlist_invert(invlist);
+    }
+    else if (flags & ANYOFL_FOLD) {
+        if (new_node_has_latin1) {
+
+            /* These folds are potential in Turkic locales */
+            if (_invlist_contains_cp(invlist, 'i')) {
+                invlist = add_cp_to_invlist(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+            }
+            if (_invlist_contains_cp(invlist, 'I')) {
+                invlist = add_cp_to_invlist(invlist,
+                                                LATIN_SMALL_LETTER_DOTLESS_I);
+            }
+
+            /* Under /li, any 0-255 could fold to any other 0-255, depending on
+             * the locale.  We can skip this if there are no 0-255 at all. */
+            _invlist_union(invlist, PL_Latin1, &invlist);
+        }
+        else {
+            if (_invlist_contains_cp(invlist, LATIN_SMALL_LETTER_DOTLESS_I)) {
+                invlist = add_cp_to_invlist(invlist, 'I');
+            }
+            if (_invlist_contains_cp(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
+            {
+                invlist = add_cp_to_invlist(invlist, 'i');
+            }
+        }
+    }
+
+    /* Similarly add the UTF-8 locale possible matches.  These have to be
+     * deferred until after the non-UTF-8 locale ones are taken care of just
+     * above, or it leads to wrong results under ANYOF_INVERT */
+    if (only_utf8_locale_invlist) {
+        _invlist_union_maybe_complement_2nd(invlist,
+                                            only_utf8_locale_invlist,
+                                            flags & ANYOF_INVERT,
+                                            &invlist);
+    }
+
+    return invlist;
+}
+
+/* 'AND' a given class with another one.  Can create false positives.  'ssc'
+ * should not be inverted. */
+
+STATIC void
+S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
+                const regnode_charclass *and_with)
+{
+    /* Accumulate into SSC 'ssc' its 'AND' with 'and_with', which is either
+     * another SSC or a regular ANYOF class.  Can create false positives. */
+
+    SV* anded_cp_list;
+    U8  and_with_flags = (REGNODE_TYPE(OP(and_with)) == ANYOF)
+                          ? ANYOF_FLAGS(and_with)
+                          : 0;
+    U8  anded_flags;
+
+    PERL_ARGS_ASSERT_SSC_AND;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    /* 'and_with' is used as-is if it too is an SSC; otherwise have to extract
+     * the code point inversion list and just the relevant flags */
+    if (is_ANYOF_SYNTHETIC(and_with)) {
+        anded_cp_list = ((regnode_ssc *)and_with)->invlist;
+        anded_flags = and_with_flags;
+
+        /* XXX This is a kludge around what appears to be deficiencies in the
+         * optimizer.  If we make S_ssc_anything() add in the WARN_SUPER flag,
+         * there are paths through the optimizer where it doesn't get weeded
+         * out when it should.  And if we don't make some extra provision for
+         * it like the code just below, it doesn't get added when it should.
+         * This solution is to add it only when AND'ing, which is here, and
+         * only when what is being AND'ed is the pristine, original node
+         * matching anything.  Thus it is like adding it to ssc_anything() but
+         * only when the result is to be AND'ed.  Probably the same solution
+         * could be adopted for the same problem we have with /l matching,
+         * which is solved differently in S_ssc_init(), and that would lead to
+         * fewer false positives than that solution has.  But if this solution
+         * creates bugs, the consequences are only that a warning isn't raised
+         * that should be; while the consequences for having /l bugs is
+         * incorrect matches */
+        if (ssc_is_anything((regnode_ssc *)and_with)) {
+            anded_flags |= ANYOF_WARN_SUPER__shared;
+        }
+    }
+    else {
+        anded_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
+        if (OP(and_with) == ANYOFD) {
+            anded_flags = and_with_flags & ANYOF_COMMON_FLAGS;
+        }
+        else {
+            anded_flags = and_with_flags
+                            & ( ANYOF_COMMON_FLAGS
+                               |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
+                               |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
+            if (and_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
+                anded_flags &= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
+            }
+        }
+    }
+
+    ANYOF_FLAGS(ssc) &= anded_flags;
+
+    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
+     * C2 is the list of code points in 'and-with'; P2, its posix classes.
+     * 'and_with' may be inverted.  When not inverted, we have the situation of
+     * computing:
+     *  (C1 | P1) & (C2 | P2)
+     *                     =  (C1 & (C2 | P2)) | (P1 & (C2 | P2))
+     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
+     *                    <=  ((C1 & C2) |       P2)) | ( P1       | (P1 & P2))
+     *                    <=  ((C1 & C2) | P1 | P2)
+     * Alternatively, the last few steps could be:
+     *                     =  ((C1 & C2) | (C1 & P2)) | ((P1 & C2) | (P1 & P2))
+     *                    <=  ((C1 & C2) |  C1      ) | (      C2  | (P1 & P2))
+     *                    <=  (C1 | C2 | (P1 & P2))
+     * We favor the second approach if either P1 or P2 is non-empty.  This is
+     * because these components are a barrier to doing optimizations, as what
+     * they match cannot be known until the moment of matching as they are
+     * dependent on the current locale, 'AND"ing them likely will reduce or
+     * eliminate them.
+     * But we can do better if we know that C1,P1 are in their initial state (a
+     * frequent occurrence), each matching everything:
+     *  (<everything>) & (C2 | P2) =  C2 | P2
+     * Similarly, if C2,P2 are in their initial state (again a frequent
+     * occurrence), the result is a no-op
+     *  (C1 | P1) & (<everything>) =  C1 | P1
+     *
+     * Inverted, we have
+     *  (C1 | P1) & ~(C2 | P2)  =  (C1 | P1) & (~C2 & ~P2)
+     *                          =  (C1 & (~C2 & ~P2)) | (P1 & (~C2 & ~P2))
+     *                         <=  (C1 & ~C2) | (P1 & ~P2)
+     * */
+
+    if ((and_with_flags & ANYOF_INVERT)
+        && ! is_ANYOF_SYNTHETIC(and_with))
+    {
+        unsigned int i;
+
+        ssc_intersection(ssc,
+                         anded_cp_list,
+                         FALSE /* Has already been inverted */
+                         );
+
+        /* If either P1 or P2 is empty, the intersection will be also; can skip
+         * the loop */
+        if (! (and_with_flags & ANYOF_MATCHES_POSIXL)) {
+            ANYOF_POSIXL_ZERO(ssc);
+        }
+        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
+
+            /* Note that the Posix class component P from 'and_with' actually
+             * looks like:
+             *      P = Pa | Pb | ... | Pn
+             * where each component is one posix class, such as in [\w\s].
+             * Thus
+             *      ~P = ~(Pa | Pb | ... | Pn)
+             *         = ~Pa & ~Pb & ... & ~Pn
+             *        <= ~Pa | ~Pb | ... | ~Pn
+             * The last is something we can easily calculate, but unfortunately
+             * is likely to have many false positives.  We could do better
+             * in some (but certainly not all) instances if two classes in
+             * P have known relationships.  For example
+             *      :lower: <= :alpha: <= :alnum: <= \w <= :graph: <= :print:
+             * So
+             *      :lower: & :print: = :lower:
+             * And similarly for classes that must be disjoint.  For example,
+             * since \s and \w can have no elements in common based on rules in
+             * the POSIX standard,
+             *      \w & ^\S = nothing
+             * Unfortunately, some vendor locales do not meet the Posix
+             * standard, in particular almost everything by Microsoft.
+             * The loop below just changes e.g., \w into \W and vice versa */
+
+            regnode_charclass_posixl temp;
+            int add = 1;    /* To calculate the index of the complement */
+
+            Zero(&temp, 1, regnode_charclass_posixl);
+            ANYOF_POSIXL_ZERO(&temp);
+            for (i = 0; i < ANYOF_MAX; i++) {
+                assert(i % 2 != 0
+                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)
+                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i + 1));
+
+                if (ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)) {
+                    ANYOF_POSIXL_SET(&temp, i + add);
+                }
+                add = 0 - add; /* 1 goes to -1; -1 goes to 1 */
+            }
+            ANYOF_POSIXL_AND(&temp, ssc);
+
+        } /* else ssc already has no posixes */
+    } /* else: Not inverted.  This routine is a no-op if 'and_with' is an SSC
+         in its initial state */
+    else if (! is_ANYOF_SYNTHETIC(and_with)
+             || ! ssc_is_cp_posixl_init(pRExC_state, (regnode_ssc *)and_with))
+    {
+        /* But if 'ssc' is in its initial state, the result is just 'and_with';
+         * copy it over 'ssc' */
+        if (ssc_is_cp_posixl_init(pRExC_state, ssc)) {
+            if (is_ANYOF_SYNTHETIC(and_with)) {
+                StructCopy(and_with, ssc, regnode_ssc);
+            }
+            else {
+                ssc->invlist = anded_cp_list;
+                ANYOF_POSIXL_ZERO(ssc);
+                if (and_with_flags & ANYOF_MATCHES_POSIXL) {
+                    ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
+                }
+            }
+        }
+        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
+                 || (and_with_flags & ANYOF_MATCHES_POSIXL))
+        {
+            /* One or the other of P1, P2 is non-empty. */
+            if (and_with_flags & ANYOF_MATCHES_POSIXL) {
+                ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
+            }
+            ssc_union(ssc, anded_cp_list, FALSE);
+        }
+        else { /* P1 = P2 = empty */
+            ssc_intersection(ssc, anded_cp_list, FALSE);
+        }
+    }
+}
+
+STATIC void
+S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
+               const regnode_charclass *or_with)
+{
+    /* Accumulate into SSC 'ssc' its 'OR' with 'or_with', which is either
+     * another SSC or a regular ANYOF class.  Can create false positives if
+     * 'or_with' is to be inverted. */
+
+    SV* ored_cp_list;
+    U8 ored_flags;
+    U8  or_with_flags = (REGNODE_TYPE(OP(or_with)) == ANYOF)
+                         ? ANYOF_FLAGS(or_with)
+                         : 0;
+
+    PERL_ARGS_ASSERT_SSC_OR;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    /* 'or_with' is used as-is if it too is an SSC; otherwise have to extract
+     * the code point inversion list and just the relevant flags */
+    if (is_ANYOF_SYNTHETIC(or_with)) {
+        ored_cp_list = ((regnode_ssc*) or_with)->invlist;
+        ored_flags = or_with_flags;
+    }
+    else {
+        ored_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
+        ored_flags = or_with_flags & ANYOF_COMMON_FLAGS;
+        if (OP(or_with) != ANYOFD) {
+            ored_flags |=
+                or_with_flags & ( ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
+                                 |ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
+            if (or_with_flags & ANYOFL_UTF8_LOCALE_REQD) {
+                ored_flags |= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
+            }
+        }
+    }
+
+    ANYOF_FLAGS(ssc) |= ored_flags;
+
+    /* Below, C1 is the list of code points in 'ssc'; P1, its posix classes.
+     * C2 is the list of code points in 'or-with'; P2, its posix classes.
+     * 'or_with' may be inverted.  When not inverted, we have the simple
+     * situation of computing:
+     *  (C1 | P1) | (C2 | P2)  =  (C1 | C2) | (P1 | P2)
+     * If P1|P2 yields a situation with both a class and its complement are
+     * set, like having both \w and \W, this matches all code points, and we
+     * can delete these from the P component of the ssc going forward.  XXX We
+     * might be able to delete all the P components, but I (khw) am not certain
+     * about this, and it is better to be safe.
+     *
+     * Inverted, we have
+     *  (C1 | P1) | ~(C2 | P2)  =  (C1 | P1) | (~C2 & ~P2)
+     *                         <=  (C1 | P1) | ~C2
+     *                         <=  (C1 | ~C2) | P1
+     * (which results in actually simpler code than the non-inverted case)
+     * */
+
+    if ((or_with_flags & ANYOF_INVERT)
+        && ! is_ANYOF_SYNTHETIC(or_with))
+    {
+        /* We ignore P2, leaving P1 going forward */
+    }   /* else  Not inverted */
+    else if (or_with_flags & ANYOF_MATCHES_POSIXL) {
+        ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
+        if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
+            unsigned int i;
+            for (i = 0; i < ANYOF_MAX; i += 2) {
+                if (ANYOF_POSIXL_TEST(ssc, i) && ANYOF_POSIXL_TEST(ssc, i + 1))
+                {
+                    ssc_match_all_cp(ssc);
+                    ANYOF_POSIXL_CLEAR(ssc, i);
+                    ANYOF_POSIXL_CLEAR(ssc, i+1);
+                }
+            }
+        }
+    }
+
+    ssc_union(ssc,
+              ored_cp_list,
+              FALSE /* Already has been inverted */
+              );
+}
+
+STATIC void
+S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
+{
+    PERL_ARGS_ASSERT_SSC_UNION;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    _invlist_union_maybe_complement_2nd(ssc->invlist,
+                                        invlist,
+                                        invert2nd,
+                                        &ssc->invlist);
+}
+
+STATIC void
+S_ssc_intersection(pTHX_ regnode_ssc *ssc,
+                         SV* const invlist,
+                         const bool invert2nd)
+{
+    PERL_ARGS_ASSERT_SSC_INTERSECTION;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    _invlist_intersection_maybe_complement_2nd(ssc->invlist,
+                                               invlist,
+                                               invert2nd,
+                                               &ssc->invlist);
+}
+
+STATIC void
+S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
+{
+    PERL_ARGS_ASSERT_SSC_ADD_RANGE;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end);
+}
+
+STATIC void
+S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
+{
+    /* AND just the single code point 'cp' into the SSC 'ssc' */
+
+    SV* cp_list = _new_invlist(2);
+
+    PERL_ARGS_ASSERT_SSC_CP_AND;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    cp_list = add_cp_to_invlist(cp_list, cp);
+    ssc_intersection(ssc, cp_list,
+                     FALSE /* Not inverted */
+                     );
+    SvREFCNT_dec_NN(cp_list);
+}
+
+STATIC void
+S_ssc_clear_locale(regnode_ssc *ssc)
+{
+    /* Set the SSC 'ssc' to not match any locale things */
+    PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    ANYOF_POSIXL_ZERO(ssc);
+    ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
+}
+
+bool
+Perl_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
+{
+    /* The synthetic start class is used to hopefully quickly winnow down
+     * places where a pattern could start a match in the target string.  If it
+     * doesn't really narrow things down that much, there isn't much point to
+     * having the overhead of using it.  This function uses some very crude
+     * heuristics to decide if to use the ssc or not.
+     *
+     * It returns TRUE if 'ssc' rules out more than half what it considers to
+     * be the "likely" possible matches, but of course it doesn't know what the
+     * actual things being matched are going to be; these are only guesses
+     *
+     * For /l matches, it assumes that the only likely matches are going to be
+     *      in the 0-255 range, uniformly distributed, so half of that is 127
+     * For /a and /d matches, it assumes that the likely matches will be just
+     *      the ASCII range, so half of that is 63
+     * For /u and there isn't anything matching above the Latin1 range, it
+     *      assumes that that is the only range likely to be matched, and uses
+     *      half that as the cut-off: 127.  If anything matches above Latin1,
+     *      it assumes that all of Unicode could match (uniformly), except for
+     *      non-Unicode code points and things in the General Category "Other"
+     *      (unassigned, private use, surrogates, controls and formats).  This
+     *      is a much large number. */
+
+    U32 count = 0;      /* Running total of number of code points matched by
+                           'ssc' */
+    UV start, end;      /* Start and end points of current range in inversion
+                           XXX outdated.  UTF-8 locales are common, what about invert? list */
+    const U32 max_code_points = (LOC)
+                                ?  256
+                                : ((  ! UNI_SEMANTICS
+                                    ||  invlist_highest(ssc->invlist) < 256)
+                                  ? 128
+                                  : NON_OTHER_COUNT);
+    const U32 max_match = max_code_points / 2;
+
+    PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
+
+    invlist_iterinit(ssc->invlist);
+    while (invlist_iternext(ssc->invlist, &start, &end)) {
+        if (start >= max_code_points) {
+            break;
+        }
+        end = MIN(end, max_code_points - 1);
+        count += end - start + 1;
+        if (count >= max_match) {
+            invlist_iterfinish(ssc->invlist);
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+void
+Perl_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
+{
+    /* The inversion list in the SSC is marked mortal; now we need a more
+     * permanent copy, which is stored the same way that is done in a regular
+     * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
+     * map */
+
+    SV* invlist = invlist_clone(ssc->invlist, NULL);
+
+    PERL_ARGS_ASSERT_SSC_FINALIZE;
+
+    assert(is_ANYOF_SYNTHETIC(ssc));
+
+    /* The code in this file assumes that all but these flags aren't relevant
+     * to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
+     * by the time we reach here */
+    assert(! (ANYOF_FLAGS(ssc)
+        & ~( ANYOF_COMMON_FLAGS
+            |ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
+            |ANYOF_HAS_EXTRA_RUNTIME_MATCHES)));
+
+    populate_anyof_bitmap_from_invlist( (regnode *) ssc, &invlist);
+
+    set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
+    SvREFCNT_dec(invlist);
+
+    /* Make sure is clone-safe */
+    ssc->invlist = NULL;
+
+    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
+        ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
+        OP(ssc) = ANYOFPOSIXL;
+    }
+    else if (RExC_contains_locale) {
+        OP(ssc) = ANYOFL;
+    }
+
+    assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
+}
+
+/* The below joins as many adjacent EXACTish nodes as possible into a single
+ * one.  The regop may be changed if the node(s) contain certain sequences that
+ * require special handling.  The joining is only done if:
+ * 1) there is room in the current conglomerated node to entirely contain the
+ *    next one.
+ * 2) they are compatible node types
+ *
+ * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
+ * these get optimized out
+ *
+ * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
+ * as possible, even if that means splitting an existing node so that its first
+ * part is moved to the preceding node.  This would maximise the efficiency of
+ * memEQ during matching.
+ *
+ * If a node is to match under /i (folded), the number of characters it matches
+ * can be different than its character length if it contains a multi-character
+ * fold.  *min_subtract is set to the total delta number of characters of the
+ * input nodes.
+ *
+ * And *unfolded_multi_char is set to indicate whether or not the node contains
+ * an unfolded multi-char fold.  This happens when it won't be known until
+ * runtime whether the fold is valid or not; namely
+ *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
+ *      target string being matched against turns out to be UTF-8 is that fold
+ *      valid; or
+ *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
+ *      runtime.
+ * (Multi-char folds whose components are all above the Latin1 range are not
+ * run-time locale dependent, and have already been folded by the time this
+ * function is called.)
+ *
+ * This is as good a place as any to discuss the design of handling these
+ * multi-character fold sequences.  It's been wrong in Perl for a very long
+ * time.  There are three code points in Unicode whose multi-character folds
+ * were long ago discovered to mess things up.  The previous designs for
+ * dealing with these involved assigning a special node for them.  This
+ * approach doesn't always work, as evidenced by this example:
+ *      "\xDFs" =~ /s\xDF/ui    # Used to fail before these patches
+ * Both sides fold to "sss", but if the pattern is parsed to create a node that
+ * would match just the \xDF, it won't be able to handle the case where a
+ * successful match would have to cross the node's boundary.  The new approach
+ * that hopefully generally solves the problem generates an EXACTFUP node
+ * that is "sss" in this case.
+ *
+ * It turns out that there are problems with all multi-character folds, and not
+ * just these three.  Now the code is general, for all such cases.  The
+ * approach taken is:
+ * 1)   This routine examines each EXACTFish node that could contain multi-
+ *      character folded sequences.  Since a single character can fold into
+ *      such a sequence, the minimum match length for this node is less than
+ *      the number of characters in the node.  This routine returns in
+ *      *min_subtract how many characters to subtract from the actual
+ *      length of the string to get a real minimum match length; it is 0 if
+ *      there are no multi-char foldeds.  This delta is used by the caller to
+ *      adjust the min length of the match, and the delta between min and max,
+ *      so that the optimizer doesn't reject these possibilities based on size
+ *      constraints.
+ *
+ * 2)   For the sequence involving the LATIN SMALL LETTER SHARP S (U+00DF)
+ *      under /u, we fold it to 'ss' in regatom(), and in this routine, after
+ *      joining, we scan for occurrences of the sequence 'ss' in non-UTF-8
+ *      EXACTFU nodes.  The node type of such nodes is then changed to
+ *      EXACTFUP, indicating it is problematic, and needs careful handling.
+ *      (The procedures in step 1) above are sufficient to handle this case in
+ *      UTF-8 encoded nodes.)  The reason this is problematic is that this is
+ *      the only case where there is a possible fold length change in non-UTF-8
+ *      patterns.  By reserving a special node type for problematic cases, the
+ *      far more common regular EXACTFU nodes can be processed faster.
+ *      regexec.c takes advantage of this.
+ *
+ *      EXACTFUP has been created as a grab-bag for (hopefully uncommon)
+ *      problematic cases.   These all only occur when the pattern is not
+ *      UTF-8.  In addition to the 'ss' sequence where there is a possible fold
+ *      length change, it handles the situation where the string cannot be
+ *      entirely folded.  The strings in an EXACTFish node are folded as much
+ *      as possible during compilation in regcomp.c.  This saves effort in
+ *      regex matching.  By using an EXACTFUP node when it is not possible to
+ *      fully fold at compile time, regexec.c can know that everything in an
+ *      EXACTFU node is folded, so folding can be skipped at runtime.  The only
+ *      case where folding in EXACTFU nodes can't be done at compile time is
+ *      the presumably uncommon MICRO SIGN, when the pattern isn't UTF-8.  This
+ *      is because its fold requires UTF-8 to represent.  Thus EXACTFUP nodes
+ *      handle two very different cases.  Alternatively, there could have been
+ *      a node type where there are length changes, one for unfolded, and one
+ *      for both.  If yet another special case needed to be created, the number
+ *      of required node types would have to go to 7.  khw figures that even
+ *      though there are plenty of node types to spare, that the maintenance
+ *      cost wasn't worth the small speedup of doing it that way, especially
+ *      since he thinks the MICRO SIGN is rarely encountered in practice.
+ *
+ *      There are other cases where folding isn't done at compile time, but
+ *      none of them are under /u, and hence not for EXACTFU nodes.  The folds
+ *      in EXACTFL nodes aren't known until runtime, and vary as the locale
+ *      changes.  Some folds in EXACTF depend on if the runtime target string
+ *      is UTF-8 or not.  (regatom() will create an EXACTFU node even under /di
+ *      when no fold in it depends on the UTF-8ness of the target string.)
+ *
+ * 3)   A problem remains for unfolded multi-char folds. (These occur when the
+ *      validity of the fold won't be known until runtime, and so must remain
+ *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
+ *      nodes when the pattern isn't in UTF-8.  (Note, BTW, that there cannot
+ *      be an EXACTF node with a UTF-8 pattern.)  They also occur for various
+ *      folds in EXACTFL nodes, regardless of the UTF-ness of the pattern.)
+ *      The reason this is a problem is that the optimizer part of regexec.c
+ *      (probably unwittingly, in Perl_regexec_flags()) makes an assumption
+ *      that a character in the pattern corresponds to at most a single
+ *      character in the target string.  (And I do mean character, and not byte
+ *      here, unlike other parts of the documentation that have never been
+ *      updated to account for multibyte Unicode.)  Sharp s in EXACTF and
+ *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
+ *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
+ *      EXACTFL nodes, violate the assumption, and they are the only instances
+ *      where it is violated.  I'm reluctant to try to change the assumption,
+ *      as the code involved is impenetrable to me (khw), so instead the code
+ *      here punts.  This routine examines EXACTFL nodes, and (when the pattern
+ *      isn't UTF-8) EXACTF and EXACTFAA for such unfolded folds, and returns a
+ *      boolean indicating whether or not the node contains such a fold.  When
+ *      it is true, the caller sets a flag that later causes the optimizer in
+ *      this file to not set values for the floating and fixed string lengths,
+ *      and thus avoids the optimizer code in regexec.c that makes the invalid
+ *      assumption.  Thus, there is no optimization based on string lengths for
+ *      EXACTFL nodes that contain these few folds, nor for non-UTF8-pattern
+ *      EXACTF and EXACTFAA nodes that contain the sharp s.  (The reason the
+ *      assumption is wrong only in these cases is that all other non-UTF-8
+ *      folds are 1-1; and, for UTF-8 patterns, we pre-fold all other folds to
+ *      their expanded versions.  (Again, we can't prefold sharp s to 'ss' in
+ *      EXACTF nodes because we don't know at compile time if it actually
+ *      matches 'ss' or not.  For EXACTF nodes it will match iff the target
+ *      string is in UTF-8.  This is in contrast to EXACTFU nodes, where it
+ *      always matches; and EXACTFAA where it never does.  In an EXACTFAA node
+ *      in a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}, avoiding the
+ *      problem; but in a non-UTF8 pattern, folding it to that above-Latin1
+ *      string would require the pattern to be forced into UTF-8, the overhead
+ *      of which we want to avoid.  Similarly the unfolded multi-char folds in
+ *      EXACTFL nodes will match iff the locale at the time of match is a UTF-8
+ *      locale.)
+ *
+ *      Similarly, the code that generates tries doesn't currently handle
+ *      not-already-folded multi-char folds, and it looks like a pain to change
+ *      that.  Therefore, trie generation of EXACTFAA nodes with the sharp s
+ *      doesn't work.  Instead, such an EXACTFAA is turned into a new regnode,
+ *      EXACTFAA_NO_TRIE, which the trie code knows not to handle.  Most people
+ *      using /iaa matching will be doing so almost entirely with ASCII
+ *      strings, so this should rarely be encountered in practice */
+
+U32
+Perl_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
+                   UV *min_subtract, bool *unfolded_multi_char,
+                   U32 flags, regnode *val, U32 depth)
+{
+    /* Merge several consecutive EXACTish nodes into one. */
+
+    regnode *n = regnext(scan);
+    U32 stringok = 1;
+    regnode *next = REGNODE_AFTER_varies(scan);
+    U32 merged = 0;
+    U32 stopnow = 0;
+#ifdef DEBUGGING
+    regnode *stop = scan;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+#else
+    PERL_UNUSED_ARG(depth);
+#endif
+
+    PERL_ARGS_ASSERT_JOIN_EXACT;
+#ifndef EXPERIMENTAL_INPLACESCAN
+    PERL_UNUSED_ARG(flags);
+    PERL_UNUSED_ARG(val);
+#endif
+    DEBUG_PEEP("join", scan, depth, 0);
+
+    assert(REGNODE_TYPE(OP(scan)) == EXACT);
+
+    /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
+     * EXACT ones that are mergeable to the current one. */
+    while (    n
+           && (    REGNODE_TYPE(OP(n)) == NOTHING
+               || (stringok && REGNODE_TYPE(OP(n)) == EXACT))
+           && NEXT_OFF(n)
+           && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX)
+    {
+
+        if (OP(n) == TAIL || n > next)
+            stringok = 0;
+        if (REGNODE_TYPE(OP(n)) == NOTHING) {
+            DEBUG_PEEP("skip:", n, depth, 0);
+            NEXT_OFF(scan) += NEXT_OFF(n);
+            next = n + NODE_STEP_REGNODE;
+#ifdef DEBUGGING
+            if (stringok)
+                stop = n;
+#endif
+            n = regnext(n);
+        }
+        else if (stringok) {
+            const unsigned int oldl = STR_LEN(scan);
+            regnode * const nnext = regnext(n);
+
+            /* XXX I (khw) kind of doubt that this works on platforms (should
+             * Perl ever run on one) where U8_MAX is above 255 because of lots
+             * of other assumptions */
+            /* Don't join if the sum can't fit into a single node */
+            if (oldl + STR_LEN(n) > U8_MAX)
+                break;
+
+            /* Joining something that requires UTF-8 with something that
+             * doesn't, means the result requires UTF-8. */
+            if (OP(scan) == EXACT && (OP(n) == EXACT_REQ8)) {
+                OP(scan) = EXACT_REQ8;
+            }
+            else if (OP(scan) == EXACT_REQ8 && (OP(n) == EXACT)) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_REQ8)) {
+                OP(scan) = EXACTFU_REQ8;
+            }
+            else if ((OP(scan) == EXACTFU_REQ8) && (OP(n) == EXACTFU)) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
+
+                 /* Under /di, temporary EXACTFU_S_EDGE nodes are generated,
+                  * which can join with EXACTFU ones.  We check for this case
+                  * here.  These need to be resolved to either EXACTFU or
+                  * EXACTF at joining time.  They have nothing in them that
+                  * would forbid them from being the more desirable EXACTFU
+                  * nodes except that they begin and/or end with a single [Ss].
+                  * The reason this is problematic is because they could be
+                  * joined in this loop with an adjacent node that ends and/or
+                  * begins with [Ss] which would then form the sequence 'ss',
+                  * which matches differently under /di than /ui, in which case
+                  * EXACTFU can't be used.  If the 'ss' sequence doesn't get
+                  * formed, the nodes get absorbed into any adjacent EXACTFU
+                  * node.  And if the only adjacent node is EXACTF, they get
+                  * absorbed into that, under the theory that a longer node is
+                  * better than two shorter ones, even if one is EXACTFU.  Note
+                  * that EXACTFU_REQ8 is generated only for UTF-8 patterns,
+                  * and the EXACTFU_S_EDGE ones only for non-UTF-8.  */
+
+                if (STRING(n)[STR_LEN(n)-1] == 's') {
+
+                    /* Here the joined node would end with 's'.  If the node
+                     * following the combination is an EXACTF one, it's better to
+                     * join this trailing edge 's' node with that one, leaving the
+                     * current one in 'scan' be the more desirable EXACTFU */
+                    if (OP(nnext) == EXACTF) {
+                        break;
+                    }
+
+                    OP(scan) = EXACTFU_S_EDGE;
+
+                }   /* Otherwise, the beginning 's' of the 2nd node just
+                       becomes an interior 's' in 'scan' */
+            }
+            else if (OP(scan) == EXACTF && OP(n) == EXACTF) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
+
+                /* EXACTF nodes are compatible for joining with EXACTFU_S_EDGE
+                 * nodes.  But the latter nodes can be also joined with EXACTFU
+                 * ones, and that is a better outcome, so if the node following
+                 * 'n' is EXACTFU, quit now so that those two can be joined
+                 * later */
+                if (OP(nnext) == EXACTFU) {
+                    break;
+                }
+
+                /* The join is compatible, and the combined node will be
+                 * EXACTF.  (These don't care if they begin or end with 's' */
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
+                if (   STRING(scan)[STR_LEN(scan)-1] == 's'
+                    && STRING(n)[0] == 's')
+                {
+                    /* When combined, we have the sequence 'ss', which means we
+                     * have to remain /di */
+                    OP(scan) = EXACTF;
+                }
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
+                if (STRING(n)[0] == 's') {
+                    ;   /* Here the join is compatible and the combined node
+                           starts with 's', no need to change OP */
+                }
+                else {  /* Now the trailing 's' is in the interior */
+                    OP(scan) = EXACTFU;
+                }
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
+
+                /* The join is compatible, and the combined node will be
+                 * EXACTF.  (These don't care if they begin or end with 's' */
+                OP(scan) = EXACTF;
+            }
+            else if (OP(scan) != OP(n)) {
+
+                /* The only other compatible joinings are the same node type */
+                break;
+            }
+
+            DEBUG_PEEP("merg", n, depth, 0);
+            merged++;
+
+            next = REGNODE_AFTER_varies(n);
+            NEXT_OFF(scan) += NEXT_OFF(n);
+            assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
+            setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
+            /* Now we can overwrite *n : */
+            Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
+#ifdef DEBUGGING
+            stop = next - 1;
+#endif
+            n = nnext;
+            if (stopnow) break;
+        }
+
+#ifdef EXPERIMENTAL_INPLACESCAN
+        if (flags && !NEXT_OFF(n)) {
+            DEBUG_PEEP("atch", val, depth, 0);
+            if (REGNODE_OFF_BY_ARG(OP(n))) {
+                ARG_SET(n, val - n);
+            }
+            else {
+                NEXT_OFF(n) = val - n;
+            }
+            stopnow = 1;
+        }
+#endif
+    }
+
+    /* This temporary node can now be turned into EXACTFU, and must, as
+     * regexec.c doesn't handle it */
+    if (OP(scan) == EXACTFU_S_EDGE) {
+        OP(scan) = EXACTFU;
+    }
+
+    *min_subtract = 0;
+    *unfolded_multi_char = FALSE;
+
+    /* Here, all the adjacent mergeable EXACTish nodes have been merged.  We
+     * can now analyze for sequences of problematic code points.  (Prior to
+     * this final joining, sequences could have been split over boundaries, and
+     * hence missed).  The sequences only happen in folding, hence for any
+     * non-EXACT EXACTish node */
+    if (OP(scan) != EXACT && OP(scan) != EXACT_REQ8 && OP(scan) != EXACTL) {
+        U8* s0 = (U8*) STRING(scan);
+        U8* s = s0;
+        U8* s_end = s0 + STR_LEN(scan);
+
+        int total_count_delta = 0;  /* Total delta number of characters that
+                                       multi-char folds expand to */
+
+        /* One pass is made over the node's string looking for all the
+         * possibilities.  To avoid some tests in the loop, there are two main
+         * cases, for UTF-8 patterns (which can't have EXACTF nodes) and
+         * non-UTF-8 */
+        if (UTF) {
+            U8* folded = NULL;
+
+            if (OP(scan) == EXACTFL) {
+                U8 *d;
+
+                /* An EXACTFL node would already have been changed to another
+                 * node type unless there is at least one character in it that
+                 * is problematic; likely a character whose fold definition
+                 * won't be known until runtime, and so has yet to be folded.
+                 * For all but the UTF-8 locale, folds are 1-1 in length, but
+                 * to handle the UTF-8 case, we need to create a temporary
+                 * folded copy using UTF-8 locale rules in order to analyze it.
+                 * This is because our macros that look to see if a sequence is
+                 * a multi-char fold assume everything is folded (otherwise the
+                 * tests in those macros would be too complicated and slow).
+                 * Note that here, the non-problematic folds will have already
+                 * been done, so we can just copy such characters.  We actually
+                 * don't completely fold the EXACTFL string.  We skip the
+                 * unfolded multi-char folds, as that would just create work
+                 * below to figure out the size they already are */
+
+                Newx(folded, UTF8_MAX_FOLD_CHAR_EXPAND * STR_LEN(scan) + 1, U8);
+                d = folded;
+                while (s < s_end) {
+                    STRLEN s_len = UTF8SKIP(s);
+                    if (! is_PROBLEMATIC_LOCALE_FOLD_utf8(s)) {
+                        Copy(s, d, s_len, U8);
+                        d += s_len;
+                    }
+                    else if (is_FOLDS_TO_MULTI_utf8(s)) {
+                        *unfolded_multi_char = TRUE;
+                        Copy(s, d, s_len, U8);
+                        d += s_len;
+                    }
+                    else if (isASCII(*s)) {
+                        *(d++) = toFOLD(*s);
+                    }
+                    else {
+                        STRLEN len;
+                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
+                        d += len;
+                    }
+                    s += s_len;
+                }
+
+                /* Point the remainder of the routine to look at our temporary
+                 * folded copy */
+                s = folded;
+                s_end = d;
+            } /* End of creating folded copy of EXACTFL string */
+
+            /* Examine the string for a multi-character fold sequence.  UTF-8
+             * patterns have all characters pre-folded by the time this code is
+             * executed */
+            while (s < s_end - 1) /* Can stop 1 before the end, as minimum
+                                     length sequence we are looking for is 2 */
+            {
+                int count = 0;  /* How many characters in a multi-char fold */
+                int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
+                if (! len) {    /* Not a multi-char fold: get next char */
+                    s += UTF8SKIP(s);
+                    continue;
+                }
+
+                { /* Here is a generic multi-char fold. */
+                    U8* multi_end  = s + len;
+
+                    /* Count how many characters are in it.  In the case of
+                     * /aa, no folds which contain ASCII code points are
+                     * allowed, so check for those, and skip if found. */
+                    if (OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
+                        count = utf8_length(s, multi_end);
+                        s = multi_end;
+                    }
+                    else {
+                        while (s < multi_end) {
+                            if (isASCII(*s)) {
+                                s++;
+                                goto next_iteration;
+                            }
+                            else {
+                                s += UTF8SKIP(s);
+                            }
+                            count++;
+                        }
+                    }
+                }
+
+                /* The delta is how long the sequence is minus 1 (1 is how long
+                 * the character that folds to the sequence is) */
+                total_count_delta += count - 1;
+              next_iteration: ;
+            }
+
+            /* We created a temporary folded copy of the string in EXACTFL
+             * nodes.  Therefore we need to be sure it doesn't go below zero,
+             * as the real string could be shorter */
+            if (OP(scan) == EXACTFL) {
+                int total_chars = utf8_length((U8*) STRING(scan),
+                                           (U8*) STRING(scan) + STR_LEN(scan));
+                if (total_count_delta > total_chars) {
+                    total_count_delta = total_chars;
+                }
+            }
+
+            *min_subtract += total_count_delta;
+            Safefree(folded);
+        }
+        else if (OP(scan) == EXACTFAA) {
+
+            /* Non-UTF-8 pattern, EXACTFAA node.  There can't be a multi-char
+             * fold to the ASCII range (and there are no existing ones in the
+             * upper latin1 range).  But, as outlined in the comments preceding
+             * this function, we need to flag any occurrences of the sharp s.
+             * This character forbids trie formation (because of added
+             * complexity) */
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
+            while (s < s_end) {
+                if (*s == LATIN_SMALL_LETTER_SHARP_S) {
+                    OP(scan) = EXACTFAA_NO_TRIE;
+                    *unfolded_multi_char = TRUE;
+                    break;
+                }
+                s++;
+            }
+        }
+        else if (OP(scan) != EXACTFAA_NO_TRIE) {
+
+            /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
+             * folds that are all Latin1.  As explained in the comments
+             * preceding this function, we look also for the sharp s in EXACTF
+             * and EXACTFL nodes; it can be in the final position.  Otherwise
+             * we can stop looking 1 byte earlier because have to find at least
+             * two characters for a multi-fold */
+            const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
+                              ? s_end
+                              : s_end -1;
+
+            while (s < upper) {
+                int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
+                if (! len) {    /* Not a multi-char fold. */
+                    if (*s == LATIN_SMALL_LETTER_SHARP_S
+                        && (OP(scan) == EXACTF || OP(scan) == EXACTFL))
+                    {
+                        *unfolded_multi_char = TRUE;
+                    }
+                    s++;
+                    continue;
+                }
+
+                if (len == 2
+                    && isALPHA_FOLD_EQ(*s, 's')
+                    && isALPHA_FOLD_EQ(*(s+1), 's'))
+                {
+
+                    /* EXACTF nodes need to know that the minimum length
+                     * changed so that a sharp s in the string can match this
+                     * ss in the pattern, but they remain EXACTF nodes, as they
+                     * won't match this unless the target string is in UTF-8,
+                     * which we don't know until runtime.  EXACTFL nodes can't
+                     * transform into EXACTFU nodes */
+                    if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
+                        OP(scan) = EXACTFUP;
+                    }
+                }
+
+                *min_subtract += len - 1;
+                s += len;
+            }
+#endif
+        }
+    }
+
+#ifdef DEBUGGING
+    /* Allow dumping but overwriting the collection of skipped
+     * ops and/or strings with fake optimized ops */
+    n = REGNODE_AFTER_varies(scan);
+    while (n <= stop) {
+        OP(n) = OPTIMIZED;
+        FLAGS(n) = 0;
+        NEXT_OFF(n) = 0;
+        n++;
+    }
+#endif
+    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl", scan, depth, 0);});
+    return stopnow;
+}
+
+/* REx optimizer.  Converts nodes into quicker variants "in place".
+   Finds fixed substrings.  */
+
+
+/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
+   to the position after last scanned or to NULL. */
+
+/* the return from this sub is the minimum length that could possibly match */
+SSize_t
+Perl_study_chunk(pTHX_
+    RExC_state_t *pRExC_state,
+    regnode **scanp,        /* Start here (read-write). */
+    SSize_t *minlenp,       /* used for the minlen of substrings? */
+    SSize_t *deltap,        /* Write maxlen-minlen here. */
+    regnode *last,          /* Stop before this one. */
+    scan_data_t *data,      /* string data about the pattern */
+    I32 stopparen,          /* treat CLOSE-N as END, see GOSUB */
+    U32 recursed_depth,     /* how deep have we recursed via GOSUB */
+    regnode_ssc *and_withp, /* Valid if flags & SCF_DO_STCLASS_OR */
+    U32 flags,              /* flags controlling this call, see SCF_ flags */
+    U32 depth,              /* how deep have we recursed period */
+    bool was_mutate_ok      /* TRUE if in-place optimizations are allowed.
+                               FALSE only if the caller (recursively) was
+                               prohibited from modifying the regops, because
+                               a higher caller is holding a ptr to them. */
+)
+{
+    /* vars about the regnodes we are working with */
+    regnode *scan = *scanp; /* the current opcode we are inspecting */
+    regnode *next = NULL;   /* the next opcode beyond scan, tmp var */
+    regnode *first_non_open = scan; /* FIXME: should this init to NULL?
+                                       the first non open regop, if the init
+                                       val IS an OPEN then we will skip past
+                                       it just after the var decls section */
+    I32 code = 0;           /* temp var used to hold the optype of a regop */
+
+    /* vars about the min and max length of the pattern */
+    SSize_t min = 0;    /* min length of this part of the pattern */
+    SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
+                                         this is adjusted down if we find
+                                         an ACCEPT */
+    SSize_t delta = 0;  /* difference between min and max length
+                           (not accounting for stopmin) */
+
+    /* vars about capture buffers in the pattern */
+    I32 pars = 0;       /* count of OPEN opcodes */
+    I32 is_par = OP(scan) == OPEN ? PARNO(scan) : 0; /* is this op an OPEN? */
+
+    /* vars about whether this pattern contains something that can match
+     * infinitely long strings, eg, X* or X+ */
+    int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
+    int is_inf_internal = 0;            /* The studied chunk is infinite */
+
+    /* scan_data_t (struct) is used to hold information about the substrings
+     * and start class we have extracted from the string */
+    scan_data_t data_fake; /* temp var used for recursing in some cases */
+
+    SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
+                                   trie optimizations */
+
+    scan_frame *frame = NULL;  /* used as part of fake recursion */
+
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_STUDY_CHUNK;
+    RExC_study_started= 1;
+
+    Zero(&data_fake, 1, scan_data_t);
+
+    if ( depth == 0 ) {
+        while (first_non_open && OP(first_non_open) == OPEN)
+            first_non_open=regnext(first_non_open);
+    }
+
+  fake_study_recurse:
+    DEBUG_r(
+        RExC_study_chunk_recursed_count++;
+    );
+    DEBUG_OPTIMISE_MORE_r(
+    {
+        Perl_re_indentf( aTHX_  "study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
+            depth, (long)stopparen,
+            (unsigned long)RExC_study_chunk_recursed_count,
+            (unsigned long)depth, (unsigned long)recursed_depth,
+            scan,
+            last);
+        if (recursed_depth) {
+            U32 i;
+            U32 j;
+            for ( j = 0 ; j < recursed_depth ; j++ ) {
+                for ( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
+                    if (PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
+                        Perl_re_printf( aTHX_ " %d",(int)i);
+                        break;
+                    }
+                }
+                if ( j + 1 < recursed_depth ) {
+                    Perl_re_printf( aTHX_  ",");
+                }
+            }
+        }
+        Perl_re_printf( aTHX_ "\n");
+    }
+    );
+    while ( scan && OP(scan) != END && scan < last ){
+        UV min_subtract = 0;    /* How mmany chars to subtract from the minimum
+                                   node length to get a real minimum (because
+                                   the folded version may be shorter) */
+        bool unfolded_multi_char = FALSE;
+        /* avoid mutating ops if we are anywhere within the recursed or
+         * enframed handling for a GOSUB: the outermost level will handle it.
+         */
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
+        /* Peephole optimizer: */
+        DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
+        DEBUG_PEEP("Peep", scan, depth, flags);
+
+
+        /* The reason we do this here is that we need to deal with things like
+         * /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT
+         * parsing code, as each (?:..) is handled by a different invocation of
+         * reg() -- Yves
+         */
+        if (REGNODE_TYPE(OP(scan)) == EXACT
+            && OP(scan) != LEXACT
+            && OP(scan) != LEXACT_REQ8
+            && mutate_ok
+        ) {
+            join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
+                    0, NULL, depth + 1);
+        }
+
+        /* Follow the next-chain of the current node and optimize
+           away all the NOTHINGs from it.
+         */
+        rck_elide_nothing(scan);
+
+        /* The principal pseudo-switch.  Cannot be a switch, since we look into
+         * several different things.  */
+        if ( OP(scan) == DEFINEP ) {
+            SSize_t minlen = 0;
+            SSize_t deltanext = 0;
+            SSize_t fake_last_close = 0;
+            regnode *fake_last_close_op = NULL;
+            U32 f = SCF_IN_DEFINE | (flags & SCF_TRIE_DOING_RESTUDY);
+
+            StructCopy(&zero_scan_data, &data_fake, scan_data_t);
+            scan = regnext(scan);
+            assert( OP(scan) == IFTHEN );
+            DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
+
+            data_fake.last_closep= &fake_last_close;
+            data_fake.last_close_opp= &fake_last_close_op;
+            minlen = *minlenp;
+            next = regnext(scan);
+            scan = REGNODE_AFTER_type(scan,tregnode_IFTHEN);
+            DEBUG_PEEP("scan", scan, depth, flags);
+            DEBUG_PEEP("next", next, depth, flags);
+
+            /* we suppose the run is continuous, last=next...
+             * NOTE we dont use the return here! */
+            /* DEFINEP study_chunk() recursion */
+            (void)study_chunk(pRExC_state, &scan, &minlen,
+                              &deltanext, next, &data_fake, stopparen,
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
+
+            scan = next;
+        } else
+        if (
+            OP(scan) == BRANCH  ||
+            OP(scan) == BRANCHJ ||
+            OP(scan) == IFTHEN
+        ) {
+            next = regnext(scan);
+            code = OP(scan);
+
+            /* The op(next)==code check below is to see if we
+             * have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN"
+             * IFTHEN is special as it might not appear in pairs.
+             * Not sure whether BRANCH-BRANCHJ is possible, regardless
+             * we dont handle it cleanly. */
+            if (OP(next) == code || code == IFTHEN) {
+                /* NOTE - There is similar code to this block below for
+                 * handling TRIE nodes on a re-study.  If you change stuff here
+                 * check there too. */
+                SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
+                regnode_ssc accum;
+                regnode * const startbranch=scan;
+
+                if (flags & SCF_DO_SUBSTR) {
+                    /* Cannot merge strings after this. */
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
+                }
+
+                if (flags & SCF_DO_STCLASS)
+                    ssc_init_zero(pRExC_state, &accum);
+
+                while (OP(scan) == code) {
+                    SSize_t deltanext, minnext, fake_last_close = 0;
+                    regnode *fake_last_close_op = NULL;
+                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
+                    regnode_ssc this_class;
+
+                    DEBUG_PEEP("Branch", scan, depth, flags);
+
+                    num++;
+                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
+                    if (data) {
+                        data_fake.whilem_c = data->whilem_c;
+                        data_fake.last_closep = data->last_closep;
+                        data_fake.last_close_opp = data->last_close_opp;
+                    }
+                    else {
+                        data_fake.last_closep = &fake_last_close;
+                        data_fake.last_close_opp = &fake_last_close_op;
+                    }
+
+                    data_fake.pos_delta = delta;
+                    next = regnext(scan);
+
+                    scan = REGNODE_AFTER_opcode(scan, code);
+
+                    if (flags & SCF_DO_STCLASS) {
+                        ssc_init(pRExC_state, &this_class);
+                        data_fake.start_class = &this_class;
+                        f |= SCF_DO_STCLASS_AND;
+                    }
+                    if (flags & SCF_WHILEM_VISITED_POS)
+                        f |= SCF_WHILEM_VISITED_POS;
+
+                    /* we suppose the run is continuous, last=next...*/
+                    /* recurse study_chunk() for each BRANCH in an alternation */
+                    minnext = study_chunk(pRExC_state, &scan, minlenp,
+                                      &deltanext, next, &data_fake, stopparen,
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
+
+                    if (min1 > minnext)
+                        min1 = minnext;
+                    if (deltanext == OPTIMIZE_INFTY) {
+                        is_inf = is_inf_internal = 1;
+                        max1 = OPTIMIZE_INFTY;
+                    } else if (max1 < minnext + deltanext)
+                        max1 = minnext + deltanext;
+                    scan = next;
+                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+                        pars++;
+                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
+                        if ( stopmin > minnext)
+                            stopmin = min + min1;
+                        flags &= ~SCF_DO_SUBSTR;
+                        if (data)
+                            data->flags |= SCF_SEEN_ACCEPT;
+                    }
+                    if (data) {
+                        if (data_fake.flags & SF_HAS_EVAL)
+                            data->flags |= SF_HAS_EVAL;
+                        data->whilem_c = data_fake.whilem_c;
+                    }
+                    if (flags & SCF_DO_STCLASS)
+                        ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+                    DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
+                }
+                if (code == IFTHEN && num < 2) /* Empty ELSE branch */
+                    min1 = 0;
+                if (flags & SCF_DO_SUBSTR) {
+                    data->pos_min += min1;
+                    if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
+                        data->pos_delta = OPTIMIZE_INFTY;
+                    else
+                        data->pos_delta += max1 - min1;
+                    if (max1 != min1 || is_inf)
+                        data->cur_is_floating = 1;
+                }
+                min += min1;
+                if (delta == OPTIMIZE_INFTY
+                 || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
+                    delta = OPTIMIZE_INFTY;
+                else
+                    delta += max1 - min1;
+                if (flags & SCF_DO_STCLASS_OR) {
+                    ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
+                    if (min1) {
+                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+                        flags &= ~SCF_DO_STCLASS;
+                    }
+                }
+                else if (flags & SCF_DO_STCLASS_AND) {
+                    if (min1) {
+                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
+                        flags &= ~SCF_DO_STCLASS;
+                    }
+                    else {
+                        /* Switch to OR mode: cache the old value of
+                         * data->start_class */
+                        INIT_AND_WITHP;
+                        StructCopy(data->start_class, and_withp, regnode_ssc);
+                        flags &= ~SCF_DO_STCLASS_AND;
+                        StructCopy(&accum, data->start_class, regnode_ssc);
+                        flags |= SCF_DO_STCLASS_OR;
+                    }
+                }
+                DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
+
+                if (PERL_ENABLE_TRIE_OPTIMISATION
+                    && OP(startbranch) == BRANCH
+                    && mutate_ok
+                ) {
+                /* demq.
+
+                   Assuming this was/is a branch we are dealing with: 'scan'
+                   now points at the item that follows the branch sequence,
+                   whatever it is. We now start at the beginning of the
+                   sequence and look for subsequences of
+
+                   BRANCH->EXACT=>x1
+                   BRANCH->EXACT=>x2
+                   tail
+
+                   which would be constructed from a pattern like
+                   /A|LIST|OF|WORDS/
+
+                   If we can find such a subsequence we need to turn the first
+                   element into a trie and then add the subsequent branch exact
+                   strings to the trie.
+
+                   We have two cases
+
+                     1. patterns where the whole set of branches can be
+                        converted.
+
+                     2. patterns where only a subset can be converted.
+
+                   In case 1 we can replace the whole set with a single regop
+                   for the trie. In case 2 we need to keep the start and end
+                   branches so
+
+                     'BRANCH EXACT; BRANCH EXACT; BRANCH X'
+                     becomes BRANCH TRIE; BRANCH X;
+
+                  There is an additional case, that being where there is a
+                  common prefix, which gets split out into an EXACT like node
+                  preceding the TRIE node.
+
+                  If X(1..n)==tail then we can do a simple trie, if not we make
+                  a "jump" trie, such that when we match the appropriate word
+                  we "jump" to the appropriate tail node. Essentially we turn
+                  a nested if into a case structure of sorts.
+
+                */
+
+                    int made=0;
+                    if (!re_trie_maxbuff) {
+                        re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+                        if (!SvIOK(re_trie_maxbuff))
+                            sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
+                    }
+                    if ( SvIV(re_trie_maxbuff)>=0  ) {
+                        regnode *cur;
+                        regnode *first = (regnode *)NULL;
+                        regnode *prev = (regnode *)NULL;
+                        regnode *tail = scan;
+                        U8 trietype = 0;
+                        U32 count=0;
+
+                        /* var tail is used because there may be a TAIL
+                           regop in the way. Ie, the exacts will point to the
+                           thing following the TAIL, but the last branch will
+                           point at the TAIL. So we advance tail. If we
+                           have nested (?:) we may have to move through several
+                           tails.
+                         */
+
+                        while ( OP( tail ) == TAIL ) {
+                            /* this is the TAIL generated by (?:) */
+                            tail = regnext( tail );
+                        }
+
+
+                        DEBUG_TRIE_COMPILE_r({
+                            regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
+                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
+                              depth+1,
+                              "Looking for TRIE'able sequences. Tail node is ",
+                              (UV) REGNODE_OFFSET(tail),
+                              SvPV_nolen_const( RExC_mysv )
+                            );
+                        });
+
+                        /*
+
+                            Step through the branches
+                                cur represents each branch,
+                                noper is the first thing to be matched as part
+                                      of that branch
+                                noper_next is the regnext() of that node.
+
+                            We normally handle a case like this
+                            /FOO[xyz]|BAR[pqr]/ via a "jump trie" but we also
+                            support building with NOJUMPTRIE, which restricts
+                            the trie logic to structures like /FOO|BAR/.
+
+                            If noper is a trieable nodetype then the branch is
+                            a possible optimization target. If we are building
+                            under NOJUMPTRIE then we require that noper_next is
+                            the same as scan (our current position in the regex
+                            program).
+
+                            Once we have two or more consecutive such branches
+                            we can create a trie of the EXACT's contents and
+                            stitch it in place into the program.
+
+                            If the sequence represents all of the branches in
+                            the alternation we replace the entire thing with a
+                            single TRIE node.
+
+                            Otherwise when it is a subsequence we need to
+                            stitch it in place and replace only the relevant
+                            branches. This means the first branch has to remain
+                            as it is used by the alternation logic, and its
+                            next pointer, and needs to be repointed at the item
+                            on the branch chain following the last branch we
+                            have optimized away.
+
+                            This could be either a BRANCH, in which case the
+                            subsequence is internal, or it could be the item
+                            following the branch sequence in which case the
+                            subsequence is at the end (which does not
+                            necessarily mean the first node is the start of the
+                            alternation).
+
+                            TRIE_TYPE(X) is a define which maps the optype to a
+                            trietype.
+
+                                optype          |  trietype
+                                ----------------+-----------
+                                NOTHING         | NOTHING
+                                EXACT           | EXACT
+                                EXACT_REQ8      | EXACT
+                                EXACTFU         | EXACTFU
+                                EXACTFU_REQ8    | EXACTFU
+                                EXACTFUP        | EXACTFU
+                                EXACTFAA        | EXACTFAA
+                                EXACTL          | EXACTL
+                                EXACTFLU8       | EXACTFLU8
+
+
+                        */
+#define TRIE_TYPE(X) ( ( NOTHING == (X) )                                   \
+                       ? NOTHING                                            \
+                       : ( EXACT == (X) || EXACT_REQ8 == (X) )             \
+                         ? EXACT                                            \
+                         : (     EXACTFU == (X)                             \
+                              || EXACTFU_REQ8 == (X)                       \
+                              || EXACTFUP == (X) )                          \
+                           ? EXACTFU                                        \
+                           : ( EXACTFAA == (X) )                            \
+                             ? EXACTFAA                                     \
+                             : ( EXACTL == (X) )                            \
+                               ? EXACTL                                     \
+                               : ( EXACTFLU8 == (X) )                       \
+                                 ? EXACTFLU8                                \
+                                 : 0 )
+
+                        /* dont use tail as the end marker for this traverse */
+                        for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
+                            regnode * const noper = REGNODE_AFTER( cur );
+                            U8 noper_type = OP( noper );
+                            U8 noper_trietype = TRIE_TYPE( noper_type );
+#if defined(DEBUGGING) || defined(NOJUMPTRIE)
+                            regnode * const noper_next = regnext( noper );
+                            U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
+                            U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
+#endif
+
+                            DEBUG_TRIE_COMPILE_r({
+                                regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
+                                Perl_re_indentf( aTHX_  "- %d:%s (%d)",
+                                   depth+1,
+                                   REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
+
+                                regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
+                                Perl_re_printf( aTHX_  " -> %d:%s",
+                                    REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
+
+                                if ( noper_next ) {
+                                  regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
+                                  Perl_re_printf( aTHX_ "\t=> %d:%s\t",
+                                    REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
+                                }
+                                Perl_re_printf( aTHX_  "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
+                                   REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
+                                   REGNODE_NAME(trietype), REGNODE_NAME(noper_trietype), REGNODE_NAME(noper_next_trietype)
+                                );
+                            });
+
+                            /* Is noper a trieable nodetype that can be merged
+                             * with the current trie (if there is one)? */
+                            if ( noper_trietype
+                                  &&
+                                  (
+                                        ( noper_trietype == NOTHING )
+                                        || ( trietype == NOTHING )
+                                        || ( trietype == noper_trietype )
+                                  )
+#ifdef NOJUMPTRIE
+                                  && noper_next >= tail
+#endif
+                                  && count < U16_MAX)
+                            {
+                                /* Handle mergable triable node Either we are
+                                 * the first node in a new trieable sequence,
+                                 * in which case we do some bookkeeping,
+                                 * otherwise we update the end pointer. */
+                                if ( !first ) {
+                                    first = cur;
+                                    if ( noper_trietype == NOTHING ) {
+#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
+                                        regnode * const noper_next = regnext( noper );
+                                        U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
+                                        U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
+#endif
+
+                                        if ( noper_next_trietype ) {
+                                            trietype = noper_next_trietype;
+                                        } else if (noper_next_type)  {
+                                            /* a NOTHING regop is 1 regop wide.
+                                             * We need at least two for a trie
+                                             * so we can't merge this in */
+                                            first = NULL;
+                                        }
+                                    } else {
+                                        trietype = noper_trietype;
+                                    }
+                                } else {
+                                    if ( trietype == NOTHING )
+                                        trietype = noper_trietype;
+                                    prev = cur;
+                                }
+                                if (first)
+                                    count++;
+                            } /* end handle mergable triable node */
+                            else {
+                                /* handle unmergable node -
+                                 * noper may either be a triable node which can
+                                 * not be tried together with the current trie,
+                                 * or a non triable node */
+                                if ( prev ) {
+                                    /* If last is set and trietype is not
+                                     * NOTHING then we have found at least two
+                                     * triable branch sequences in a row of a
+                                     * similar trietype so we can turn them
+                                     * into a trie. If/when we allow NOTHING to
+                                     * start a trie sequence this condition
+                                     * will be required, and it isn't expensive
+                                     * so we leave it in for now. */
+                                    if ( trietype && trietype != NOTHING )
+                                        make_trie( pRExC_state,
+                                                startbranch, first, cur, tail,
+                                                count, trietype, depth+1 );
+                                    prev = NULL; /* note: we clear/update
+                                                    first, trietype etc below,
+                                                    so we dont do it here */
+                                }
+                                if ( noper_trietype
+#ifdef NOJUMPTRIE
+                                     && noper_next >= tail
+#endif
+                                ){
+                                    /* noper is triable, so we can start a new
+                                     * trie sequence */
+                                    count = 1;
+                                    first = cur;
+                                    trietype = noper_trietype;
+                                } else if (first) {
+                                    /* if we already saw a first but the
+                                     * current node is not triable then we have
+                                     * to reset the first information. */
+                                    count = 0;
+                                    first = NULL;
+                                    trietype = 0;
+                                }
+                            } /* end handle unmergable node */
+                        } /* loop over branches */
+                        DEBUG_TRIE_COMPILE_r({
+                            regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
+                            Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
+                              depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
+                            Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
+                               REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
+                               REGNODE_NAME(trietype)
+                            );
+
+                        });
+                        if ( prev && trietype ) {
+                            if ( trietype != NOTHING ) {
+                                /* the last branch of the sequence was part of
+                                 * a trie, so we have to construct it here
+                                 * outside of the loop */
+                                made= make_trie( pRExC_state, startbranch,
+                                                 first, scan, tail, count,
+                                                 trietype, depth+1 );
+#ifdef TRIE_STUDY_OPT
+                                if ( ((made == MADE_EXACT_TRIE &&
+                                     startbranch == first)
+                                     || ( first_non_open == first )) &&
+                                     depth==0 ) {
+                                    flags |= SCF_TRIE_RESTUDY;
+                                    if ( startbranch == first
+                                         && scan >= tail )
+                                    {
+                                        RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN;
+                                    }
+                                }
+#endif
+                            } else {
+                                /* at this point we know whatever we have is a
+                                 * NOTHING sequence/branch AND if 'startbranch'
+                                 * is 'first' then we can turn the whole thing
+                                 * into a NOTHING
+                                 */
+                                if ( startbranch == first ) {
+                                    regnode *opt;
+                                    /* the entire thing is a NOTHING sequence,
+                                     * something like this: (?:|) So we can
+                                     * turn it into a plain NOTHING op. */
+                                    DEBUG_TRIE_COMPILE_r({
+                                        regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
+                                        Perl_re_indentf( aTHX_  "- %s (%d) <NOTHING BRANCH SEQUENCE>\n",
+                                          depth+1,
+                                          SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
+
+                                    });
+                                    OP(startbranch)= NOTHING;
+                                    NEXT_OFF(startbranch)= tail - startbranch;
+                                    for ( opt= startbranch + 1; opt < tail ; opt++ )
+                                        OP(opt)= OPTIMIZED;
+                                }
+                            }
+                        } /* end if ( prev) */
+                    } /* TRIE_MAXBUF is non zero */
+                } /* do trie */
+                DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
+            }
+            else
+                scan = REGNODE_AFTER_opcode(scan,code);
+            continue;
+        } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
+            I32 paren = 0;
+            regnode *start = NULL;
+            regnode *end = NULL;
+            U32 my_recursed_depth= recursed_depth;
+
+            if (OP(scan) != SUSPEND) { /* GOSUB */
+                /* Do setup, note this code has side effects beyond
+                 * the rest of this block. Specifically setting
+                 * RExC_recurse[] must happen at least once during
+                 * study_chunk(). */
+                paren = ARG(scan);
+                RExC_recurse[ARG2L(scan)] = scan;
+                start = REGNODE_p(RExC_open_parens[paren]);
+                end   = REGNODE_p(RExC_close_parens[paren]);
+
+                /* NOTE we MUST always execute the above code, even
+                 * if we do nothing with a GOSUB */
+                if (
+                    ( flags & SCF_IN_DEFINE )
+                    ||
+                    (
+                        (is_inf_internal || is_inf || (data && data->flags & SF_IS_INF))
+                        &&
+                        ( (flags & (SCF_DO_STCLASS | SCF_DO_SUBSTR)) == 0 )
+                    )
+                ) {
+                    /* no need to do anything here if we are in a define. */
+                    /* or we are after some kind of infinite construct
+                     * so we can skip recursing into this item.
+                     * Since it is infinite we will not change the maxlen
+                     * or delta, and if we miss something that might raise
+                     * the minlen it will merely pessimise a little.
+                     *
+                     * Iow /(?(DEFINE)(?<foo>foo|food))a+(?&foo)/
+                     * might result in a minlen of 1 and not of 4,
+                     * but this doesn't make us mismatch, just try a bit
+                     * harder than we should.
+                     *
+                     * However we must assume this GOSUB is infinite, to
+                     * avoid wrongly applying other optimizations in the
+                     * enclosing scope - see GH 18096, for example.
+                     */
+                    is_inf = is_inf_internal = 1;
+                    scan= regnext(scan);
+                    continue;
+                }
+
+                if (
+                    !recursed_depth
+                    || !PAREN_TEST(recursed_depth - 1, paren)
+                ) {
+                    /* it is quite possible that there are more efficient ways
+                     * to do this. We maintain a bitmap per level of recursion
+                     * of which patterns we have entered so we can detect if a
+                     * pattern creates a possible infinite loop. When we
+                     * recurse down a level we copy the previous levels bitmap
+                     * down. When we are at recursion level 0 we zero the top
+                     * level bitmap. It would be nice to implement a different
+                     * more efficient way of doing this. In particular the top
+                     * level bitmap may be unnecessary.
+                     */
+                    if (!recursed_depth) {
+                        Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
+                    } else {
+                        Copy(PAREN_OFFSET(recursed_depth - 1),
+                             PAREN_OFFSET(recursed_depth),
+                             RExC_study_chunk_recursed_bytes, U8);
+                    }
+                    /* we havent recursed into this paren yet, so recurse into it */
+                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
+                    PAREN_SET(recursed_depth, paren);
+                    my_recursed_depth= recursed_depth + 1;
+                } else {
+                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
+                    /* some form of infinite recursion, assume infinite length
+                     * */
+                    if (flags & SCF_DO_SUBSTR) {
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                        data->cur_is_floating = 1;
+                    }
+                    is_inf = is_inf_internal = 1;
+                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+                        ssc_anything(data->start_class);
+                    flags &= ~SCF_DO_STCLASS;
+
+                    start= NULL; /* reset start so we dont recurse later on. */
+                }
+            } else {
+                paren = stopparen;
+                start = scan + 2;
+                end = regnext(scan);
+            }
+            if (start) {
+                scan_frame *newframe;
+                assert(end);
+                if (!RExC_frame_last) {
+                    Newxz(newframe, 1, scan_frame);
+                    SAVEDESTRUCTOR_X(S_unwind_scan_frames, newframe);
+                    RExC_frame_head= newframe;
+                    RExC_frame_count++;
+                } else if (!RExC_frame_last->next_frame) {
+                    Newxz(newframe, 1, scan_frame);
+                    RExC_frame_last->next_frame= newframe;
+                    newframe->prev_frame= RExC_frame_last;
+                    RExC_frame_count++;
+                } else {
+                    newframe= RExC_frame_last->next_frame;
+                }
+                RExC_frame_last= newframe;
+
+                newframe->next_regnode = regnext(scan);
+                newframe->last_regnode = last;
+                newframe->stopparen = stopparen;
+                newframe->prev_recursed_depth = recursed_depth;
+                newframe->this_prev_frame= frame;
+                newframe->in_gosub = (
+                    (frame && frame->in_gosub) || OP(scan) == GOSUB
+                );
+
+                DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
+                DEBUG_PEEP("fnew", scan, depth, flags);
+
+                frame = newframe;
+                scan =  start;
+                stopparen = paren;
+                last = end;
+                depth = depth + 1;
+                recursed_depth= my_recursed_depth;
+
+                continue;
+            }
+        }
+        else if (REGNODE_TYPE(OP(scan)) == EXACT && ! isEXACTFish(OP(scan))) {
+            SSize_t bytelen = STR_LEN(scan), charlen;
+            UV uc;
+            assert(bytelen);
+            if (UTF) {
+                const U8 * const s = (U8*)STRING(scan);
+                uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+                charlen = utf8_length(s, s + bytelen);
+            } else {
+                uc = *((U8*)STRING(scan));
+                charlen = bytelen;
+            }
+            min += charlen;
+            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
+                /* The code below prefers earlier match for fixed
+                   offset, later match for variable offset.  */
+                if (data->last_end == -1) { /* Update the start info. */
+                    data->last_start_min = data->pos_min;
+                    data->last_start_max =
+                        is_inf ? OPTIMIZE_INFTY
+                        : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
+                            ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
+                }
+                sv_catpvn(data->last_found, STRING(scan), bytelen);
+                if (UTF)
+                    SvUTF8_on(data->last_found);
+                {
+                    SV * const sv = data->last_found;
+                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
+                    if (mg && mg->mg_len >= 0)
+                        mg->mg_len += charlen;
+                }
+                data->last_end = data->pos_min + charlen;
+                data->pos_min += charlen; /* As in the first entry. */
+                data->flags &= ~SF_BEFORE_EOL;
+            }
+
+            /* ANDing the code point leaves at most it, and not in locale, and
+             * can't match null string */
+            if (flags & SCF_DO_STCLASS_AND) {
+                ssc_cp_and(data->start_class, uc);
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+                ssc_clear_locale(data->start_class);
+            }
+            else if (flags & SCF_DO_STCLASS_OR) {
+                ssc_add_cp(data->start_class, uc);
+                ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+
+                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+            }
+            flags &= ~SCF_DO_STCLASS;
+            DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
+        }
+        else if (REGNODE_TYPE(OP(scan)) == EXACT) {
+            /* But OP != EXACT!, so is EXACTFish */
+            SSize_t bytelen = STR_LEN(scan), charlen;
+            const U8 * s = (U8*)STRING(scan);
+
+            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
+             * with the mask set to the complement of the bit that differs
+             * between upper and lower case, and the lowest code point of the
+             * pair (which the '&' forces) */
+            if (     bytelen == 1
+                &&   isALPHA_A(*s)
+                &&  (         OP(scan) == EXACTFAA
+                     || (     OP(scan) == EXACTFU
+                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
+                &&   mutate_ok
+            ) {
+                U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
+
+                OP(scan) = ANYOFM;
+                ARG_SET(scan, *s & mask);
+                FLAGS(scan) = mask;
+                /* We're not EXACTFish any more, so restudy.
+                 * Search for "restudy" in this file to find
+                 * a comment with details. */
+                continue;
+            }
+
+            /* Search for fixed substrings supports EXACT only. */
+            if (flags & SCF_DO_SUBSTR) {
+                assert(data);
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+            }
+            charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
+            if (unfolded_multi_char) {
+                RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
+            }
+            min += charlen - min_subtract;
+            assert (min >= 0);
+            if ((SSize_t)min_subtract < OPTIMIZE_INFTY
+                && delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
+            ) {
+                delta += min_subtract;
+            } else {
+                delta = OPTIMIZE_INFTY;
+            }
+            if (flags & SCF_DO_SUBSTR) {
+                data->pos_min += charlen - min_subtract;
+                if (data->pos_min < 0) {
+                    data->pos_min = 0;
+                }
+                if ((SSize_t)min_subtract < OPTIMIZE_INFTY
+                    && data->pos_delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
+                ) {
+                    data->pos_delta += min_subtract;
+                } else {
+                    data->pos_delta = OPTIMIZE_INFTY;
+                }
+                if (min_subtract) {
+                    data->cur_is_floating = 1; /* float */
+                }
+            }
+
+            if (flags & SCF_DO_STCLASS) {
+                SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
+
+                assert(EXACTF_invlist);
+                if (flags & SCF_DO_STCLASS_AND) {
+                    if (OP(scan) != EXACTFL)
+                        ssc_clear_locale(data->start_class);
+                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+                    ANYOF_POSIXL_ZERO(data->start_class);
+                    ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
+                }
+                else {  /* SCF_DO_STCLASS_OR */
+                    ssc_union(data->start_class, EXACTF_invlist, FALSE);
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+
+                    /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
+                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+                }
+                flags &= ~SCF_DO_STCLASS;
+                SvREFCNT_dec(EXACTF_invlist);
+            }
+            DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
+        }
+        else if (REGNODE_VARIES(OP(scan))) {
+            SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
+            I32 fl = 0;
+            U32 f = flags;
+            regnode * const oscan = scan;
+            regnode_ssc this_class;
+            regnode_ssc *oclass = NULL;
+            I32 next_is_eval = 0;
+
+            switch (REGNODE_TYPE(OP(scan))) {
+            case WHILEM:                /* End of (?:...)* . */
+                scan = REGNODE_AFTER(scan);
+                goto finish;
+            case PLUS:
+                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
+                    next = REGNODE_AFTER(scan);
+                    if (   (     REGNODE_TYPE(OP(next)) == EXACT
+                            && ! isEXACTFish(OP(next)))
+                        || (flags & SCF_DO_STCLASS))
+                    {
+                        mincount = 1;
+                        maxcount = REG_INFTY;
+                        next = regnext(scan);
+                        scan = REGNODE_AFTER(scan);
+                        goto do_curly;
+                    }
+                }
+                if (flags & SCF_DO_SUBSTR)
+                    data->pos_min++;
+                /* This will bypass the formal 'min += minnext * mincount'
+                 * calculation in the do_curly path, so assumes min width
+                 * of the PLUS payload is exactly one. */
+                min++;
+                /* FALLTHROUGH */
+            case STAR:
+                next = REGNODE_AFTER(scan);
+
+                /* This temporary node can now be turned into EXACTFU, and
+                 * must, as regexec.c doesn't handle it */
+                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
+                    OP(next) = EXACTFU;
+                }
+
+                if (     STR_LEN(next) == 1
+                    &&   isALPHA_A(* STRING(next))
+                    && (         OP(next) == EXACTFAA
+                        || (     OP(next) == EXACTFU
+                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
+                    &&   mutate_ok
+                ) {
+                    /* These differ in just one bit */
+                    U8 mask = ~ ('A' ^ 'a');
+
+                    assert(isALPHA_A(* STRING(next)));
+
+                    /* Then replace it by an ANYOFM node, with
+                    * the mask set to the complement of the
+                    * bit that differs between upper and lower
+                    * case, and the lowest code point of the
+                    * pair (which the '&' forces) */
+                    OP(next) = ANYOFM;
+                    ARG_SET(next, *STRING(next) & mask);
+                    FLAGS(next) = mask;
+                }
+
+                if (flags & SCF_DO_STCLASS) {
+                    mincount = 0;
+                    maxcount = REG_INFTY;
+                    next = regnext(scan);
+                    scan = REGNODE_AFTER(scan);
+                    goto do_curly;
+                }
+                if (flags & SCF_DO_SUBSTR) {
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
+                    /* Cannot extend fixed substrings */
+                    data->cur_is_floating = 1; /* float */
+                }
+                is_inf = is_inf_internal = 1;
+                scan = regnext(scan);
+                goto optimize_curly_tail;
+            case CURLY:
+                if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
+                    && (scan->flags == stopparen))
+                {
+                    mincount = 1;
+                    maxcount = 1;
+                } else {
+                    mincount = ARG1(scan);
+                    maxcount = ARG2(scan);
+                }
+                next = regnext(scan);
+                if (OP(scan) == CURLYX) {
+                    I32 lp = (data ? *(data->last_closep) : 0);
+                    scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
+                }
+                scan = REGNODE_AFTER(scan);
+                next_is_eval = (OP(scan) == EVAL);
+              do_curly:
+                if (flags & SCF_DO_SUBSTR) {
+                    if (mincount == 0)
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                    /* Cannot extend fixed substrings */
+                    pos_before = data->pos_min;
+                }
+                if (data) {
+                    fl = data->flags;
+                    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
+                    if (is_inf)
+                        data->flags |= SF_IS_INF;
+                }
+                if (flags & SCF_DO_STCLASS) {
+                    ssc_init(pRExC_state, &this_class);
+                    oclass = data->start_class;
+                    data->start_class = &this_class;
+                    f |= SCF_DO_STCLASS_AND;
+                    f &= ~SCF_DO_STCLASS_OR;
+                }
+                /* Exclude from super-linear cache processing any {n,m}
+                   regops for which the combination of input pos and regex
+                   pos is not enough information to determine if a match
+                   will be possible.
+
+                   For example, in the regex /foo(bar\s*){4,8}baz/ with the
+                   regex pos at the \s*, the prospects for a match depend not
+                   only on the input position but also on how many (bar\s*)
+                   repeats into the {4,8} we are. */
+               if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY))
+                    f &= ~SCF_WHILEM_VISITED_POS;
+
+                /* This will finish on WHILEM, setting scan, or on NULL: */
+                /* recurse study_chunk() on loop bodies */
+                minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
+                                  last, data, stopparen, recursed_depth, NULL,
+                                  (mincount == 0
+                                   ? (f & ~SCF_DO_SUBSTR)
+                                   : f)
+                                  , depth+1, mutate_ok);
+
+                if (data && data->flags & SCF_SEEN_ACCEPT) {
+                    if (mincount > 1)
+                        mincount = 1;
+                }
+
+                if (flags & SCF_DO_STCLASS)
+                    data->start_class = oclass;
+                if (mincount == 0 || minnext == 0) {
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        /* Switch to OR mode: cache the old value of
+                         * data->start_class */
+                        INIT_AND_WITHP;
+                        StructCopy(data->start_class, and_withp, regnode_ssc);
+                        flags &= ~SCF_DO_STCLASS_AND;
+                        StructCopy(&this_class, data->start_class, regnode_ssc);
+                        flags |= SCF_DO_STCLASS_OR;
+                        ANYOF_FLAGS(data->start_class)
+                                                |= SSC_MATCHES_EMPTY_STRING;
+                    }
+                } else {                /* Non-zero len */
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND)
+                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+                    flags &= ~SCF_DO_STCLASS;
+                }
+                if (!scan)              /* It was not CURLYX, but CURLY. */
+                    scan = next;
+                if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
+                    /* ? quantifier ok, except for (?{ ... }) */
+                    && (next_is_eval || !(mincount == 0 && maxcount == 1))
+                    && (minnext == 0) && (deltanext == 0)
+                    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
+                    && maxcount <= REG_INFTY/3) /* Complement check for big
+                                                   count */
+                {
+                    _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
+                        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+                            "Quantifier unexpected on zero-length expression "
+                            "in regex m/%" UTF8f "/",
+                             UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
+                                  RExC_precomp)));
+                }
+
+                if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
+                    || min >= SSize_t_MAX - minnext * mincount )
+                {
+                    FAIL("Regexp out of space");
+                }
+
+                min += minnext * mincount;
+                is_inf_internal |= deltanext == OPTIMIZE_INFTY
+                         || (maxcount == REG_INFTY && minnext + deltanext > 0);
+                is_inf |= is_inf_internal;
+                if (is_inf) {
+                    delta = OPTIMIZE_INFTY;
+                } else {
+                    delta += (minnext + deltanext) * maxcount
+                             - minnext * mincount;
+                }
+
+                if (data && data->flags & SCF_SEEN_ACCEPT) {
+                    if (flags & SCF_DO_SUBSTR) {
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                        flags &= ~SCF_DO_SUBSTR;
+                    }
+                    if (stopmin > min)
+                        stopmin = min;
+                    DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
+                }
+                /* Try powerful optimization CURLYX => CURLYN. */
+                if (  OP(oscan) == CURLYX && data
+                      && data->flags & SF_IN_PAR
+                      && !(data->flags & SF_HAS_EVAL)
+                      && !deltanext && minnext == 1
+                      && mutate_ok
+                ) {
+                    /* Try to optimize to CURLYN.  */
+                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX);
+                    regnode * const nxt1 = nxt;
+#ifdef DEBUGGING
+                    regnode *nxt2;
+#endif
+
+                    /* Skip open. */
+                    nxt = regnext(nxt);
+                    if (!REGNODE_SIMPLE(OP(nxt))
+                        && !(REGNODE_TYPE(OP(nxt)) == EXACT
+                             && STR_LEN(nxt) == 1))
+                        goto nogo;
+#ifdef DEBUGGING
+                    nxt2 = nxt;
+#endif
+                    nxt = regnext(nxt);
+                    if (OP(nxt) != CLOSE)
+                        goto nogo;
+                    if (RExC_open_parens) {
+
+                        /*open->CURLYM*/
+                        RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
+
+                        /*close->while*/
+                        RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
+                    }
+                    /* Now we know that nxt2 is the only contents: */
+                    oscan->flags = (U8)PARNO(nxt);
+                    OP(oscan) = CURLYN;
+                    OP(nxt1) = NOTHING; /* was OPEN. */
+
+#ifdef DEBUGGING
+                    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
+                    NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
+                    NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
+                    OP(nxt) = OPTIMIZED;        /* was CLOSE. */
+                    OP(nxt + 1) = OPTIMIZED; /* was count. */
+                    NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
+#endif
+                }
+              nogo:
+
+                /* Try optimization CURLYX => CURLYM. */
+                if (  OP(oscan) == CURLYX && data
+                      && !(data->flags & SF_HAS_PAR)
+                      && !(data->flags & SF_HAS_EVAL)
+                      && !deltanext     /* atom is fixed width */
+                      && minnext != 0   /* CURLYM can't handle zero width */
+                         /* Nor characters whose fold at run-time may be
+                          * multi-character */
+                      && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+                      && mutate_ok
+                ) {
+                    /* XXXX How to optimize if data == 0? */
+                    /* Optimize to a simpler form.  */
+                    regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX); /* OPEN */
+                    regnode *nxt2;
+
+                    OP(oscan) = CURLYM;
+                    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
+                            && (OP(nxt2) != WHILEM))
+                        nxt = nxt2;
+                    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
+                    /* Need to optimize away parenths. */
+                    if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
+                        /* Set the parenth number.  */
+                        /* note that we have changed the type of oscan to CURLYM here */
+                        regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/
+
+                        oscan->flags = (U8)PARNO(nxt);
+                        if (RExC_open_parens) {
+                             /*open->CURLYM*/
+                            RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
+
+                            /*close->NOTHING*/
+                            RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt2)
+                                                         + 1;
+                        }
+                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
+                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
+
+#ifdef DEBUGGING
+                        OP(nxt1 + 1) = OPTIMIZED; /* was count. */
+                        OP(nxt + 1) = OPTIMIZED; /* was count. */
+                        NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
+                        NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
+#endif
+#if 0
+                        while ( nxt1 && (OP(nxt1) != WHILEM)) {
+                            regnode *nnxt = regnext(nxt1);
+                            if (nnxt == nxt) {
+                                if (REGNODE_OFF_BY_ARG(OP(nxt1)))
+                                    ARG_SET(nxt1, nxt2 - nxt1);
+                                else if (nxt2 - nxt1 < U16_MAX)
+                                    NEXT_OFF(nxt1) = nxt2 - nxt1;
+                                else
+                                    OP(nxt) = NOTHING;  /* Cannot beautify */
+                            }
+                            nxt1 = nnxt;
+                        }
+#endif
+                        /* Optimize again: */
+                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
+                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
+                                    NULL, stopparen, recursed_depth, NULL, 0,
+                                    depth+1, mutate_ok);
+                    }
+                    else
+                        oscan->flags = 0;
+                }
+                else if ((OP(oscan) == CURLYX)
+                         && (flags & SCF_WHILEM_VISITED_POS)
+                         /* See the comment on a similar expression above.
+                            However, this time it's not a subexpression
+                            we care about, but the expression itself. */
+                         && (maxcount == REG_INFTY)
+                         && data) {
+                    /* This stays as CURLYX, we can put the count/of pair. */
+                    /* Find WHILEM (as in regexec.c) */
+                    regnode *nxt = oscan + NEXT_OFF(oscan);
+
+                    if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
+                        nxt += ARG(nxt);
+                    nxt = REGNODE_BEFORE(nxt);
+                    if (nxt->flags & 0xf) {
+                        /* we've already set whilem count on this node */
+                    } else if (++data->whilem_c < 16) {
+                        assert(data->whilem_c <= RExC_whilem_seen);
+                        nxt->flags = (U8)(data->whilem_c
+                            | (RExC_whilem_seen << 4)); /* On WHILEM */
+                    }
+                }
+                if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
+                    pars++;
+                if (flags & SCF_DO_SUBSTR) {
+                    SV *last_str = NULL;
+                    STRLEN last_chrs = 0;
+                    int counted = mincount != 0;
+
+                    if (data->last_end > 0 && mincount != 0) { /* Ends with a
+                                                                  string. */
+                        SSize_t b = pos_before >= data->last_start_min
+                            ? pos_before : data->last_start_min;
+                        STRLEN l;
+                        const char * const s = SvPV_const(data->last_found, l);
+                        SSize_t old = b - data->last_start_min;
+                        assert(old >= 0);
+
+                        if (UTF)
+                            old = utf8_hop_forward((U8*)s, old,
+                                               (U8 *) SvEND(data->last_found))
+                                - (U8*)s;
+                        l -= old;
+                        /* Get the added string: */
+                        last_str = newSVpvn_utf8(s  + old, l, UTF);
+                        last_chrs = UTF ? utf8_length((U8*)(s + old),
+                                            (U8*)(s + old + l)) : l;
+                        if (deltanext == 0 && pos_before == b) {
+                            /* What was added is a constant string */
+                            if (mincount > 1) {
+
+                                SvGROW(last_str, (mincount * l) + 1);
+                                repeatcpy(SvPVX(last_str) + l,
+                                          SvPVX_const(last_str), l,
+                                          mincount - 1);
+                                SvCUR_set(last_str, SvCUR(last_str) * mincount);
+                                /* Add additional parts. */
+                                SvCUR_set(data->last_found,
+                                          SvCUR(data->last_found) - l);
+                                sv_catsv(data->last_found, last_str);
+                                {
+                                    SV * sv = data->last_found;
+                                    MAGIC *mg =
+                                        SvUTF8(sv) && SvMAGICAL(sv) ?
+                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
+                                    if (mg && mg->mg_len >= 0)
+                                        mg->mg_len += last_chrs * (mincount-1);
+                                }
+                                last_chrs *= mincount;
+                                data->last_end += l * (mincount - 1);
+                            }
+                        } else {
+                            /* start offset must point into the last copy */
+                            data->last_start_min += minnext * (mincount - 1);
+                            data->last_start_max =
+                              is_inf
+                               ? OPTIMIZE_INFTY
+                               : data->last_start_max +
+                                 (maxcount - 1) * (minnext + data->pos_delta);
+                        }
+                    }
+                    /* It is counted once already... */
+                    data->pos_min += minnext * (mincount - counted);
+#if 0
+    Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
+                              " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
+                              " maxcount=%" UVuf " mincount=%" UVuf
+                              " data->pos_delta=%" UVuf "\n",
+        (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext,
+        (UV)maxcount, (UV)mincount, (UV)data->pos_delta);
+    if (deltanext != OPTIMIZE_INFTY)
+        Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
+            (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
+            - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
+#endif
+                    if (deltanext == OPTIMIZE_INFTY
+                        || data->pos_delta == OPTIMIZE_INFTY
+                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
+                        data->pos_delta = OPTIMIZE_INFTY;
+                    else
+                        data->pos_delta += - counted * deltanext +
+                        (minnext + deltanext) * maxcount - minnext * mincount;
+                    if (mincount != maxcount) {
+                         /* Cannot extend fixed substrings found inside
+                            the group.  */
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                        if (mincount && last_str) {
+                            SV * const sv = data->last_found;
+                            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                                mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+                            if (mg)
+                                mg->mg_len = -1;
+                            sv_setsv(sv, last_str);
+                            data->last_end = data->pos_min;
+                            data->last_start_min = data->pos_min - last_chrs;
+                            data->last_start_max = is_inf
+                                ? OPTIMIZE_INFTY
+                                : data->pos_min + data->pos_delta - last_chrs;
+                        }
+                        data->cur_is_floating = 1; /* float */
+                    }
+                    SvREFCNT_dec(last_str);
+                }
+                if (data && (fl & SF_HAS_EVAL))
+                    data->flags |= SF_HAS_EVAL;
+              optimize_curly_tail:
+                rck_elide_nothing(oscan);
+                continue;
+
+            default:
+                Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
+                                                                    OP(scan));
+            case REF:
+            case CLUMP:
+                if (flags & SCF_DO_SUBSTR) {
+                    /* Cannot expect anything... */
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
+                    data->cur_is_floating = 1; /* float */
+                }
+                is_inf = is_inf_internal = 1;
+                if (flags & SCF_DO_STCLASS_OR) {
+                    if (OP(scan) == CLUMP) {
+                        /* Actually is any start char, but very few code points
+                         * aren't start characters */
+                        ssc_match_all_cp(data->start_class);
+                    }
+                    else {
+                        ssc_anything(data->start_class);
+                    }
+                }
+                flags &= ~SCF_DO_STCLASS;
+                break;
+            }
+        }
+        else if (OP(scan) == LNBREAK) {
+            if (flags & SCF_DO_STCLASS) {
+                if (flags & SCF_DO_STCLASS_AND) {
+                    ssc_intersection(data->start_class,
+                                    PL_XPosix_ptrs[CC_VERTSPACE_], FALSE);
+                    ssc_clear_locale(data->start_class);
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
+                }
+                else if (flags & SCF_DO_STCLASS_OR) {
+                    ssc_union(data->start_class,
+                              PL_XPosix_ptrs[CC_VERTSPACE_],
+                              FALSE);
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+
+                    /* See commit msg for
+                     * 749e076fceedeb708a624933726e7989f2302f6a */
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
+                }
+                flags &= ~SCF_DO_STCLASS;
+            }
+            min++;
+            if (delta != OPTIMIZE_INFTY)
+                delta++;    /* Because of the 2 char string cr-lf */
+            if (flags & SCF_DO_SUBSTR) {
+                /* Cannot expect anything... */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                data->pos_min += 1;
+                if (data->pos_delta != OPTIMIZE_INFTY) {
+                    data->pos_delta += 1;
+                }
+                data->cur_is_floating = 1; /* float */
+            }
+        }
+        else if (REGNODE_SIMPLE(OP(scan))) {
+
+            if (flags & SCF_DO_SUBSTR) {
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                data->pos_min++;
+            }
+            min++;
+            if (flags & SCF_DO_STCLASS) {
+                bool invert = 0;
+                SV* my_invlist = NULL;
+                U8 namedclass;
+
+                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+
+                /* Some of the logic below assumes that switching
+                   locale on will only add false positives. */
+                switch (OP(scan)) {
+
+                default:
+#ifdef DEBUGGING
+                   Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
+                                                                     OP(scan));
+#endif
+                case SANY:
+                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+                        ssc_match_all_cp(data->start_class);
+                    break;
+
+                case REG_ANY:
+                    {
+                        SV* REG_ANY_invlist = _new_invlist(2);
+                        REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist,
+                                                            '\n');
+                        if (flags & SCF_DO_STCLASS_OR) {
+                            ssc_union(data->start_class,
+                                      REG_ANY_invlist,
+                                      TRUE /* TRUE => invert, hence all but \n
+                                            */
+                                      );
+                        }
+                        else if (flags & SCF_DO_STCLASS_AND) {
+                            ssc_intersection(data->start_class,
+                                             REG_ANY_invlist,
+                                             TRUE  /* TRUE => invert */
+                                             );
+                            ssc_clear_locale(data->start_class);
+                        }
+                        SvREFCNT_dec_NN(REG_ANY_invlist);
+                    }
+                    break;
+
+                case ANYOFD:
+                case ANYOFL:
+                case ANYOFPOSIXL:
+                case ANYOFH:
+                case ANYOFHb:
+                case ANYOFHr:
+                case ANYOFHs:
+                case ANYOF:
+                    if (flags & SCF_DO_STCLASS_AND)
+                        ssc_and(pRExC_state, data->start_class,
+                                (regnode_charclass *) scan);
+                    else
+                        ssc_or(pRExC_state, data->start_class,
+                                                          (regnode_charclass *) scan);
+                    break;
+
+                case ANYOFHbbm:
+                  {
+                    SV* cp_list = get_ANYOFHbbm_contents(scan);
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class, cp_list, invert);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class, cp_list, invert);
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
+                case NANYOFM: /* NANYOFM already contains the inversion of the
+                                 input ANYOF data, so, unlike things like
+                                 NPOSIXA, don't change 'invert' to TRUE */
+                    /* FALLTHROUGH */
+                case ANYOFM:
+                  {
+                    SV* cp_list = get_ANYOFM_contents(scan);
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class, cp_list, invert);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class, cp_list, invert);
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
+                case ANYOFR:
+                case ANYOFRb:
+                  {
+                    SV* cp_list = NULL;
+
+                    cp_list = _add_range_to_invlist(cp_list,
+                                        ANYOFRbase(scan),
+                                        ANYOFRbase(scan) + ANYOFRdelta(scan));
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class, cp_list, invert);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class, cp_list, invert);
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
+                case NPOSIXL:
+                    invert = 1;
+                    /* FALLTHROUGH */
+
+                case POSIXL:
+                    namedclass = classnum_to_namedclass(FLAGS(scan)) + invert;
+                    if (flags & SCF_DO_STCLASS_AND) {
+                        bool was_there = cBOOL(
+                                          ANYOF_POSIXL_TEST(data->start_class,
+                                                                 namedclass));
+                        ANYOF_POSIXL_ZERO(data->start_class);
+                        if (was_there) {    /* Do an AND */
+                            ANYOF_POSIXL_SET(data->start_class, namedclass);
+                        }
+                        /* No individual code points can now match */
+                        data->start_class->invlist
+                                                = sv_2mortal(_new_invlist(0));
+                    }
+                    else {
+                        int complement = namedclass + ((invert) ? -1 : 1);
+
+                        assert(flags & SCF_DO_STCLASS_OR);
+
+                        /* If the complement of this class was already there,
+                         * the result is that they match all code points,
+                         * (\d + \D == everything).  Remove the classes from
+                         * future consideration.  Locale is not relevant in
+                         * this case */
+                        if (ANYOF_POSIXL_TEST(data->start_class, complement)) {
+                            ssc_match_all_cp(data->start_class);
+                            ANYOF_POSIXL_CLEAR(data->start_class, namedclass);
+                            ANYOF_POSIXL_CLEAR(data->start_class, complement);
+                        }
+                        else {  /* The usual case; just add this class to the
+                                   existing set */
+                            ANYOF_POSIXL_SET(data->start_class, namedclass);
+                        }
+                    }
+                    break;
+
+                case NPOSIXA:   /* For these, we always know the exact set of
+                                   what's matched */
+                    invert = 1;
+                    /* FALLTHROUGH */
+                case POSIXA:
+                    my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
+                    goto join_posix_and_ascii;
+
+                case NPOSIXD:
+                case NPOSIXU:
+                    invert = 1;
+                    /* FALLTHROUGH */
+                case POSIXD:
+                case POSIXU:
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
+
+                    /* NPOSIXD matches all upper Latin1 code points unless the
+                     * target string being matched is UTF-8, which is
+                     * unknowable until match time.  Since we are going to
+                     * invert, we want to get rid of all of them so that the
+                     * inversion will match all */
+                    if (OP(scan) == NPOSIXD) {
+                        _invlist_subtract(my_invlist, PL_UpperLatin1,
+                                          &my_invlist);
+                    }
+
+                  join_posix_and_ascii:
+
+                    if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class, my_invlist, invert);
+                        ssc_clear_locale(data->start_class);
+                    }
+                    else {
+                        assert(flags & SCF_DO_STCLASS_OR);
+                        ssc_union(data->start_class, my_invlist, invert);
+                    }
+                    SvREFCNT_dec(my_invlist);
+                }
+                if (flags & SCF_DO_STCLASS_OR)
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+                flags &= ~SCF_DO_STCLASS;
+            }
+        }
+        else if (REGNODE_TYPE(OP(scan)) == EOL && flags & SCF_DO_SUBSTR) {
+            data->flags |= (OP(scan) == MEOL
+                            ? SF_BEFORE_MEOL
+                            : SF_BEFORE_SEOL);
+            scan_commit(pRExC_state, data, minlenp, is_inf);
+
+        }
+        else if (  REGNODE_TYPE(OP(scan)) == BRANCHJ
+                 /* Lookbehind, or need to calculate parens/evals/stclass: */
+                   && (scan->flags || data || (flags & SCF_DO_STCLASS))
+                   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
+        {
+            if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
+                || OP(scan) == UNLESSM )
+            {
+                /* Negative Lookahead/lookbehind
+                   In this case we can't do fixed string optimisation.
+                */
+
+                bool is_positive = OP(scan) == IFMATCH ? 1 : 0;
+                SSize_t deltanext, minnext;
+                SSize_t fake_last_close = 0;
+                regnode *fake_last_close_op = NULL;
+                regnode *cur_last_close_op;
+                regnode *nscan;
+                regnode_ssc intrnl;
+                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
+
+                StructCopy(&zero_scan_data, &data_fake, scan_data_t);
+                if (data) {
+                    data_fake.whilem_c = data->whilem_c;
+                    data_fake.last_closep = data->last_closep;
+                    data_fake.last_close_opp = data->last_close_opp;
+                }
+                else {
+                    data_fake.last_closep = &fake_last_close;
+                    data_fake.last_close_opp = &fake_last_close_op;
+                }
+
+                /* remember the last_close_op we saw so we can see if
+                 * we are dealing with variable length lookbehind that
+                 * contains capturing buffers, which are considered
+                 * experimental */
+                cur_last_close_op= *(data_fake.last_close_opp);
+
+                data_fake.pos_delta = delta;
+                if ( flags & SCF_DO_STCLASS && !scan->flags
+                     && OP(scan) == IFMATCH ) { /* Lookahead */
+                    ssc_init(pRExC_state, &intrnl);
+                    data_fake.start_class = &intrnl;
+                    f |= SCF_DO_STCLASS_AND;
+                }
+                if (flags & SCF_WHILEM_VISITED_POS)
+                    f |= SCF_WHILEM_VISITED_POS;
+                next = regnext(scan);
+                nscan = REGNODE_AFTER(scan);
+
+                /* recurse study_chunk() for lookahead body */
+                minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
+                                      last, &data_fake, stopparen,
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
+
+                if (scan->flags) {
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || minnext > (I32)U8_MAX
+                        || minnext + deltanext > (I32)U8_MAX)
+                    {
+                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
+                              (UV)U8_MAX);
+                    }
+
+                    /* The 'next_off' field has been repurposed to count the
+                     * additional starting positions to try beyond the initial
+                     * one.  (This leaves it at 0 for non-variable length
+                     * matches to avoid breakage for those not using this
+                     * extension) */
+                    if (deltanext)  {
+                        scan->next_off = deltanext;
+                        if (
+                            /* See a CLOSE op inside this lookbehind? */
+                            cur_last_close_op != *(data_fake.last_close_opp)
+                            /* and not doing restudy. see: restudied */
+                            && !(flags & SCF_TRIE_DOING_RESTUDY)
+                        ) {
+                            /* this is positive variable length lookbehind with
+                             * capture buffers inside of it */
+                            ckWARNexperimental_with_arg(RExC_parse,
+                                WARN_EXPERIMENTAL__VLB,
+                                "Variable length %s lookbehind with capturing is experimental",
+                                is_positive ? "positive" : "negative");
+                        }
+                    }
+                    scan->flags = (U8)minnext + deltanext;
+                }
+                if (data) {
+                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+                        pars++;
+                    if (data_fake.flags & SF_HAS_EVAL)
+                        data->flags |= SF_HAS_EVAL;
+                    data->whilem_c = data_fake.whilem_c;
+                }
+                if (f & SCF_DO_STCLASS_AND) {
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        /* OR before, AND after: ideally we would recurse with
+                         * data_fake to get the AND applied by study of the
+                         * remainder of the pattern, and then derecurse;
+                         * *** HACK *** for now just treat as "no information".
+                         * See [perl #56690].
+                         */
+                        ssc_init(pRExC_state, data->start_class);
+                    }  else {
+                        /* AND before and after: combine and continue.  These
+                         * assertions are zero-length, so can match an EMPTY
+                         * string */
+                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                        ANYOF_FLAGS(data->start_class)
+                                                   |= SSC_MATCHES_EMPTY_STRING;
+                    }
+                }
+                DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
+            }
+#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
+            else {
+                /* Positive Lookahead/lookbehind
+                   In this case we can do fixed string optimisation,
+                   but we must be careful about it. Note in the case of
+                   lookbehind the positions will be offset by the minimum
+                   length of the pattern, something we won't know about
+                   until after the recurse.
+                */
+                SSize_t deltanext, fake_last_close = 0;
+                regnode *last_close_op = NULL;
+                regnode *nscan;
+                regnode_ssc intrnl;
+                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
+                /* We use SAVEFREEPV so that when the full compile
+                    is finished perl will clean up the allocated
+                    minlens when it's all done. This way we don't
+                    have to worry about freeing them when we know
+                    they wont be used, which would be a pain.
+                 */
+                SSize_t *minnextp;
+                Newx( minnextp, 1, SSize_t );
+                SAVEFREEPV(minnextp);
+
+                if (data) {
+                    StructCopy(data, &data_fake, scan_data_t);
+                    if ((flags & SCF_DO_SUBSTR) && data->last_found) {
+                        f |= SCF_DO_SUBSTR;
+                        if (scan->flags)
+                            scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
+                        data_fake.last_found=newSVsv(data->last_found);
+                    }
+                }
+                else {
+                    data_fake.last_closep = &fake_last_close;
+                    data_fake.last_close_opp = &fake_last_close_opp;
+                }
+                data_fake.flags = 0;
+                data_fake.substrs[0].flags = 0;
+                data_fake.substrs[1].flags = 0;
+                data_fake.pos_delta = delta;
+                if (is_inf)
+                    data_fake.flags |= SF_IS_INF;
+                if ( flags & SCF_DO_STCLASS && !scan->flags
+                     && OP(scan) == IFMATCH ) { /* Lookahead */
+                    ssc_init(pRExC_state, &intrnl);
+                    data_fake.start_class = &intrnl;
+                    f |= SCF_DO_STCLASS_AND;
+                }
+                if (flags & SCF_WHILEM_VISITED_POS)
+                    f |= SCF_WHILEM_VISITED_POS;
+                next = regnext(scan);
+                nscan = REGNODE_AFTER(scan);
+
+                /* positive lookahead study_chunk() recursion */
+                *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
+                                        &deltanext, last, &data_fake,
+                                        stopparen, recursed_depth, NULL,
+                                        f, depth+1, mutate_ok);
+                if (scan->flags) {
+                    assert(0);  /* This code has never been tested since this
+                                   is normally not compiled */
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || *minnextp > (I32)U8_MAX
+                        || *minnextp + deltanext > (I32)U8_MAX)
+                    {
+                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
+                              (UV)U8_MAX);
+                    }
+
+                    if (deltanext) {
+                        scan->next_off = deltanext;
+                    }
+                    scan->flags = (U8)*minnextp + deltanext;
+                }
+
+                *minnextp += min;
+
+                if (f & SCF_DO_STCLASS_AND) {
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                    ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
+                }
+                if (data) {
+                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+                        pars++;
+                    if (data_fake.flags & SF_HAS_EVAL)
+                        data->flags |= SF_HAS_EVAL;
+                    data->whilem_c = data_fake.whilem_c;
+                    if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+                        int i;
+                        if (RExC_rx->minlen < *minnextp)
+                            RExC_rx->minlen = *minnextp;
+                        scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
+                        SvREFCNT_dec_NN(data_fake.last_found);
+
+                        for (i = 0; i < 2; i++) {
+                            if (data_fake.substrs[i].minlenp != minlenp) {
+                                data->substrs[i].min_offset =
+                                            data_fake.substrs[i].min_offset;
+                                data->substrs[i].max_offset =
+                                            data_fake.substrs[i].max_offset;
+                                data->substrs[i].minlenp =
+                                            data_fake.substrs[i].minlenp;
+                                data->substrs[i].lookbehind += scan->flags;
+                            }
+                        }
+                    }
+                }
+            }
+#endif
+        }
+        else if (OP(scan) == OPEN) {
+            if (stopparen != (I32)PARNO(scan))
+                pars++;
+        }
+        else if (OP(scan) == CLOSE) {
+            if (stopparen == (I32)PARNO(scan)) {
+                break;
+            }
+            if ((I32)PARNO(scan) == is_par) {
+                next = regnext(scan);
+
+                if ( next && (OP(next) != WHILEM) && next < last)
+                    is_par = 0;         /* Disable optimization */
+            }
+            if (data) {
+                *(data->last_closep) = PARNO(scan);
+                *(data->last_close_opp) = scan;
+            }
+        }
+        else if (OP(scan) == EVAL) {
+            if (data)
+                data->flags |= SF_HAS_EVAL;
+        }
+        else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
+            if (flags & SCF_DO_SUBSTR) {
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                flags &= ~SCF_DO_SUBSTR;
+            }
+            if (OP(scan)==ACCEPT) {
+                /* m{(*ACCEPT)x} does not have to start with 'x' */
+                flags &= ~SCF_DO_STCLASS;
+                if (data)
+                    data->flags |= SCF_SEEN_ACCEPT;
+                if (stopmin > min)
+                    stopmin = min;
+            }
+        }
+        else if (OP(scan) == COMMIT) {
+            /* gh18770: m{abc(*COMMIT)xyz} must fail on "abc abcxyz", so we
+             * must not end up with "abcxyz" as a fixed substring else we'll
+             * skip straight to attempting to match at offset 4.
+             */
+            if (flags & SCF_DO_SUBSTR) {
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                flags &= ~SCF_DO_SUBSTR;
+            }
+        }
+        else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+        {
+                if (flags & SCF_DO_SUBSTR) {
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
+                    data->cur_is_floating = 1; /* float */
+                }
+                is_inf = is_inf_internal = 1;
+                if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+                    ssc_anything(data->start_class);
+                flags &= ~SCF_DO_STCLASS;
+        }
+        else if (OP(scan) == GPOS) {
+            if (!(RExC_rx->intflags & PREGf_GPOS_FLOAT) &&
+                !(delta || is_inf || (data && data->pos_delta)))
+            {
+                if (!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR))
+                    RExC_rx->intflags |= PREGf_ANCH_GPOS;
+                if (RExC_rx->gofs < (STRLEN)min)
+                    RExC_rx->gofs = min;
+            } else {
+                RExC_rx->intflags |= PREGf_GPOS_FLOAT;
+                RExC_rx->gofs = 0;
+            }
+        }
+#ifdef TRIE_STUDY_OPT
+#ifdef FULL_TRIE_STUDY
+        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
+            /* NOTE - There is similar code to this block above for handling
+               BRANCH nodes on the initial study.  If you change stuff here
+               check there too. */
+            regnode *trie_node= scan;
+            regnode *tail= regnext(scan);
+            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
+            SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
+            regnode_ssc accum;
+
+            if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
+                /* Cannot merge strings after this. */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+            }
+            if (flags & SCF_DO_STCLASS)
+                ssc_init_zero(pRExC_state, &accum);
+
+            if (!trie->jump) {
+                min1= trie->minlen;
+                max1= trie->maxlen;
+            } else {
+                const regnode *nextbranch= NULL;
+                U32 word;
+
+                for ( word=1 ; word <= trie->wordcount ; word++)
+                {
+                    SSize_t deltanext = 0, minnext = 0;
+                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
+                    SSize_t fake_last_close = 0;
+                    regnode *fake_last_close_op = NULL;
+                    regnode_ssc this_class;
+
+                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
+                    if (data) {
+                        data_fake.whilem_c = data->whilem_c;
+                        data_fake.last_closep = data->last_closep;
+                        data_fake.last_close_opp = data->last_close_opp;
+                    }
+                    else {
+                        data_fake.last_closep = &fake_last_close;
+                        data_fake.last_close_opp = &fake_last_close_op;
+                    }
+                    data_fake.pos_delta = delta;
+                    if (flags & SCF_DO_STCLASS) {
+                        ssc_init(pRExC_state, &this_class);
+                        data_fake.start_class = &this_class;
+                        f |= SCF_DO_STCLASS_AND;
+                    }
+                    if (flags & SCF_WHILEM_VISITED_POS)
+                        f |= SCF_WHILEM_VISITED_POS;
+
+                    if (trie->jump[word]) {
+                        if (!nextbranch)
+                            nextbranch = trie_node + trie->jump[0];
+                        scan= trie_node + trie->jump[word];
+                        /* We go from the jump point to the branch that follows
+                           it. Note this means we need the vestigal unused
+                           branches even though they arent otherwise used. */
+                        /* optimise study_chunk() for TRIE */
+                        minnext = study_chunk(pRExC_state, &scan, minlenp,
+                            &deltanext, (regnode *)nextbranch, &data_fake,
+                            stopparen, recursed_depth, NULL, f, depth+1,
+                            mutate_ok);
+                    }
+                    if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
+                        nextbranch= regnext((regnode*)nextbranch);
+
+                    if (min1 > (SSize_t)(minnext + trie->minlen))
+                        min1 = minnext + trie->minlen;
+                    if (deltanext == OPTIMIZE_INFTY) {
+                        is_inf = is_inf_internal = 1;
+                        max1 = OPTIMIZE_INFTY;
+                    } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
+                        max1 = minnext + deltanext + trie->maxlen;
+
+                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+                        pars++;
+                    if (data_fake.flags & SCF_SEEN_ACCEPT) {
+                        if ( stopmin > min + min1)
+                            stopmin = min + min1;
+                        flags &= ~SCF_DO_SUBSTR;
+                        if (data)
+                            data->flags |= SCF_SEEN_ACCEPT;
+                    }
+                    if (data) {
+                        if (data_fake.flags & SF_HAS_EVAL)
+                            data->flags |= SF_HAS_EVAL;
+                        data->whilem_c = data_fake.whilem_c;
+                    }
+                    if (flags & SCF_DO_STCLASS)
+                        ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
+                }
+                DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
+            }
+            if (flags & SCF_DO_SUBSTR) {
+                data->pos_min += min1;
+                data->pos_delta += max1 - min1;
+                if (max1 != min1 || is_inf)
+                    data->cur_is_floating = 1; /* float */
+            }
+            min += min1;
+            if (delta != OPTIMIZE_INFTY) {
+                if (OPTIMIZE_INFTY - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = OPTIMIZE_INFTY;
+            }
+            if (flags & SCF_DO_STCLASS_OR) {
+                ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
+                if (min1) {
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+                    flags &= ~SCF_DO_STCLASS;
+                }
+            }
+            else if (flags & SCF_DO_STCLASS_AND) {
+                if (min1) {
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
+                    flags &= ~SCF_DO_STCLASS;
+                }
+                else {
+                    /* Switch to OR mode: cache the old value of
+                     * data->start_class */
+                    INIT_AND_WITHP;
+                    StructCopy(data->start_class, and_withp, regnode_ssc);
+                    flags &= ~SCF_DO_STCLASS_AND;
+                    StructCopy(&accum, data->start_class, regnode_ssc);
+                    flags |= SCF_DO_STCLASS_OR;
+                }
+            }
+            scan= tail;
+            DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
+            continue;
+        }
+#else
+        else if (REGNODE_TYPE(OP(scan)) == TRIE) {
+            reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
+            U8*bang=NULL;
+
+            min += trie->minlen;
+            delta += (trie->maxlen - trie->minlen);
+            flags &= ~SCF_DO_STCLASS; /* xxx */
+            if (flags & SCF_DO_SUBSTR) {
+                /* Cannot expect anything... */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                data->pos_min += trie->minlen;
+                data->pos_delta += (trie->maxlen - trie->minlen);
+                if (trie->maxlen != trie->minlen)
+                    data->cur_is_floating = 1; /* float */
+            }
+            if (trie->jump) /* no more substrings -- for now /grr*/
+               flags &= ~SCF_DO_SUBSTR;
+        }
+
+#endif /* old or new */
+#endif /* TRIE_STUDY_OPT */
+
+        else if (OP(scan) == REGEX_SET) {
+            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
+                             " before optimization", REGNODE_NAME(REGEX_SET));
+        }
+
+        /* Else: zero-length, ignore. */
+        scan = regnext(scan);
+    }
+
+  finish:
+    if (frame) {
+        /* we need to unwind recursion. */
+        depth = depth - 1;
+
+        DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
+        DEBUG_PEEP("fend", scan, depth, flags);
+
+        /* restore previous context */
+        last = frame->last_regnode;
+        scan = frame->next_regnode;
+        stopparen = frame->stopparen;
+        recursed_depth = frame->prev_recursed_depth;
+
+        RExC_frame_last = frame->prev_frame;
+        frame = frame->this_prev_frame;
+        goto fake_study_recurse;
+    }
+
+    assert(!frame);
+    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
+
+    /* is this pattern infinite? Eg, consider /(a|b+)/ */
+    if (is_inf_internal)
+        delta = OPTIMIZE_INFTY;
+
+    /* deal with (*ACCEPT), Eg, consider /(foo(*ACCEPT)|bop)bar/ */
+    if (min > stopmin) {
+        /*
+        At this point 'min' represents the minimum length string we can
+        match while *ignoring* the implication of ACCEPT, and 'delta'
+        represents the difference between the minimum length and maximum
+        length, and if the pattern matches an infinitely long string
+        (consider the + and * quantifiers) then we use the special delta
+        value of OPTIMIZE_INFTY to represent it. 'stopmin' is the
+        minimum length that can be matched *and* accepted.
+
+        A pattern is accepted when matching was successful *and*
+        complete, and thus there is no further matching needing to be
+        done, no backtracking to occur, etc. Prior to the introduction
+        of ACCEPT the only opcode that signaled acceptance was the END
+        opcode, which is always the very last opcode in a regex program.
+        ACCEPT is thus conceptually an early successful return out of
+        the matching process. stopmin starts out as OPTIMIZE_INFTY to
+        represent "the entire pattern", and is ratched down to the
+        "current min" if necessary when an ACCEPT opcode is encountered.
+
+        Thus stopmin might be smaller than min if we saw an (*ACCEPT),
+        and we now need to account for it in both min and delta.
+        Consider that in a pattern /AB/ normally the min length it can
+        match can be computed as min(A)+min(B). But (*ACCEPT) means
+        that it might be something else, not even neccesarily min(A) at
+        all. Consider
+
+             A  = /(foo(*ACCEPT)|x+)/
+             B  = /whop/
+             AB = /(foo(*ACCEPT)|x+)whop/
+
+        The min for A is 1 for "x" and the delta for A is OPTIMIZE_INFTY
+        for "xxxxx...", its stopmin is 3 for "foo". The min for B is 4 for
+        "whop", and the delta of 0 as the pattern is of fixed length, the
+        stopmin would be OPTIMIZE_INFTY as it does not contain an ACCEPT.
+        When handling AB we expect to see a min of 5 for "xwhop", and a
+        delta of OPTIMIZE_INFTY for "xxxxx...whop", and a stopmin of 3
+        for "foo". This should result in a final min of 3 for "foo", and
+        a final delta of OPTIMIZE_INFTY for "xxxxx...whop".
+
+        In something like /(dude(*ACCEPT)|irk)x{3,7}/ we would have a
+        min of 6 for "irkxxx" and a delta of 4 for "irkxxxxxxx", and the
+        stop min would be 4 for "dude". This should result in a final
+        min of 4 for "dude", and a final delta of 6, for "irkxxxxxxx".
+
+        When min is smaller than stopmin then we can ignore it. In the
+        fragment /(x{10,20}(*ACCEPT)|a)b+/, we would have a min of 2,
+        and a delta of OPTIMIZE_INFTY, and a stopmin of 10. Obviously
+        the ACCEPT doesn't reduce the minimum length of the string that
+        might be matched, nor affect the maximum length.
+
+        In something like /foo(*ACCEPT)ba?r/ we would have a min of 5
+        for "foobr", a delta of 1 for "foobar", and a stopmin of 3 for
+        "foo". We currently turn this into a min of 3 for "foo" and a
+        delta of 3 for "foobar" even though technically "foobar" isn't
+        possible. ACCEPT affects some aspects of the optimizer, like
+        length computations and mandatory substring optimizations, but
+        there are other optimzations this routine perfoms that are not
+        affected and this compromise simplifies implementation.
+
+        It might be helpful to consider that this C function is called
+        recursively on the pattern in a bottom up fashion, and that the
+        min returned by a nested call may be marked as coming from an
+        ACCEPT, causing its callers to treat the returned min as a
+        stopmin as the recursion unwinds. Thus a single ACCEPT can affect
+        multiple calls into this function in different ways.
+        */
+
+        if (OPTIMIZE_INFTY - delta >= min - stopmin)
+            delta += min - stopmin;
+        else
+            delta = OPTIMIZE_INFTY;
+        min = stopmin;
+    }
+
+    *scanp = scan;
+    *deltap = delta;
+
+    if (flags & SCF_DO_SUBSTR && is_inf)
+        data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
+    if (is_par > (I32)U8_MAX)
+        is_par = 0;
+    if (is_par && pars==1 && data) {
+        data->flags |= SF_IN_PAR;
+        data->flags &= ~SF_HAS_PAR;
+    }
+    else if (pars && data) {
+        data->flags |= SF_HAS_PAR;
+        data->flags &= ~SF_IN_PAR;
+    }
+    if (flags & SCF_DO_STCLASS_OR)
+        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+    if (flags & SCF_TRIE_RESTUDY)
+        data->flags |=  SCF_TRIE_RESTUDY;
+
+
+    if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
+        if (min > OPTIMIZE_INFTY - delta)
+            RExC_maxlen = OPTIMIZE_INFTY;
+        else if (RExC_maxlen < min + delta)
+            RExC_maxlen = min + delta;
+    }
+    DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
+    return min;
+}
diff --git a/regcomp_trie.c b/regcomp_trie.c
new file mode 100644
index 0000000000..65963cacf2
--- /dev/null
+++ b/regcomp_trie.c
@@ -0,0 +1,1688 @@
+#ifdef PERL_EXT_RE_BUILD
+#include "re_top.h"
+#endif
+
+#include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
+#define PERL_IN_REGCOMP_ANY
+#define PERL_IN_REGCOMP_TRIE_C
+#include "perl.h"
+
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
+
+#include "invlist_inline.h"
+#include "unicode_constants.h"
+#include "regcomp_internal.h"
+
+#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
+#define TRIE_LIST_CUR(state)  ( TRIE_LIST_ITEM( state, 0 ).forid )
+#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
+#define TRIE_LIST_USED(idx)  ( trie->states[state].trans.list         \
+                               ? (TRIE_LIST_CUR( idx ) - 1)           \
+                               : 0 )
+
+
+#ifdef DEBUGGING
+/*
+   dump_trie(trie,widecharmap,revcharmap)
+   dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
+   dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
+
+   These routines dump out a trie in a somewhat readable format.
+   The _interim_ variants are used for debugging the interim
+   tables that are used to generate the final compressed
+   representation which is what dump_trie expects.
+
+   Part of the reason for their existence is to provide a form
+   of documentation as to how the different representations function.
+
+*/
+
+/*
+  Dumps the final compressed table form of the trie to Perl_debug_log.
+  Used for debugging make_trie().
+*/
+
+STATIC void
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
+            AV *revcharmap, U32 depth)
+{
+    U32 state;
+    SV *sv=sv_newmortal();
+    int colwidth= widecharmap ? 6 : 4;
+    U16 word;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_DUMP_TRIE;
+
+    Perl_re_indentf( aTHX_  "Char : %-6s%-6s%-4s ",
+        depth+1, "Match","Base","Ofs" );
+
+    for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
+        SV ** const tmp = av_fetch_simple( revcharmap, state, 0);
+        if ( tmp ) {
+            Perl_re_printf( aTHX_  "%*s",
+                colwidth,
+                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
+                            PL_colors[0], PL_colors[1],
+                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+                            PERL_PV_ESCAPE_FIRSTCHAR
+                )
+            );
+        }
+    }
+    Perl_re_printf( aTHX_  "\n");
+    Perl_re_indentf( aTHX_ "State|-----------------------", depth+1);
+
+    for( state = 0 ; state < trie->uniquecharcount ; state++ )
+        Perl_re_printf( aTHX_  "%.*s", colwidth, "--------");
+    Perl_re_printf( aTHX_  "\n");
+
+    for( state = 1 ; state < trie->statecount ; state++ ) {
+        const U32 base = trie->states[ state ].trans.base;
+
+        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
+
+        if ( trie->states[ state ].wordnum ) {
+            Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
+        } else {
+            Perl_re_printf( aTHX_  "%6s", "" );
+        }
+
+        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
+
+        if ( base ) {
+            U32 ofs = 0;
+
+            while( ( base + ofs  < trie->uniquecharcount ) ||
+                   ( base + ofs - trie->uniquecharcount < trie->lasttrans
+                     && trie->trans[ base + ofs - trie->uniquecharcount ].check
+                                                                    != state))
+                    ofs++;
+
+            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
+
+            for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
+                if ( ( base + ofs >= trie->uniquecharcount )
+                        && ( base + ofs - trie->uniquecharcount
+                                                        < trie->lasttrans )
+                        && trie->trans[ base + ofs
+                                    - trie->uniquecharcount ].check == state )
+                {
+                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
+                    (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
+                   );
+                } else {
+                    Perl_re_printf( aTHX_  "%*s", colwidth,"   ." );
+                }
+            }
+
+            Perl_re_printf( aTHX_  "]");
+
+        }
+        Perl_re_printf( aTHX_  "\n" );
+    }
+    Perl_re_indentf( aTHX_  "word_info N:(prev,len)=",
+                                depth);
+    for (word=1; word <= trie->wordcount; word++) {
+        Perl_re_printf( aTHX_  " %d:(%d,%d)",
+            (int)word, (int)(trie->wordinfo[word].prev),
+            (int)(trie->wordinfo[word].len));
+    }
+    Perl_re_printf( aTHX_  "\n" );
+}
+/*
+  Dumps a fully constructed but uncompressed trie in list form.
+  List tries normally only are used for construction when the number of
+  possible chars (trie->uniquecharcount) is very high.
+  Used for debugging make_trie().
+*/
+STATIC void
+S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
+                         HV *widecharmap, AV *revcharmap, U32 next_alloc,
+                         U32 depth)
+{
+    U32 state;
+    SV *sv=sv_newmortal();
+    int colwidth= widecharmap ? 6 : 4;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
+
+    /* print out the table precompression.  */
+    Perl_re_indentf( aTHX_  "State :Word | Transition Data\n",
+            depth+1 );
+    Perl_re_indentf( aTHX_  "%s",
+            depth+1, "------:-----+-----------------\n" );
+
+    for( state=1 ; state < next_alloc ; state ++ ) {
+        U16 charid;
+
+        Perl_re_indentf( aTHX_  " %4" UVXf " :",
+            depth+1, (UV)state  );
+        if ( ! trie->states[ state ].wordnum ) {
+            Perl_re_printf( aTHX_  "%5s| ","");
+        } else {
+            Perl_re_printf( aTHX_  "W%4x| ",
+                trie->states[ state ].wordnum
+            );
+        }
+        for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
+            SV ** const tmp = av_fetch_simple( revcharmap,
+                                        TRIE_LIST_ITEM(state, charid).forid, 0);
+            if ( tmp ) {
+                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
+                    colwidth,
+                    pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
+                              colwidth,
+                              PL_colors[0], PL_colors[1],
+                              (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
+                              | PERL_PV_ESCAPE_FIRSTCHAR
+                    ) ,
+                    TRIE_LIST_ITEM(state, charid).forid,
+                    (UV)TRIE_LIST_ITEM(state, charid).newstate
+                );
+                if (!(charid % 10))
+                    Perl_re_printf( aTHX_  "\n%*s| ",
+                        (int)((depth * 2) + 14), "");
+            }
+        }
+        Perl_re_printf( aTHX_  "\n");
+    }
+}
+
+/*
+  Dumps a fully constructed but uncompressed trie in table form.
+  This is the normal DFA style state transition table, with a few
+  twists to facilitate compression later.
+  Used for debugging make_trie().
+*/
+STATIC void
+S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
+                          HV *widecharmap, AV *revcharmap, U32 next_alloc,
+                          U32 depth)
+{
+    U32 state;
+    U16 charid;
+    SV *sv=sv_newmortal();
+    int colwidth= widecharmap ? 6 : 4;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
+
+    /*
+       print out the table precompression so that we can do a visual check
+       that they are identical.
+     */
+
+    Perl_re_indentf( aTHX_  "Char : ", depth+1 );
+
+    for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
+        SV ** const tmp = av_fetch_simple( revcharmap, charid, 0);
+        if ( tmp ) {
+            Perl_re_printf( aTHX_  "%*s",
+                colwidth,
+                pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
+                            PL_colors[0], PL_colors[1],
+                            (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+                            PERL_PV_ESCAPE_FIRSTCHAR
+                )
+            );
+        }
+    }
+
+    Perl_re_printf( aTHX_ "\n");
+    Perl_re_indentf( aTHX_  "State+-", depth+1 );
+
+    for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
+        Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
+    }
+
+    Perl_re_printf( aTHX_  "\n" );
+
+    for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
+
+        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
+            depth+1,
+            (UV)TRIE_NODENUM( state ) );
+
+        for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
+            UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
+            if (v)
+                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
+            else
+                Perl_re_printf( aTHX_  "%*s", colwidth, "." );
+        }
+        if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
+            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
+                                            (UV)trie->trans[ state ].check );
+        } else {
+            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
+                                            (UV)trie->trans[ state ].check,
+            trie->states[ TRIE_NODENUM( state ) ].wordnum );
+        }
+    }
+}
+
+#endif
+
+
+/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
+  startbranch: the first branch in the whole branch sequence
+  first      : start branch of sequence of branch-exact nodes.
+               May be the same as startbranch
+  last       : Thing following the last branch.
+               May be the same as tail.
+  tail       : item following the branch sequence
+  count      : words in the sequence
+  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/
+  depth      : indent depth
+
+Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
+
+A trie is an N'ary tree where the branches are determined by digital
+decomposition of the key. IE, at the root node you look up the 1st character and
+follow that branch repeat until you find the end of the branches. Nodes can be
+marked as "accepting" meaning they represent a complete word. Eg:
+
+  /he|she|his|hers/
+
+would convert into the following structure. Numbers represent states, letters
+following numbers represent valid transitions on the letter from that state, if
+the number is in square brackets it represents an accepting state, otherwise it
+will be in parenthesis.
+
+      +-h->+-e->[3]-+-r->(8)-+-s->[9]
+      |    |
+      |   (2)
+      |    |
+     (1)   +-i->(6)-+-s->[7]
+      |
+      +-s->(3)-+-h->(4)-+-e->[5]
+
+      Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
+
+This shows that when matching against the string 'hers' we will begin at state 1
+read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
+then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
+is also accepting. Thus we know that we can match both 'he' and 'hers' with a
+single traverse. We store a mapping from accepting to state to which word was
+matched, and then when we have multiple possibilities we try to complete the
+rest of the regex in the order in which they occurred in the alternation.
+
+The only prior NFA like behaviour that would be changed by the TRIE support is
+the silent ignoring of duplicate alternations which are of the form:
+
+ / (DUPE|DUPE) X? (?{ ... }) Y /x
+
+Thus EVAL blocks following a trie may be called a different number of times with
+and without the optimisation. With the optimisations dupes will be silently
+ignored. This inconsistent behaviour of EVAL type nodes is well established as
+the following demonstrates:
+
+ 'words'=~/(word|word|word)(?{ print $1 })[xyz]/
+
+which prints out 'word' three times, but
+
+ 'words'=~/(word|word|word)(?{ print $1 })S/
+
+which doesnt print it out at all. This is due to other optimisations kicking in.
+
+Example of what happens on a structural level:
+
+The regexp /(ac|ad|ab)+/ will produce the following debug output:
+
+   1: CURLYM[1] {1,32767}(18)
+   5:   BRANCH(8)
+   6:     EXACT <ac>(16)
+   8:   BRANCH(11)
+   9:     EXACT <ad>(16)
+  11:   BRANCH(14)
+  12:     EXACT <ab>(16)
+  16:   SUCCEED(0)
+  17:   NOTHING(18)
+  18: END(0)
+
+This would be optimizable with startbranch=5, first=5, last=16, tail=16
+and should turn into:
+
+   1: CURLYM[1] {1,32767}(18)
+   5:   TRIE(16)
+        [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
+          <ac>
+          <ad>
+          <ab>
+  16:   SUCCEED(0)
+  17:   NOTHING(18)
+  18: END(0)
+
+Cases where tail != last would be like /(?foo|bar)baz/:
+
+   1: BRANCH(4)
+   2:   EXACT <foo>(8)
+   4: BRANCH(7)
+   5:   EXACT <bar>(8)
+   7: TAIL(8)
+   8: EXACT <baz>(10)
+  10: END(0)
+
+which would be optimizable with startbranch=1, first=1, last=7, tail=8
+and would end up looking like:
+
+    1: TRIE(8)
+      [Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
+        <foo>
+        <bar>
+   7: TAIL(8)
+   8: EXACT <baz>(10)
+  10: END(0)
+
+    d = uvchr_to_utf8_flags(d, uv, 0);
+
+is the recommended Unicode-aware way of saying
+
+    *(d++) = uv;
+*/
+
+#define TRIE_STORE_REVCHAR(val)                                            \
+    STMT_START {                                                           \
+        if (UTF) {                                                         \
+            SV *zlopp = newSV(UTF8_MAXBYTES);                              \
+            unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
+            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
+            *kapow = '\0';                                                 \
+            SvCUR_set(zlopp, kapow - flrbbbbb);                            \
+            SvPOK_on(zlopp);                                               \
+            SvUTF8_on(zlopp);                                              \
+            av_push_simple(revcharmap, zlopp);                                     \
+        } else {                                                           \
+            char ooooff = (char)val;                                           \
+            av_push_simple(revcharmap, newSVpvn(&ooooff, 1));                      \
+        }                                                                  \
+        } STMT_END
+
+/* This gets the next character from the input, folding it if not already
+ * folded. */
+#define TRIE_READ_CHAR STMT_START {                                           \
+    wordlen++;                                                                \
+    if ( UTF ) {                                                              \
+        /* if it is UTF then it is either already folded, or does not need    \
+         * folding */                                                         \
+        uvc = valid_utf8_to_uvchr( (const U8*) uc, &len);                     \
+    }                                                                         \
+    else if (folder == PL_fold_latin1) {                                      \
+        /* This folder implies Unicode rules, which in the range expressible  \
+         *  by not UTF is the lower case, with the two exceptions, one of     \
+         *  which should have been taken care of before calling this */       \
+        assert(*uc != LATIN_SMALL_LETTER_SHARP_S);                            \
+        uvc = toLOWER_L1(*uc);                                                \
+        if (UNLIKELY(uvc == MICRO_SIGN)) uvc = GREEK_SMALL_LETTER_MU;         \
+        len = 1;                                                              \
+    } else {                                                                  \
+        /* raw data, will be folded later if needed */                        \
+        uvc = (U32)*uc;                                                       \
+        len = 1;                                                              \
+    }                                                                         \
+} STMT_END
+
+
+
+#define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
+    if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
+        U32 ging = TRIE_LIST_LEN( state ) * 2;                  \
+        Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
+        TRIE_LIST_LEN( state ) = ging;                          \
+    }                                                           \
+    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
+    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
+    TRIE_LIST_CUR( state )++;                                   \
+} STMT_END
+
+#define TRIE_LIST_NEW(state) STMT_START {                       \
+    Newx( trie->states[ state ].trans.list,                     \
+        4, reg_trie_trans_le );                                 \
+     TRIE_LIST_CUR( state ) = 1;                                \
+     TRIE_LIST_LEN( state ) = 4;                                \
+} STMT_END
+
+#define TRIE_HANDLE_WORD(state) STMT_START {                    \
+    U16 dupe= trie->states[ state ].wordnum;                    \
+    regnode * const noper_next = regnext( noper );              \
+                                                                \
+    DEBUG_r({                                                   \
+        /* store the word for dumping */                        \
+        SV* tmp;                                                \
+        if (OP(noper) != NOTHING)                               \
+            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);    \
+        else                                                    \
+            tmp = newSVpvn_utf8( "", 0, UTF );                  \
+        av_push_simple( trie_words, tmp );                             \
+    });                                                         \
+                                                                \
+    curword++;                                                  \
+    trie->wordinfo[curword].prev   = 0;                         \
+    trie->wordinfo[curword].len    = wordlen;                   \
+    trie->wordinfo[curword].accept = state;                     \
+                                                                \
+    if ( noper_next < tail ) {                                  \
+        if (!trie->jump)                                        \
+            trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, \
+                                                 sizeof(U16) ); \
+        trie->jump[curword] = (U16)(noper_next - convert);      \
+        if (!jumper)                                            \
+            jumper = noper_next;                                \
+        if (!nextbranch)                                        \
+            nextbranch= regnext(cur);                           \
+    }                                                           \
+                                                                \
+    if ( dupe ) {                                               \
+        /* It's a dupe. Pre-insert into the wordinfo[].prev   */\
+        /* chain, so that when the bits of chain are later    */\
+        /* linked together, the dups appear in the chain      */\
+        trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
+        trie->wordinfo[dupe].prev = curword;                    \
+    } else {                                                    \
+        /* we haven't inserted this word yet.                */ \
+        trie->states[ state ].wordnum = curword;                \
+    }                                                           \
+} STMT_END
+
+
+#define TRIE_TRANS_STATE(state,base,ucharcount,charid,special)          \
+     ( ( base + charid >=  ucharcount                                   \
+         && base + charid < ubound                                      \
+         && state == trie->trans[ base - ucharcount + charid ].check    \
+         && trie->trans[ base - ucharcount + charid ].next )            \
+           ? trie->trans[ base - ucharcount + charid ].next             \
+           : ( state==1 ? special : 0 )                                 \
+      )
+
+#define TRIE_BITMAP_SET_FOLDED(trie, uvc, folder)           \
+STMT_START {                                                \
+    TRIE_BITMAP_SET(trie, uvc);                             \
+    /* store the folded codepoint */                        \
+    if ( folder )                                           \
+        TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);           \
+                                                            \
+    if ( !UTF ) {                                           \
+        /* store first byte of utf8 representation of */    \
+        /* variant codepoints */                            \
+        if (! UVCHR_IS_INVARIANT(uvc)) {                    \
+            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));   \
+        }                                                   \
+    }                                                       \
+} STMT_END
+
+I32
+Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
+                  regnode *first, regnode *last, regnode *tail,
+                  U32 word_count, U32 flags, U32 depth)
+{
+    /* first pass, loop through and scan words */
+    reg_trie_data *trie;
+    HV *widecharmap = NULL;
+    AV *revcharmap = newAV();
+    regnode *cur;
+    STRLEN len = 0;
+    UV uvc = 0;
+    U16 curword = 0;
+    U32 next_alloc = 0;
+    regnode *jumper = NULL;
+    regnode *nextbranch = NULL;
+    regnode *convert = NULL;
+    U32 *prev_states; /* temp array mapping each state to previous one */
+    /* we just use folder as a flag in utf8 */
+    const U8 * folder = NULL;
+
+    /* in the below reg_add_data call we are storing either 'tu' or 'tuaa'
+     * which stands for one trie structure, one hash, optionally followed
+     * by two arrays */
+#ifdef DEBUGGING
+    const U32 data_slot = reg_add_data( pRExC_state, STR_WITH_LEN("tuaa"));
+    AV *trie_words = NULL;
+    /* along with revcharmap, this only used during construction but both are
+     * useful during debugging so we store them in the struct when debugging.
+     */
+#else
+    const U32 data_slot = reg_add_data( pRExC_state, STR_WITH_LEN("tu"));
+    STRLEN trie_charcount=0;
+#endif
+    SV *re_trie_maxbuff;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_MAKE_TRIE;
+#ifndef DEBUGGING
+    PERL_UNUSED_ARG(depth);
+#endif
+
+    switch (flags) {
+        case EXACT: case EXACT_REQ8: case EXACTL: break;
+        case EXACTFAA:
+        case EXACTFUP:
+        case EXACTFU:
+        case EXACTFLU8: folder = PL_fold_latin1; break;
+        case EXACTF:  folder = PL_fold; break;
+        default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, REGNODE_NAME(flags) );
+    }
+
+    trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
+    trie->refcount = 1;
+    trie->startstate = 1;
+    trie->wordcount = word_count;
+    RExC_rxi->data->data[ data_slot ] = (void*)trie;
+    trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
+    if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL)
+        trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
+    trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
+                       trie->wordcount+1, sizeof(reg_trie_wordinfo));
+
+    DEBUG_r({
+        trie_words = newAV();
+    });
+
+    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
+    assert(re_trie_maxbuff);
+    if (!SvIOK(re_trie_maxbuff)) {
+        sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
+    }
+    DEBUG_TRIE_COMPILE_r({
+        Perl_re_indentf( aTHX_
+          "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
+          depth+1,
+          REG_NODE_NUM(startbranch), REG_NODE_NUM(first),
+          REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth);
+    });
+
+   /* Find the node we are going to overwrite */
+    if ( first == startbranch && OP( last ) != BRANCH ) {
+        /* whole branch chain */
+        convert = first;
+    } else {
+        /* branch sub-chain */
+        convert = REGNODE_AFTER( first );
+    }
+
+    /*  -- First loop and Setup --
+
+       We first traverse the branches and scan each word to determine if it
+       contains widechars, and how many unique chars there are, this is
+       important as we have to build a table with at least as many columns as we
+       have unique chars.
+
+       We use an array of integers to represent the character codes 0..255
+       (trie->charmap) and we use a an HV* to store Unicode characters. We use
+       the native representation of the character value as the key and IV's for
+       the coded index.
+
+       *TODO* If we keep track of how many times each character is used we can
+       remap the columns so that the table compression later on is more
+       efficient in terms of memory by ensuring the most common value is in the
+       middle and the least common are on the outside.  IMO this would be better
+       than a most to least common mapping as theres a decent chance the most
+       common letter will share a node with the least common, meaning the node
+       will not be compressible. With a middle is most common approach the worst
+       case is when we have the least common nodes twice.
+
+     */
+
+    for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
+        regnode *noper = REGNODE_AFTER( cur );
+        const U8 *uc;
+        const U8 *e;
+        int foldlen = 0;
+        U32 wordlen      = 0;         /* required init */
+        STRLEN minchars = 0;
+        STRLEN maxchars = 0;
+        bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the
+                                               bitmap?*/
+
+        if (OP(noper) == NOTHING) {
+            /* skip past a NOTHING at the start of an alternation
+             * eg, /(?:)a|(?:b)/ should be the same as /a|b/
+             *
+             * If the next node is not something we are supposed to process
+             * we will just ignore it due to the condition guarding the
+             * next block.
+             */
+
+            regnode *noper_next= regnext(noper);
+            if (noper_next < tail)
+                noper= noper_next;
+        }
+
+        if (    noper < tail
+            && (    OP(noper) == flags
+                || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
+                                         || OP(noper) == EXACTFUP))))
+        {
+            uc= (U8*)STRING(noper);
+            e= uc + STR_LEN(noper);
+        } else {
+            trie->minlen= 0;
+            continue;
+        }
+
+
+        if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
+            TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
+                                          regardless of encoding */
+            if (OP( noper ) == EXACTFUP) {
+                /* false positives are ok, so just set this */
+                TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
+            }
+        }
+
+        for ( ; uc < e ; uc += len ) {  /* Look at each char in the current
+                                           branch */
+            TRIE_CHARCOUNT(trie)++;
+            TRIE_READ_CHAR;
+
+            /* TRIE_READ_CHAR returns the current character, or its fold if /i
+             * is in effect.  Under /i, this character can match itself, or
+             * anything that folds to it.  If not under /i, it can match just
+             * itself.  Most folds are 1-1, for example k, K, and KELVIN SIGN
+             * all fold to k, and all are single characters.   But some folds
+             * expand to more than one character, so for example LATIN SMALL
+             * LIGATURE FFI folds to the three character sequence 'ffi'.  If
+             * the string beginning at 'uc' is 'ffi', it could be matched by
+             * three characters, or just by the one ligature character. (It
+             * could also be matched by two characters: LATIN SMALL LIGATURE FF
+             * followed by 'i', or by 'f' followed by LATIN SMALL LIGATURE FI).
+             * (Of course 'I' and/or 'F' instead of 'i' and 'f' can also
+             * match.)  The trie needs to know the minimum and maximum number
+             * of characters that could match so that it can use size alone to
+             * quickly reject many match attempts.  The max is simple: it is
+             * the number of folded characters in this branch (since a fold is
+             * never shorter than what folds to it. */
+
+            maxchars++;
+
+            /* And the min is equal to the max if not under /i (indicated by
+             * 'folder' being NULL), or there are no multi-character folds.  If
+             * there is a multi-character fold, the min is incremented just
+             * once, for the character that folds to the sequence.  Each
+             * character in the sequence needs to be added to the list below of
+             * characters in the trie, but we count only the first towards the
+             * min number of characters needed.  This is done through the
+             * variable 'foldlen', which is returned by the macros that look
+             * for these sequences as the number of bytes the sequence
+             * occupies.  Each time through the loop, we decrement 'foldlen' by
+             * how many bytes the current char occupies.  Only when it reaches
+             * 0 do we increment 'minchars' or look for another multi-character
+             * sequence. */
+            if (folder == NULL) {
+                minchars++;
+            }
+            else if (foldlen > 0) {
+                foldlen -= (UTF) ? UTF8SKIP(uc) : 1;
+            }
+            else {
+                minchars++;
+
+                /* See if *uc is the beginning of a multi-character fold.  If
+                 * so, we decrement the length remaining to look at, to account
+                 * for the current character this iteration.  (We can use 'uc'
+                 * instead of the fold returned by TRIE_READ_CHAR because the
+                 * macro is smart enough to account for any unfolded
+                 * characters. */
+                if (UTF) {
+                    if ((foldlen = is_MULTI_CHAR_FOLD_utf8_safe(uc, e))) {
+                        foldlen -= UTF8SKIP(uc);
+                    }
+                }
+                else if ((foldlen = is_MULTI_CHAR_FOLD_latin1_safe(uc, e))) {
+                    foldlen--;
+                }
+            }
+
+            /* The current character (and any potential folds) should be added
+             * to the possible matching characters for this position in this
+             * branch */
+            if ( uvc < 256 ) {
+                if ( folder ) {
+                    U8 folded= folder[ (U8) uvc ];
+                    if ( !trie->charmap[ folded ] ) {
+                        trie->charmap[ folded ]=( ++trie->uniquecharcount );
+                        TRIE_STORE_REVCHAR( folded );
+                    }
+                }
+                if ( !trie->charmap[ uvc ] ) {
+                    trie->charmap[ uvc ]=( ++trie->uniquecharcount );
+                    TRIE_STORE_REVCHAR( uvc );
+                }
+                if ( set_bit ) {
+                    /* store the codepoint in the bitmap, and its folded
+                     * equivalent. */
+                    TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
+                    set_bit = 0; /* We've done our bit :-) */
+                }
+            } else {
+
+                /* XXX We could come up with the list of code points that fold
+                 * to this using PL_utf8_foldclosures, except not for
+                 * multi-char folds, as there may be multiple combinations
+                 * there that could work, which needs to wait until runtime to
+                 * resolve (The comment about LIGATURE FFI above is such an
+                 * example */
+
+                SV** svpp;
+                if ( !widecharmap )
+                    widecharmap = newHV();
+
+                svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
+
+                if ( !svpp )
+                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
+
+                if ( !SvTRUE( *svpp ) ) {
+                    sv_setiv( *svpp, ++trie->uniquecharcount );
+                    TRIE_STORE_REVCHAR(uvc);
+                }
+            }
+        } /* end loop through characters in this branch of the trie */
+
+        /* We take the min and max for this branch and combine to find the min
+         * and max for all branches processed so far */
+        if( cur == first ) {
+            trie->minlen = minchars;
+            trie->maxlen = maxchars;
+        } else if (minchars < trie->minlen) {
+            trie->minlen = minchars;
+        } else if (maxchars > trie->maxlen) {
+            trie->maxlen = maxchars;
+        }
+    } /* end first pass */
+    DEBUG_TRIE_COMPILE_r(
+        Perl_re_indentf( aTHX_
+                "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
+                depth+1,
+                ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
+                (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
+                (int)trie->minlen, (int)trie->maxlen )
+    );
+
+    /*
+        We now know what we are dealing with in terms of unique chars and
+        string sizes so we can calculate how much memory a naive
+        representation using a flat table  will take. If it's over a reasonable
+        limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
+        conservative but potentially much slower representation using an array
+        of lists.
+
+        At the end we convert both representations into the same compressed
+        form that will be used in regexec.c for matching with. The latter
+        is a form that cannot be used to construct with but has memory
+        properties similar to the list form and access properties similar
+        to the table form making it both suitable for fast searches and
+        small enough that its feasable to store for the duration of a program.
+
+        See the comment in the code where the compressed table is produced
+        inplace from the flat tabe representation for an explanation of how
+        the compression works.
+
+    */
+
+
+    Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32);
+    prev_states[1] = 0;
+
+    if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1)
+                                                    > SvIV(re_trie_maxbuff) )
+    {
+        /*
+            Second Pass -- Array Of Lists Representation
+
+            Each state will be represented by a list of charid:state records
+            (reg_trie_trans_le) the first such element holds the CUR and LEN
+            points of the allocated array. (See defines above).
+
+            We build the initial structure using the lists, and then convert
+            it into the compressed table form which allows faster lookups
+            (but cant be modified once converted).
+        */
+
+        STRLEN transcount = 1;
+
+        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using list compiler\n",
+            depth+1));
+
+        trie->states = (reg_trie_state *)
+            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+                                  sizeof(reg_trie_state) );
+        TRIE_LIST_NEW(1);
+        next_alloc = 2;
+
+        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
+
+            regnode *noper   = REGNODE_AFTER( cur );
+            U32 state        = 1;         /* required init */
+            U16 charid       = 0;         /* sanity init */
+            U32 wordlen      = 0;         /* required init */
+
+            if (OP(noper) == NOTHING) {
+                regnode *noper_next= regnext(noper);
+                if (noper_next < tail)
+                    noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
+            }
+
+            if (    noper < tail
+                && (    OP(noper) == flags
+                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
+                                             || OP(noper) == EXACTFUP))))
+            {
+                const U8 *uc= (U8*)STRING(noper);
+                const U8 *e= uc + STR_LEN(noper);
+
+                for ( ; uc < e ; uc += len ) {
+
+                    TRIE_READ_CHAR;
+
+                    if ( uvc < 256 ) {
+                        charid = trie->charmap[ uvc ];
+                    } else {
+                        SV** const svpp = hv_fetch( widecharmap,
+                                                    (char*)&uvc,
+                                                    sizeof( UV ),
+                                                    0);
+                        if ( !svpp ) {
+                            charid = 0;
+                        } else {
+                            charid=(U16)SvIV( *svpp );
+                        }
+                    }
+                    /* charid is now 0 if we dont know the char read, or
+                     * nonzero if we do */
+                    if ( charid ) {
+
+                        U16 check;
+                        U32 newstate = 0;
+
+                        charid--;
+                        if ( !trie->states[ state ].trans.list ) {
+                            TRIE_LIST_NEW( state );
+                        }
+                        for ( check = 1;
+                              check <= TRIE_LIST_USED( state );
+                              check++ )
+                        {
+                            if ( TRIE_LIST_ITEM( state, check ).forid
+                                                                    == charid )
+                            {
+                                newstate = TRIE_LIST_ITEM( state, check ).newstate;
+                                break;
+                            }
+                        }
+                        if ( ! newstate ) {
+                            newstate = next_alloc++;
+                            prev_states[newstate] = state;
+                            TRIE_LIST_PUSH( state, charid, newstate );
+                            transcount++;
+                        }
+                        state = newstate;
+                    } else {
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
+                    }
+                }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD()
+                */
+                noper= REGNODE_AFTER(cur);
+            }
+            TRIE_HANDLE_WORD(state);
+
+        } /* end second pass */
+
+        /* next alloc is the NEXT state to be allocated */
+        trie->statecount = next_alloc;
+        trie->states = (reg_trie_state *)
+            PerlMemShared_realloc( trie->states,
+                                   next_alloc
+                                   * sizeof(reg_trie_state) );
+
+        /* and now dump it out before we compress it */
+        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
+                                                         revcharmap, next_alloc,
+                                                         depth+1)
+        );
+
+        trie->trans = (reg_trie_trans *)
+            PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
+        {
+            U32 state;
+            U32 tp = 0;
+            U32 zp = 0;
+
+
+            for( state=1 ; state < next_alloc ; state ++ ) {
+                U32 base=0;
+
+                /*
+                DEBUG_TRIE_COMPILE_MORE_r(
+                    Perl_re_printf( aTHX_  "tp: %d zp: %d ",tp,zp)
+                );
+                */
+
+                if (trie->states[state].trans.list) {
+                    U16 minid=TRIE_LIST_ITEM( state, 1).forid;
+                    U16 maxid=minid;
+                    U16 idx;
+
+                    for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
+                        const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
+                        if ( forid < minid ) {
+                            minid=forid;
+                        } else if ( forid > maxid ) {
+                            maxid=forid;
+                        }
+                    }
+                    if ( transcount < tp + maxid - minid + 1) {
+                        transcount *= 2;
+                        trie->trans = (reg_trie_trans *)
+                            PerlMemShared_realloc( trie->trans,
+                                                     transcount
+                                                     * sizeof(reg_trie_trans) );
+                        Zero( trie->trans + (transcount / 2),
+                              transcount / 2,
+                              reg_trie_trans );
+                    }
+                    base = trie->uniquecharcount + tp - minid;
+                    if ( maxid == minid ) {
+                        U32 set = 0;
+                        for ( ; zp < tp ; zp++ ) {
+                            if ( ! trie->trans[ zp ].next ) {
+                                base = trie->uniquecharcount + zp - minid;
+                                trie->trans[ zp ].next = TRIE_LIST_ITEM( state,
+                                                                   1).newstate;
+                                trie->trans[ zp ].check = state;
+                                set = 1;
+                                break;
+                            }
+                        }
+                        if ( !set ) {
+                            trie->trans[ tp ].next = TRIE_LIST_ITEM( state,
+                                                                   1).newstate;
+                            trie->trans[ tp ].check = state;
+                            tp++;
+                            zp = tp;
+                        }
+                    } else {
+                        for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
+                            const U32 tid = base
+                                           - trie->uniquecharcount
+                                           + TRIE_LIST_ITEM( state, idx ).forid;
+                            trie->trans[ tid ].next = TRIE_LIST_ITEM( state,
+                                                                idx ).newstate;
+                            trie->trans[ tid ].check = state;
+                        }
+                        tp += ( maxid - minid + 1 );
+                    }
+                    Safefree(trie->states[ state ].trans.list);
+                }
+                /*
+                DEBUG_TRIE_COMPILE_MORE_r(
+                    Perl_re_printf( aTHX_  " base: %d\n",base);
+                );
+                */
+                trie->states[ state ].trans.base=base;
+            }
+            trie->lasttrans = tp + 1;
+        }
+    } else {
+        /*
+           Second Pass -- Flat Table Representation.
+
+           we dont use the 0 slot of either trans[] or states[] so we add 1 to
+           each.  We know that we will need Charcount+1 trans at most to store
+           the data (one row per char at worst case) So we preallocate both
+           structures assuming worst case.
+
+           We then construct the trie using only the .next slots of the entry
+           structs.
+
+           We use the .check field of the first entry of the node temporarily
+           to make compression both faster and easier by keeping track of how
+           many non zero fields are in the node.
+
+           Since trans are numbered from 1 any 0 pointer in the table is a FAIL
+           transition.
+
+           There are two terms at use here: state as a TRIE_NODEIDX() which is
+           a number representing the first entry of the node, and state as a
+           TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1)
+           and TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3)
+           if there are 2 entrys per node. eg:
+
+             A B       A B
+          1. 2 4    1. 3 7
+          2. 0 3    3. 0 5
+          3. 0 0    5. 0 0
+          4. 0 0    7. 0 0
+
+           The table is internally in the right hand, idx form. However as we
+           also have to deal with the states array which is indexed by nodenum
+           we have to use TRIE_NODENUM() to convert.
+
+        */
+        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using table compiler\n",
+            depth+1));
+
+        trie->trans = (reg_trie_trans *)
+            PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
+                                  * trie->uniquecharcount + 1,
+                                  sizeof(reg_trie_trans) );
+        trie->states = (reg_trie_state *)
+            PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+                                  sizeof(reg_trie_state) );
+        next_alloc = trie->uniquecharcount + 1;
+
+
+        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
+
+            regnode *noper   = REGNODE_AFTER( cur );
+
+            U32 state        = 1;         /* required init */
+
+            U16 charid       = 0;         /* sanity init */
+            U32 accept_state = 0;         /* sanity init */
+
+            U32 wordlen      = 0;         /* required init */
+
+            if (OP(noper) == NOTHING) {
+                regnode *noper_next= regnext(noper);
+                if (noper_next < tail)
+                    noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
+            }
+
+            if (    noper < tail
+                && (    OP(noper) == flags
+                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
+                                             || OP(noper) == EXACTFUP))))
+            {
+                const U8 *uc= (U8*)STRING(noper);
+                const U8 *e= uc + STR_LEN(noper);
+
+                for ( ; uc < e ; uc += len ) {
+
+                    TRIE_READ_CHAR;
+
+                    if ( uvc < 256 ) {
+                        charid = trie->charmap[ uvc ];
+                    } else {
+                        SV* const * const svpp = hv_fetch( widecharmap,
+                                                           (char*)&uvc,
+                                                           sizeof( UV ),
+                                                           0);
+                        charid = svpp ? (U16)SvIV(*svpp) : 0;
+                    }
+                    if ( charid ) {
+                        charid--;
+                        if ( !trie->trans[ state + charid ].next ) {
+                            trie->trans[ state + charid ].next = next_alloc;
+                            trie->trans[ state ].check++;
+                            prev_states[TRIE_NODENUM(next_alloc)]
+                                    = TRIE_NODENUM(state);
+                            next_alloc += trie->uniquecharcount;
+                        }
+                        state = trie->trans[ state + charid ].next;
+                    } else {
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
+                    }
+                    /* charid is now 0 if we dont know the char read, or
+                     * nonzero if we do */
+                }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD().
+                */
+                noper= REGNODE_AFTER(cur);
+            }
+            accept_state = TRIE_NODENUM( state );
+            TRIE_HANDLE_WORD(accept_state);
+
+        } /* end second pass */
+
+        /* and now dump it out before we compress it */
+        DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
+                                                          revcharmap,
+                                                          next_alloc, depth+1));
+
+        {
+        /*
+           * Inplace compress the table.*
+
+           For sparse data sets the table constructed by the trie algorithm will
+           be mostly 0/FAIL transitions or to put it another way mostly empty.
+           (Note that leaf nodes will not contain any transitions.)
+
+           This algorithm compresses the tables by eliminating most such
+           transitions, at the cost of a modest bit of extra work during lookup:
+
+           - Each states[] entry contains a .base field which indicates the
+           index in the state[] array wheres its transition data is stored.
+
+           - If .base is 0 there are no valid transitions from that node.
+
+           - If .base is nonzero then charid is added to it to find an entry in
+           the trans array.
+
+           -If trans[states[state].base+charid].check!=state then the
+           transition is taken to be a 0/Fail transition. Thus if there are fail
+           transitions at the front of the node then the .base offset will point
+           somewhere inside the previous nodes data (or maybe even into a node
+           even earlier), but the .check field determines if the transition is
+           valid.
+
+           XXX - wrong maybe?
+           The following process inplace converts the table to the compressed
+           table: We first do not compress the root node 1,and mark all its
+           .check pointers as 1 and set its .base pointer as 1 as well. This
+           allows us to do a DFA construction from the compressed table later,
+           and ensures that any .base pointers we calculate later are greater
+           than 0.
+
+           - We set 'pos' to indicate the first entry of the second node.
+
+           - We then iterate over the columns of the node, finding the first and
+           last used entry at l and m. We then copy l..m into pos..(pos+m-l),
+           and set the .check pointers accordingly, and advance pos
+           appropriately and repreat for the next node. Note that when we copy
+           the next pointers we have to convert them from the original
+           NODEIDX form to NODENUM form as the former is not valid post
+           compression.
+
+           - If a node has no transitions used we mark its base as 0 and do not
+           advance the pos pointer.
+
+           - If a node only has one transition we use a second pointer into the
+           structure to fill in allocated fail transitions from other states.
+           This pointer is independent of the main pointer and scans forward
+           looking for null transitions that are allocated to a state. When it
+           finds one it writes the single transition into the "hole".  If the
+           pointer doesnt find one the single transition is appended as normal.
+
+           - Once compressed we can Renew/realloc the structures to release the
+           excess space.
+
+           See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
+           specifically Fig 3.47 and the associated pseudocode.
+
+           demq
+        */
+        const U32 laststate = TRIE_NODENUM( next_alloc );
+        U32 state, charid;
+        U32 pos = 0, zp=0;
+        trie->statecount = laststate;
+
+        for ( state = 1 ; state < laststate ; state++ ) {
+            U8 flag = 0;
+            const U32 stateidx = TRIE_NODEIDX( state );
+            const U32 o_used = trie->trans[ stateidx ].check;
+            U32 used = trie->trans[ stateidx ].check;
+            trie->trans[ stateidx ].check = 0;
+
+            for ( charid = 0;
+                  used && charid < trie->uniquecharcount;
+                  charid++ )
+            {
+                if ( flag || trie->trans[ stateidx + charid ].next ) {
+                    if ( trie->trans[ stateidx + charid ].next ) {
+                        if (o_used == 1) {
+                            for ( ; zp < pos ; zp++ ) {
+                                if ( ! trie->trans[ zp ].next ) {
+                                    break;
+                                }
+                            }
+                            trie->states[ state ].trans.base
+                                                    = zp
+                                                      + trie->uniquecharcount
+                                                      - charid ;
+                            trie->trans[ zp ].next
+                                = SAFE_TRIE_NODENUM( trie->trans[ stateidx
+                                                             + charid ].next );
+                            trie->trans[ zp ].check = state;
+                            if ( ++zp > pos ) pos = zp;
+                            break;
+                        }
+                        used--;
+                    }
+                    if ( !flag ) {
+                        flag = 1;
+                        trie->states[ state ].trans.base
+                                       = pos + trie->uniquecharcount - charid ;
+                    }
+                    trie->trans[ pos ].next
+                        = SAFE_TRIE_NODENUM(
+                                       trie->trans[ stateidx + charid ].next );
+                    trie->trans[ pos ].check = state;
+                    pos++;
+                }
+            }
+        }
+        trie->lasttrans = pos + 1;
+        trie->states = (reg_trie_state *)
+            PerlMemShared_realloc( trie->states, laststate
+                                   * sizeof(reg_trie_state) );
+        DEBUG_TRIE_COMPILE_MORE_r(
+            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
+                depth+1,
+                (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
+                       + 1 ),
+                (IV)next_alloc,
+                (IV)pos,
+                ( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
+            );
+
+        } /* end table compress */
+    }
+    DEBUG_TRIE_COMPILE_MORE_r(
+            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
+                depth+1,
+                (UV)trie->statecount,
+                (UV)trie->lasttrans)
+    );
+    /* resize the trans array to remove unused space */
+    trie->trans = (reg_trie_trans *)
+        PerlMemShared_realloc( trie->trans, trie->lasttrans
+                               * sizeof(reg_trie_trans) );
+
+    {   /* Modify the program and insert the new TRIE node */
+        U8 nodetype =(U8) flags;
+        char *str=NULL;
+
+#ifdef DEBUGGING
+        regnode *optimize = NULL;
+#endif /* DEBUGGING */
+        /*
+           This means we convert either the first branch or the first Exact,
+           depending on whether the thing following (in 'last') is a branch
+           or not and whther first is the startbranch (ie is it a sub part of
+           the alternation or is it the whole thing.)
+           Assuming its a sub part we convert the EXACT otherwise we convert
+           the whole branch sequence, including the first.
+         */
+        /* Find the node we are going to overwrite */
+        if ( first != startbranch || OP( last ) == BRANCH ) {
+            /* branch sub-chain */
+            NEXT_OFF( first ) = (U16)(last - first);
+            /* whole branch chain */
+        }
+        /* But first we check to see if there is a common prefix we can
+           split out as an EXACT and put in front of the TRIE node.  */
+        trie->startstate= 1;
+        if ( trie->bitmap && !widecharmap && !trie->jump  ) {
+            /* we want to find the first state that has more than
+             * one transition, if that state is not the first state
+             * then we have a common prefix which we can remove.
+             */
+            U32 state;
+            for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
+                U32 ofs = 0;
+                I32 first_ofs = -1; /* keeps track of the ofs of the first
+                                       transition, -1 means none */
+                U32 count = 0;
+                const U32 base = trie->states[ state ].trans.base;
+
+                /* does this state terminate an alternation? */
+                if ( trie->states[state].wordnum )
+                        count = 1;
+
+                for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
+                    if ( ( base + ofs >= trie->uniquecharcount ) &&
+                         ( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
+                         trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
+                    {
+                        if ( ++count > 1 ) {
+                            /* we have more than one transition */
+                            SV **tmp;
+                            U8 *ch;
+                            /* if this is the first state there is no common prefix
+                             * to extract, so we can exit */
+                            if ( state == 1 ) break;
+                            tmp = av_fetch_simple( revcharmap, ofs, 0);
+                            ch = (U8*)SvPV_nolen_const( *tmp );
+
+                            /* if we are on count 2 then we need to initialize the
+                             * bitmap, and store the previous char if there was one
+                             * in it*/
+                            if ( count == 2 ) {
+                                /* clear the bitmap */
+                                Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
+                                DEBUG_OPTIMISE_r(
+                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
+                                        depth+1,
+                                        (UV)state));
+                                if (first_ofs >= 0) {
+                                    SV ** const tmp = av_fetch_simple( revcharmap, first_ofs, 0);
+                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
+
+                                    TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
+                                    DEBUG_OPTIMISE_r(
+                                        Perl_re_printf( aTHX_  "%s", (char*)ch)
+                                    );
+                                }
+                            }
+                            /* store the current firstchar in the bitmap */
+                            TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
+                            DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
+                        }
+                        first_ofs = ofs;
+                    }
+                }
+                if ( count == 1 ) {
+                    /* This state has only one transition, its transition is part
+                     * of a common prefix - we need to concatenate the char it
+                     * represents to what we have so far. */
+                    SV **tmp = av_fetch_simple( revcharmap, first_ofs, 0);
+                    STRLEN len;
+                    char *ch = SvPV( *tmp, len );
+                    DEBUG_OPTIMISE_r({
+                        SV *sv=sv_newmortal();
+                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
+                            depth+1,
+                            (UV)state, (UV)first_ofs,
+                            pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
+                                PL_colors[0], PL_colors[1],
+                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+                                PERL_PV_ESCAPE_FIRSTCHAR
+                            )
+                        );
+                    });
+                    if ( state==1 ) {
+                        OP( convert ) = nodetype;
+                        str=STRING(convert);
+                        setSTR_LEN(convert, 0);
+                    }
+                    assert( ( STR_LEN(convert) + len ) < 256 );
+                    setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
+                    while (len--)
+                        *str++ = *ch++;
+                } else {
+#ifdef DEBUGGING
+                    if (state>1)
+                        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n"));
+#endif
+                    break;
+                }
+            }
+            trie->prefixlen = (state-1);
+            if (str) {
+                regnode *n = REGNODE_AFTER(convert);
+                assert( n - convert <= U16_MAX );
+                NEXT_OFF(convert) = n - convert;
+                trie->startstate = state;
+                trie->minlen -= (state - 1);
+                trie->maxlen -= (state - 1);
+#ifdef DEBUGGING
+               /* At least the UNICOS C compiler choked on this
+                * being argument to DEBUG_r(), so let's just have
+                * it right here. */
+               if (
+#ifdef PERL_EXT_RE_BUILD
+                   1
+#else
+                   DEBUG_r_TEST
+#endif
+                   ) {
+                   U32 word = trie->wordcount;
+                   while (word--) {
+                       SV ** const tmp = av_fetch_simple( trie_words, word, 0 );
+                       if (tmp) {
+                           if ( STR_LEN(convert) <= SvCUR(*tmp) )
+                               sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
+                           else
+                               sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
+                       }
+                   }
+               }
+#endif
+                if (trie->maxlen) {
+                    convert = n;
+                } else {
+                    NEXT_OFF(convert) = (U16)(tail - convert);
+                    DEBUG_r(optimize= n);
+                }
+            }
+        }
+        if (!jumper)
+            jumper = last;
+        if ( trie->maxlen ) {
+            NEXT_OFF( convert ) = (U16)(tail - convert);
+            ARG_SET( convert, data_slot );
+            /* Store the offset to the first unabsorbed branch in
+               jump[0], which is otherwise unused by the jump logic.
+               We use this when dumping a trie and during optimisation. */
+            if (trie->jump)
+                trie->jump[0] = (U16)(nextbranch - convert);
+
+            /* If the start state is not accepting (meaning there is no empty string/NOTHING)
+             *   and there is a bitmap
+             *   and the first "jump target" node we found leaves enough room
+             * then convert the TRIE node into a TRIEC node, with the bitmap
+             * embedded inline in the opcode - this is hypothetically faster.
+             */
+            if ( !trie->states[trie->startstate].wordnum
+                 && trie->bitmap
+                 && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
+            {
+                OP( convert ) = TRIEC;
+                Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
+                PerlMemShared_free(trie->bitmap);
+                trie->bitmap= NULL;
+            } else
+                OP( convert ) = TRIE;
+
+            /* store the type in the flags */
+            convert->flags = nodetype;
+            DEBUG_r({
+            optimize = convert
+                      + NODE_STEP_REGNODE
+                      + REGNODE_ARG_LEN( OP( convert ) );
+            });
+            /* XXX We really should free up the resource in trie now,
+                   as we won't use them - (which resources?) dmq */
+        }
+        /* needed for dumping*/
+        DEBUG_r(if (optimize) {
+            /*
+                Try to clean up some of the debris left after the
+                optimisation.
+             */
+            while( optimize < jumper ) {
+                OP( optimize ) = OPTIMIZED;
+                optimize++;
+            }
+        });
+    } /* end node insert */
+
+    /*  Finish populating the prev field of the wordinfo array.  Walk back
+     *  from each accept state until we find another accept state, and if
+     *  so, point the first word's .prev field at the second word. If the
+     *  second already has a .prev field set, stop now. This will be the
+     *  case either if we've already processed that word's accept state,
+     *  or that state had multiple words, and the overspill words were
+     *  already linked up earlier.
+     */
+    {
+        U16 word;
+        U32 state;
+        U16 prev;
+
+        for (word=1; word <= trie->wordcount; word++) {
+            prev = 0;
+            if (trie->wordinfo[word].prev)
+                continue;
+            state = trie->wordinfo[word].accept;
+            while (state) {
+                state = prev_states[state];
+                if (!state)
+                    break;
+                prev = trie->states[state].wordnum;
+                if (prev)
+                    break;
+            }
+            trie->wordinfo[word].prev = prev;
+        }
+        Safefree(prev_states);
+    }
+
+
+    /* and now dump out the compressed format */
+    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
+
+    RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
+#ifdef DEBUGGING
+    RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
+    RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
+#else
+    SvREFCNT_dec_NN(revcharmap);
+#endif
+    return trie->jump
+           ? MADE_JUMP_TRIE
+           : trie->startstate>1
+             ? MADE_EXACT_TRIE
+             : MADE_TRIE;
+}
+
+regnode *
+Perl_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
+{
+/* The Trie is constructed and compressed now so we can build a fail array if
+ * it's needed
+
+   This is basically the Aho-Corasick algorithm. Its from exercise 3.31 and
+   3.32 in the
+   "Red Dragon" -- Compilers, principles, techniques, and tools. Aho, Sethi,
+   Ullman 1985/88
+   ISBN 0-201-10088-6
+
+   We find the fail state for each state in the trie, this state is the longest
+   proper suffix of the current state's 'word' that is also a proper prefix of
+   another word in our trie. State 1 represents the word '' and is thus the
+   default fail state. This allows the DFA not to have to restart after its
+   tried and failed a word at a given point, it simply continues as though it
+   had been matching the other word in the first place.
+   Consider
+      'abcdgu'=~/abcdefg|cdgu/
+   When we get to 'd' we are still matching the first word, we would encounter
+   'g' which would fail, which would bring us to the state representing 'd' in
+   the second word where we would try 'g' and succeed, proceeding to match
+   'cdgu'.
+ */
+ /* add a fail transition */
+    const U32 trie_offset = ARG(source);
+    reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset];
+    U32 *q;
+    const U32 ucharcount = trie->uniquecharcount;
+    const U32 numstates = trie->statecount;
+    const U32 ubound = trie->lasttrans + ucharcount;
+    U32 q_read = 0;
+    U32 q_write = 0;
+    U32 charid;
+    U32 base = trie->states[ 1 ].trans.base;
+    U32 *fail;
+    reg_ac_data *aho;
+    const U32 data_slot = reg_add_data( pRExC_state, STR_WITH_LEN("T"));
+    regnode *stclass;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
+    PERL_UNUSED_CONTEXT;
+#ifndef DEBUGGING
+    PERL_UNUSED_ARG(depth);
+#endif
+
+    if ( OP(source) == TRIE ) {
+        struct regnode_1 *op = (struct regnode_1 *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_1));
+        StructCopy(source, op, struct regnode_1);
+        stclass = (regnode *)op;
+    } else {
+        struct regnode_charclass *op = (struct regnode_charclass *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
+        StructCopy(source, op, struct regnode_charclass);
+        stclass = (regnode *)op;
+    }
+    OP(stclass)+=2; /* convert the TRIE type to its AHO-CORASICK equivalent */
+
+    ARG_SET( stclass, data_slot );
+    aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
+    RExC_rxi->data->data[ data_slot ] = (void*)aho;
+    aho->trie=trie_offset;
+    aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
+    Copy( trie->states, aho->states, numstates, reg_trie_state );
+    Newx( q, numstates, U32);
+    aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
+    aho->refcount = 1;
+    fail = aho->fail;
+    /* initialize fail[0..1] to be 1 so that we always have
+       a valid final fail state */
+    fail[ 0 ] = fail[ 1 ] = 1;
+
+    for ( charid = 0; charid < ucharcount ; charid++ ) {
+        const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
+        if ( newstate ) {
+            q[ q_write ] = newstate;
+            /* set to point at the root */
+            fail[ q[ q_write++ ] ]=1;
+        }
+    }
+    while ( q_read < q_write) {
+        const U32 cur = q[ q_read++ % numstates ];
+        base = trie->states[ cur ].trans.base;
+
+        for ( charid = 0 ; charid < ucharcount ; charid++ ) {
+            const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
+            if (ch_state) {
+                U32 fail_state = cur;
+                U32 fail_base;
+                do {
+                    fail_state = fail[ fail_state ];
+                    fail_base = aho->states[ fail_state ].trans.base;
+                } while ( !TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 ) );
+
+                fail_state = TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 );
+                fail[ ch_state ] = fail_state;
+                if ( !aho->states[ ch_state ].wordnum && aho->states[ fail_state ].wordnum )
+                {
+                        aho->states[ ch_state ].wordnum =  aho->states[ fail_state ].wordnum;
+                }
+                q[ q_write++ % numstates] = ch_state;
+            }
+        }
+    }
+    /* restore fail[0..1] to 0 so that we "fall out" of the AC loop
+       when we fail in state 1, this allows us to use the
+       charclass scan to find a valid start char. This is based on the principle
+       that theres a good chance the string being searched contains lots of stuff
+       that cant be a start char.
+     */
+    fail[ 0 ] = fail[ 1 ] = 0;
+    DEBUG_TRIE_COMPILE_r({
+        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
+                      depth, (UV)numstates
+        );
+        for( q_read=1; q_read<numstates; q_read++ ) {
+            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
+        }
+        Perl_re_printf( aTHX_  "\n");
+    });
+    Safefree(q);
+    /*RExC_seen |= REG_TRIEDFA_SEEN;*/
+    return stclass;
+}
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 00c4983e80..cb17a110dc 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -869,7 +869,7 @@ foreach my $charset (get_supported_code_pages()) {
     $max_PRINT_A = sprintf "0x%02X", $max_PRINT_A;
     print $out_fh <<"EOT";
 
-#   ifdef PERL_IN_REGCOMP_C
+#   ifdef PERL_IN_REGCOMP_ANY
 #     define MAX_PRINT_A  $max_PRINT_A   /* The max code point that isPRINT_A */
 #   endif
 EOT
@@ -931,7 +931,7 @@ $count = 0x110000 - $count;
 print $out_fh <<~"EOT";
 
     /* The number of code points not matching \\pC */
-    #ifdef PERL_IN_REGCOMP_C
+    #ifdef PERL_IN_REGCOMP_ANY
     #  define NON_OTHER_COUNT  $count
     #endif
     EOT
diff --git a/regexec.c b/regexec.c
index 4e396d20c3..f7d66e38c6 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,7 +2,7 @@
  */
 
 /*
- *      One Ring to rule them all, One Ring to find them
+ *	One Ring to rule them all, One Ring to find them
  *
  *     [p.v of _The Lord of the Rings_, opening poem]
  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
@@ -40,22 +40,22 @@
 /*
  * pregcomp and pregexec -- regsub and regerror are not used in perl
  *
- *      Copyright (c) 1986 by University of Toronto.
- *      Written by Henry Spencer.  Not derived from licensed software.
+ *	Copyright (c) 1986 by University of Toronto.
+ *	Written by Henry Spencer.  Not derived from licensed software.
  *
- *      Permission is granted to anyone to use this software for any
- *      purpose on any computer system, and to redistribute it freely,
- *      subject to the following restrictions:
+ *	Permission is granted to anyone to use this software for any
+ *	purpose on any computer system, and to redistribute it freely,
+ *	subject to the following restrictions:
  *
- *      1. The author is not responsible for the consequences of use of
- *              this software, no matter how awful, even if they arise
- *              from defects in it.
+ *	1. The author is not responsible for the consequences of use of
+ *		this software, no matter how awful, even if they arise
+ *		from defects in it.
  *
- *      2. The origin of this software must not be misrepresented, either
- *              by explicit claim or by omission.
+ *	2. The origin of this software must not be misrepresented, either
+ *		by explicit claim or by omission.
  *
- *      3. Altered versions must be plainly marked as such, and must not
- *              be misrepresented as being the original software.
+ *	3. Altered versions must be plainly marked as such, and must not
+ *		be misrepresented as being the original software.
  *
  ****    Alterations to Henry's code are...
  ****
@@ -71,6 +71,7 @@
  * regular-expression syntax might require a total rethink.
  */
 #include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
 #define PERL_IN_REGEXEC_C
 #include "perl.h"
 
@@ -119,7 +120,7 @@ static const char non_utf8_target_but_utf8_required[]
 } STMT_END
 
 #ifndef STATIC
-#define STATIC  static
+#define	STATIC	static
 #endif
 
 /*
@@ -138,8 +139,8 @@ static const char non_utf8_target_but_utf8_required[]
 #define HOPBACK3(pos, off, lim) \
         (reginfo->is_utf8_target                          \
             ? reghopmaybe3((U8*)pos, (SSize_t)0-off, (U8*)(lim)) \
-            : (pos - off >= lim)                                 \
-                ? (U8*)pos - off                                 \
+            : (pos - off >= lim)	                         \
+                ? (U8*)pos - off		                 \
                 : NULL)
 
 #define HOPBACKc(pos, off) ((char*)HOPBACK3(pos, off, reginfo->strbeg))
@@ -167,7 +168,7 @@ static const char non_utf8_target_but_utf8_required[]
     : (U8*)(pos + off))
 #define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
 
-#define PLACEHOLDER     /* Something for the preprocessor to grab onto */
+#define PLACEHOLDER	/* Something for the preprocessor to grab onto */
 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 
 /* for use after a quantifier and before an EXACT-like node -- japhy */
@@ -268,7 +269,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
     PL_savestack_ix += paren_elems_to_push;
 
     DEBUG_BUFFERS_r({
-        I32 p;
+	I32 p;
         for (p = parenfloor + 1; p <= (I32)maxopenparen; p++) {
             Perl_re_exec_indentf(aTHX_
                 "    \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
@@ -441,7 +442,7 @@ S_regcp_restore(pTHX_ regexp *rex, I32 ix, U32 *maxopenparen_p _pDEPTH)
     PL_savestack_ix = tmpix;
 }
 
-#define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
+#define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
 
 STATIC bool
 S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
@@ -889,7 +890,7 @@ Perl_re_intuit_start(pTHX_
     U8   other_ix = 1 - prog->substrs->check_ix;
     bool ml_anch = 0;
     char *other_last = strpos;/* latest pos 'other' substr already checked to */
-    char *check_at = NULL;              /* check substr found at this pos */
+    char *check_at = NULL;		/* check substr found at this pos */
     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
     RXi_GET_DECL(prog,progi);
     regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
@@ -1067,7 +1068,7 @@ Perl_re_intuit_start(pTHX_
 
     end_shift = prog->check_end_shift;
 
-#ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
+#ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
     if (end_shift < 0)
         Perl_croak(aTHX_ "panic: end_shift: %" IVdf " pattern:\n%s\n ",
                    (IV)end_shift, RX_PRECOMP(rx));
@@ -1306,7 +1307,7 @@ Perl_re_intuit_start(pTHX_
         }
 
         s = HOP3c(rx_origin, other->min_offset, strend);
-        if (s < other_last)     /* These positions already checked */
+        if (s < other_last)	/* These positions already checked */
             s = other_last;
 
         must = utf8_target ? other->utf8_substr : other->substr;
@@ -1665,7 +1666,7 @@ Perl_re_intuit_start(pTHX_
            cannot start at strpos. */
 
         DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "  try at offset...\n"));
-        ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr);        /* hooray/5 */
+        ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
     }
     else {
         /* The found rx_origin position does not prohibit matching at
@@ -1674,11 +1675,11 @@ Perl_re_intuit_start(pTHX_
          * zero, free it.  */
         if (!(prog->intflags & PREGf_NAUGHTY)
             && (utf8_target ? (
-                prog->check_utf8                /* Could be deleted already */
+                prog->check_utf8		/* Could be deleted already */
                 && --BmUSEFUL(prog->check_utf8) < 0
                 && (prog->check_utf8 == prog->float_utf8)
             ) : (
-                prog->check_substr              /* Could be deleted already */
+                prog->check_substr		/* Could be deleted already */
                 && --BmUSEFUL(prog->check_substr) < 0
                 && (prog->check_substr == prog->float_substr)
             )))
@@ -1688,9 +1689,9 @@ Perl_re_intuit_start(pTHX_
             /* XXX Does the destruction order has to change with utf8_target? */
             SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
             SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
-            prog->check_substr = prog->check_utf8 = NULL;       /* disable */
-            prog->float_substr = prog->float_utf8 = NULL;       /* clear */
-            check = NULL;                       /* abort */
+            prog->check_substr = prog->check_utf8 = NULL;	/* disable */
+            prog->float_substr = prog->float_utf8 = NULL;	/* clear */
+            check = NULL;			/* abort */
             /* XXXX This is a remnant of the old implementation.  It
                     looks wasteful, since now INTUIT can use many
                     other heuristics. */
@@ -1704,8 +1705,8 @@ Perl_re_intuit_start(pTHX_
 
     return rx_origin;
 
-  fail_finish:                          /* Substring not found */
-    if (prog->check_substr || prog->check_utf8)         /* could be removed already */
+  fail_finish:				/* Substring not found */
+    if (prog->check_substr || prog->check_utf8)		/* could be removed already */
         BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
   fail:
     DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%sMatch rejected by optimizer%s\n",
@@ -2202,8 +2203,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
 
     char *pat_string;   /* The pattern's exactish string */
-    char *pat_end;          /* ptr to end char of pat_string */
-    re_fold_t folder;   /* Function for computing non-utf8 folds */
+    char *pat_end;	    /* ptr to end char of pat_string */
+    re_fold_t folder;	/* Function for computing non-utf8 folds */
     const U8 *fold_array;   /* array for folding ords < 256 */
     STRLEN ln;
     STRLEN lnc;
@@ -2473,7 +2474,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          * Unicode semantics and the german sharp ss, which hence should not be
          * compiled into a node that gets here. */
         pat_string = STRINGs(c);
-        ln  = STR_LENs(c);      /* length to match in octets/bytes */
+        ln  = STR_LENs(c);	/* length to match in octets/bytes */
 
         /* We know that we have to match at least 'ln' bytes (which is the same
          * as characters, since not utf8).  If we have to match 3 characters,
@@ -2598,7 +2599,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              * can have the same fold, or portion of a fold, or different-
              * length fold */
             pat_string = STRINGs(c);
-            ln  = STR_LENs(c);  /* length to match in octets/bytes */
+            ln  = STR_LENs(c);	/* length to match in octets/bytes */
             pat_end = pat_string + ln;
             lnc = is_utf8_pat       /* length to match in characters */
                   ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -3600,8 +3601,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     char *s;
     regnode *c;
     char *startpos;
-    SSize_t minlen;             /* must match at least this many chars */
-    SSize_t dontbother = 0;     /* how many characters not to try at end */
+    SSize_t minlen;		/* must match at least this many chars */
+    SSize_t dontbother = 0;	/* how many characters not to try at end */
     const bool utf8_target = cBOOL(DO_UTF8(sv));
     I32 multiline;
     RXi_GET_DECL(prog,progi);
@@ -3755,7 +3756,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     RXp_MATCH_TAINTED_off(prog);
     RXp_MATCH_UTF8_set(prog, utf8_target);
 
-    reginfo->prog = rx;  /* Yes, sorry that this is confusing.  */
+    reginfo->prog = rx;	 /* Yes, sorry that this is confusing.  */
     reginfo->intuit = 0;
     reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
     reginfo->warned = FALSE;
@@ -3964,7 +3965,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
         SSize_t back_max;
         SSize_t back_min;
         char *last;
-        char *last1;            /* Last position checked before */
+        char *last1;		/* Last position checked before */
 #ifdef DEBUGGING
         int did_match = 0;
 #endif
@@ -4006,14 +4007,14 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
         if (back_min<0) {
             last = strend;
         } else {
-            last = HOP3c(strend,        /* Cannot start after this */
+            last = HOP3c(strend,	/* Cannot start after this */
                   -(SSize_t)(CHR_SVLEN(must)
                          - (SvTAIL(must) != 0) + back_min), strbeg);
         }
         if (s > reginfo->strbeg)
             last1 = HOPc(s, -1);
         else
-            last1 = s - 1;      /* bogus */
+            last1 = s - 1;	/* bogus */
 
         /* XXXX check_substr already used to find "s", can optimize if
            check_substr==must. */
@@ -4169,7 +4170,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                     if (len)
                         last = rninstr(s, strend, little, little + len);
                     else
-                        last = strend;  /* matching "$" */
+                        last = strend;	/* matching "$" */
             }
             if (!last) {
                 /* at one point this block contained a comment which was
@@ -4188,7 +4189,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
         }
         if (minlen && (dontbother < minlen))
             dontbother = minlen - 1;
-        strend -= dontbother;              /* this one's always in bytes! */
+        strend -= dontbother; 		   /* this one's always in bytes! */
         /* We don't know much -- general case. */
         if (utf8_target) {
             for (;;) {
@@ -4275,16 +4276,16 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
  * Do inc before dec, in case old and new rex are the same */
 #define SET_reg_curpm(Re2)                          \
     if (reginfo->info_aux_eval) {                   \
-        (void)ReREFCNT_inc(Re2);                    \
-        ReREFCNT_dec(PM_GETRE(PL_reg_curpm));       \
-        PM_SETRE((PL_reg_curpm), (Re2));            \
+        (void)ReREFCNT_inc(Re2);		    \
+        ReREFCNT_dec(PM_GETRE(PL_reg_curpm));	    \
+        PM_SETRE((PL_reg_curpm), (Re2));	    \
     }
 
 
 /*
  - regtry - try match at specific point
  */
-STATIC bool                     /* 0 failure, 1 success */
+STATIC bool			/* 0 failure, 1 success */
 S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
 {
     CHECKPOINT lastcp;
@@ -6366,7 +6367,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     /* cache heavy used fields of st in registers */
     regnode *scan;
     regnode *next;
-    U32 n = 0;  /* general value; init to avoid compiler warning */
+    U32 n = 0;	/* general value; init to avoid compiler warning */
     U32 utmp = 0;  /* tmp variable - valid for at most one opcode */
     SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
     SSize_t endref = 0; /* offset of end of backref when ln is start */
@@ -6378,7 +6379,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     PERL_INT_FAST16_T nextbyte;   /* is always set to UCHARAT(locinput), or -1
                                      at EOS */
 
-    bool result = 0;        /* return value of S_regmatch */
+    bool result = 0;	    /* return value of S_regmatch */
     U32 depth = 0;            /* depth of backtrack stack */
     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
     const U32 max_nochange_depth =
@@ -6406,9 +6407,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      * the very next op. They have a useful lifetime of exactly one loop
      * iteration, and are not preserved or restored by state pushes/pops
      */
-    bool sw = 0;            /* the condition value in (?(cond)a|b) */
-    bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
-    int logical = 0;        /* the following EVAL is:
+    bool sw = 0;	    /* the condition value in (?(cond)a|b) */
+    bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
+    int logical = 0;	    /* the following EVAL is:
                                 0: (?{...})
                                 1: (?(?{...})X|Y)
                                 2: (??{...})
@@ -6419,8 +6420,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     PAD* last_pad = NULL;
     dMULTICALL;
     U8 gimme = G_SCALAR;
-    CV *caller_cv = NULL;       /* who called us */
-    CV *last_pushed_cv = NULL;  /* most recently called (?{}) CV */
+    CV *caller_cv = NULL;	/* who called us */
+    CV *last_pushed_cv = NULL;	/* most recently called (?{}) CV */
     U32 maxopenparen = 0;       /* max '(' index seen so far */
     int to_complement;  /* Invert the result? */
     char_class_number_ classnum;
@@ -7790,7 +7791,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
                 /* Match either CR LF  or '.', as all the other possibilities
                  * require utf8 */
-                locinput++;         /* Match the . or CR */
+                locinput++;	    /* Match the . or CR */
                 if (nextbyte == '\r' /* And if it was CR, and the next is LF,
                                        match the LF */
                     && locinput <  loceol
@@ -7924,12 +7925,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             endref = rex->offs[n].end;
             reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
             if (rex->lastparen < n || ln == -1 || endref == -1)
-                sayNO;                  /* Do not match unless seen CLOSEn. */
+                sayNO;			/* Do not match unless seen CLOSEn. */
             if (ln == endref)
                 break;
 
             s = reginfo->strbeg + ln;
-            if (type != REF     /* REF can do byte comparison */
+            if (type != REF	/* REF can do byte comparison */
                 && (utf8_target || type == REFFU || type == REFFL))
             {
                 char * limit = loceol;
@@ -8202,7 +8203,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  * the savestack frame */
                 before = (IV)(SP-PL_stack_base);
                 PL_op = nop;
-                CALLRUNOPS(aTHX);                       /* Scalar context. */
+                CALLRUNOPS(aTHX);			/* Scalar context. */
                 SPAGAIN;
                 if ((IV)(SP-PL_stack_base) == before)
                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
@@ -8674,7 +8675,7 @@ NULL
             ST.B = next;
             ST.minmod = minmod;
             minmod = 0;
-            ST.count = -1;      /* this will be updated by WHILEM */
+            ST.count = -1;	/* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
 
             PUSH_YES_STATE_GOTO(CURLYX_end, REGNODE_BEFORE(next), locinput, loceol,
@@ -8916,13 +8917,13 @@ NULL
 #undef  ST
 #define ST st->u.branch
 
-        case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
+        case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
             next = scan + ARG(scan);
             if (next == scan)
                 next = NULL;
             /* FALLTHROUGH */
 
-        case BRANCH:        /*  /(...|A|...)/ */
+        case BRANCH:	    /*  /(...|A|...)/ */
             scan = REGNODE_AFTER_opcode(scan,state_num); /* scan now points to inner node */
             assert(scan);
             ST.lastparen = rex->lastparen;
@@ -8988,7 +8989,7 @@ NULL
 #undef  ST
 #define ST st->u.curlym
 
-        case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
+        case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
 
             /* This is an optimisation of CURLYX that enables us to push
              * only a single backtracking state, no matter how many matches
@@ -9172,22 +9173,22 @@ NULL
         } \
     }
 
-        case STAR:              /*  /A*B/ where A is width 1 char */
+        case STAR:		/*  /A*B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = 0;
             ST.max = REG_INFTY;
             scan = REGNODE_AFTER_type(scan,tregnode_STAR);
             goto repeat;
 
-        case PLUS:              /*  /A+B/ where A is width 1 char */
+        case PLUS:		/*  /A+B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = 1;
             ST.max = REG_INFTY;
             scan = REGNODE_AFTER_type(scan,tregnode_PLUS);
             goto repeat;
 
-        case CURLYN:            /*  /(A){m,n}B/ where A is width 1 char */
-            ST.paren = scan->flags;     /* Which paren to set */
+        case CURLYN:		/*  /(A){m,n}B/ where A is width 1 char */
+            ST.paren = scan->flags;	/* Which paren to set */
             ST.lastparen      = rex->lastparen;
             ST.lastcloseparen = rex->lastcloseparen;
             if (ST.paren > maxopenparen)
@@ -9208,7 +9209,7 @@ NULL
 
             goto repeat;
 
-        case CURLY:             /*  /A{m,n}B/ where A is width 1 char */
+        case CURLY:		/*  /A{m,n}B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = ARG1(scan);  /* min to match */
             ST.max = ARG2(scan);  /* max to match */
@@ -9471,7 +9472,7 @@ NULL
                 /* we've just finished A in /(??{A})B/; now continue with B */
                 is_accepted= false;
                 SET_RECURSE_LOCINPUT("FAKE-END[before]", CUR_EVAL.prev_recurse_locinput);
-                st->u.eval.prev_rex = rex_sv;           /* inner */
+                st->u.eval.prev_rex = rex_sv;		/* inner */
 
                 /* Save *all* the positions. */
                 st->u.eval.cp = regcppush(rex, 0, maxopenparen);
@@ -9513,9 +9514,9 @@ NULL
                                       (long)(reginfo->till - startpos),
                                       PL_colors[5]));
 
-                sayNO_SILENT;           /* Cannot match: too short. */
+                sayNO_SILENT;		/* Cannot match: too short. */
             }
-            sayYES;                     /* Success! */
+            sayYES;			/* Success! */
 
         case LOOKBEHIND_END: /* validate that *lookbehind* UNLESSM/IFMATCH
                                 matches end at the right spot, required for
@@ -9536,23 +9537,23 @@ NULL
             Perl_re_exec_indentf( aTHX_
                 "%sSUCCEED: subpattern success...%s\n",
                 depth, PL_colors[4], PL_colors[5]));
-            sayYES;                     /* Success! */
+            sayYES;			/* Success! */
 
 #undef  ST
 #define ST st->u.ifmatch
 
-        case SUSPEND:   /* (?>A) */
+        case SUSPEND:	/* (?>A) */
             ST.wanted = 1;
             ST.start = locinput;
             ST.end = loceol;
             ST.count = 1;
             goto do_ifmatch;
 
-        case UNLESSM:   /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
+        case UNLESSM:	/* -ve lookaround: (?!A), or with 'flags', (?<!A) */
             ST.wanted = 0;
             goto ifmatch_trivial_fail_test;
 
-        case IFMATCH:   /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
+        case IFMATCH:	/* +ve lookaround: (?=A), or with 'flags', (?<=A) */
             ST.wanted = 1;
           ifmatch_trivial_fail_test:
             ST.prev_match_end= match_end;
@@ -11739,9 +11740,430 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
 
     return retval;
 }
-
 #endif /* ifndef PERL_IN_XSUB_RE */
 
+/* Buffer logic. */
+SV*
+Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
+                    const U32 flags)
+{
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF;
+
+    PERL_UNUSED_ARG(value);
+
+    if (flags & RXapif_FETCH) {
+        return reg_named_buff_fetch(rx, key, flags);
+    } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
+        Perl_croak_no_modify();
+        return NULL;
+    } else if (flags & RXapif_EXISTS) {
+        return reg_named_buff_exists(rx, key, flags)
+            ? &PL_sv_yes
+            : &PL_sv_no;
+    } else if (flags & RXapif_REGNAMES) {
+        return reg_named_buff_all(rx, flags);
+    } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
+        return reg_named_buff_scalar(rx, flags);
+    } else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
+                         const U32 flags)
+{
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
+    PERL_UNUSED_ARG(lastkey);
+
+    if (flags & RXapif_FIRSTKEY)
+        return reg_named_buff_firstkey(rx, flags);
+    else if (flags & RXapif_NEXTKEY)
+        return reg_named_buff_nextkey(rx, flags);
+    else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter",
+                                            (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
+                          const U32 flags)
+{
+    SV *ret;
+    struct regexp *const rx = ReANY(r);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
+        if (he_str) {
+            IV i;
+            SV* sv_dat=HeVAL(he_str);
+            I32 *nums=(I32*)SvPVX(sv_dat);
+            AV * const retarray = (flags & RXapif_ALL) ? newAV_alloc_x(SvIVX(sv_dat)) : NULL;
+            for ( i=0; i<SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->nparens) >= nums[i]
+                    && rx->offs[nums[i]].start != -1
+                    && rx->offs[nums[i]].end != -1)
+                {
+                    ret = newSVpvs("");
+                    CALLREG_NUMBUF_FETCH(r, nums[i], ret);
+                    if (!retarray)
+                        return ret;
+                } else {
+                    if (retarray)
+                        ret = newSV_type(SVt_NULL);
+                }
+                if (retarray)
+                    av_push_simple(retarray, ret);
+            }
+            if (retarray)
+                return newRV_noinc(MUTABLE_SV(retarray));
+        }
+    }
+    return NULL;
+}
+
+bool
+Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
+                           const U32 flags)
+{
+    struct regexp *const rx = ReANY(r);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        if (flags & RXapif_ALL) {
+            return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
+        } else {
+            SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
+            if (sv) {
+                SvREFCNT_dec_NN(sv);
+                return TRUE;
+            } else {
+                return FALSE;
+            }
+        }
+    } else {
+        return FALSE;
+    }
+}
+
+SV*
+Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = ReANY(r);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
+
+    if ( rx && RXp_PAREN_NAMES(rx) ) {
+        (void)hv_iterinit(RXp_PAREN_NAMES(rx));
+
+        return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
+    } else {
+        return FALSE;
+    }
+}
+
+SV*
+Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = ReANY(r);
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv = RXp_PAREN_NAMES(rx);
+        HE *temphe;
+        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+                return newSVhek(HeKEY_hek(temphe));
+            }
+        }
+    }
+    return NULL;
+}
+
+SV*
+Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
+{
+    SV *ret;
+    AV *av;
+    SSize_t length;
+    struct regexp *const rx = ReANY(r);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
+            return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
+        } else if (flags & RXapif_ONE) {
+            ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
+            av = MUTABLE_AV(SvRV(ret));
+            length = av_count(av);
+            SvREFCNT_dec_NN(ret);
+            return newSViv(length);
+        } else {
+            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
+                                                (int)flags);
+            return NULL;
+        }
+    }
+    return &PL_sv_undef;
+}
+
+SV*
+Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = ReANY(r);
+    AV *av = newAV();
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv= RXp_PAREN_NAMES(rx);
+        HE *temphe;
+        (void)hv_iterinit(hv);
+        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+                av_push_simple(av, newSVhek(HeKEY_hek(temphe)));
+            }
+        }
+    }
+
+    return newRV_noinc(MUTABLE_SV(av));
+}
+
+void
+Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
+                             SV * const sv)
+{
+    struct regexp *const rx = ReANY(r);
+    char *s = NULL;
+    SSize_t i = 0;
+    SSize_t s1, t1;
+    I32 n = paren;
+
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
+
+    if (      n == RX_BUFF_IDX_CARET_PREMATCH
+           || n == RX_BUFF_IDX_CARET_FULLMATCH
+           || n == RX_BUFF_IDX_CARET_POSTMATCH
+       )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $r = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto ret_undef;
+    }
+
+    if (!rx->subbeg)
+        goto ret_undef;
+
+    if (n == RX_BUFF_IDX_CARET_FULLMATCH)
+        /* no need to distinguish between them any more */
+        n = RX_BUFF_IDX_FULLMATCH;
+
+    if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
+        && rx->offs[0].start != -1)
+    {
+        /* $`, ${^PREMATCH} */
+        i = rx->offs[0].start;
+        s = rx->subbeg;
+    }
+    else
+    if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
+        && rx->offs[0].end != -1)
+    {
+        /* $', ${^POSTMATCH} */
+        s = rx->subbeg - rx->suboffset + rx->offs[0].end;
+        i = rx->sublen + rx->suboffset - rx->offs[0].end;
+    }
+    else
+    if (inRANGE(n, 0, (I32)rx->nparens) &&
+        (s1 = rx->offs[n].start) != -1  &&
+        (t1 = rx->offs[n].end) != -1)
+    {
+        /* $&, ${^MATCH},  $1 ... */
+        i = t1 - s1;
+        s = rx->subbeg + s1 - rx->suboffset;
+    } else {
+        goto ret_undef;
+    }
+
+    assert(s >= rx->subbeg);
+    assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
+    if (i >= 0) {
+#ifdef NO_TAINT_SUPPORT
+        sv_setpvn(sv, s, i);
+#else
+        const int oldtainted = TAINT_get;
+        TAINT_NOT;
+        sv_setpvn(sv, s, i);
+        TAINT_set(oldtainted);
+#endif
+        if (RXp_MATCH_UTF8(rx))
+            SvUTF8_on(sv);
+        else
+            SvUTF8_off(sv);
+        if (TAINTING_get) {
+            if (RXp_MATCH_TAINTED(rx)) {
+                if (SvTYPE(sv) >= SVt_PVMG) {
+                    MAGIC* const mg = SvMAGIC(sv);
+                    MAGIC* mgt;
+                    TAINT;
+                    SvMAGIC_set(sv, mg->mg_moremagic);
+                    SvTAINT(sv);
+                    if ((mgt = SvMAGIC(sv))) {
+                        mg->mg_moremagic = mgt;
+                        SvMAGIC_set(sv, mg);
+                    }
+                } else {
+                    TAINT;
+                    SvTAINT(sv);
+                }
+            } else
+                SvTAINTED_off(sv);
+        }
+    } else {
+      ret_undef:
+        sv_set_undef(sv);
+        return;
+    }
+}
+
+void
+Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+                                                         SV const * const value)
+{
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
+
+    PERL_UNUSED_ARG(rx);
+    PERL_UNUSED_ARG(paren);
+    PERL_UNUSED_ARG(value);
+
+    if (!PL_localizing)
+        Perl_croak_no_modify();
+}
+
+I32
+Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
+                              const I32 paren)
+{
+    struct regexp *const rx = ReANY(r);
+    I32 i;
+    I32 s1, t1;
+
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
+
+    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
+        || paren == RX_BUFF_IDX_CARET_FULLMATCH
+        || paren == RX_BUFF_IDX_CARET_POSTMATCH
+    )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $r = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto warn_undef;
+    }
+
+    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
+    switch (paren) {
+      case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
+      case RX_BUFF_IDX_PREMATCH:       /* $` */
+        if (rx->offs[0].start != -1) {
+                        i = rx->offs[0].start;
+                        if (i > 0) {
+                                s1 = 0;
+                                t1 = i;
+                                goto getlen;
+                        }
+            }
+        return 0;
+
+      case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
+      case RX_BUFF_IDX_POSTMATCH:       /* $' */
+            if (rx->offs[0].end != -1) {
+                        i = rx->sublen - rx->offs[0].end;
+                        if (i > 0) {
+                                s1 = rx->offs[0].end;
+                                t1 = rx->sublen;
+                                goto getlen;
+                        }
+            }
+        return 0;
+
+      default: /* $& / ${^MATCH}, $1, $2, ... */
+            if (paren <= (I32)rx->nparens &&
+            (s1 = rx->offs[paren].start) != -1 &&
+            (t1 = rx->offs[paren].end) != -1)
+            {
+            i = t1 - s1;
+            goto getlen;
+        } else {
+          warn_undef:
+            if (ckWARN(WARN_UNINITIALIZED))
+                report_uninit((const SV *)sv);
+            return 0;
+        }
+    }
+  getlen:
+    if (i > 0 && RXp_MATCH_UTF8(rx)) {
+        const char * const s = rx->subbeg - rx->suboffset + s1;
+        const U8 *ep;
+        STRLEN el;
+
+        i = t1 - s1;
+        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
+            i = el;
+    }
+    return i;
+}
+
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */
diff --git a/regexp.h b/regexp.h
index 1a0828ae8c..234db6573e 100644
--- a/regexp.h
+++ b/regexp.h
@@ -30,7 +30,7 @@ struct regnode_meta {
 };
 
 struct regnode {
-    U8  flags;
+    U8	flags;
     U8  type;
     U16 next_off;
 };
@@ -47,13 +47,13 @@ struct regexp;
 struct reg_substr_datum {
     SSize_t min_offset; /* min pos (in chars) that substr must appear */
     SSize_t max_offset; /* max pos (in chars) that substr must appear */
-    SV *substr;         /* non-utf8 variant */
-    SV *utf8_substr;    /* utf8 variant */
+    SV *substr;		/* non-utf8 variant */
+    SV *utf8_substr;	/* utf8 variant */
     SSize_t end_shift;  /* how many fixed chars must end the string */
 };
 struct reg_substr_data {
     U8      check_ix;   /* index into data[] of check substr */
-    struct reg_substr_datum data[3];    /* Actual array */
+    struct reg_substr_datum data[3];	/* Actual array */
 };
 
 #  ifdef PERL_ANY_COW
@@ -70,13 +70,13 @@ typedef struct regexp_paren_pair {
     /* 'start_tmp' records a new opening position before the matching end
      * has been found, so that the old start and end values are still
      * valid, e.g.
-     *    "abc" =~ /(.(?{print "[$1]"}))+/
+     *	  "abc" =~ /(.(?{print "[$1]"}))+/
      *outputs [][a][b]
      * This field is not part of the API.  */
     SSize_t start_tmp;
 } regexp_paren_pair;
 
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
+#  if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_UTF8_C)
 #    define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
 #    define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
 
@@ -174,13 +174,13 @@ typedef struct regexp {
 } regexp;
 
 
-#  define RXp_PAREN_NAMES(rx)   ((rx)->paren_names)
+#  define RXp_PAREN_NAMES(rx)	((rx)->paren_names)
 
 /* used for high speed searches */
 typedef struct re_scream_pos_data_s
 {
-    char **scream_olds;         /* match pos */
-    SSize_t *scream_pos;        /* Internal iterator of scream. */
+    char **scream_olds;		/* match pos */
+    SSize_t *scream_pos;	/* Internal iterator of scream. */
 } re_scream_pos_data;
 
 /* regexp_engine structure. This is the dispatch table for regexes.
@@ -302,7 +302,7 @@ and check for NULL.
 
 #  include "op_reg_common.h"
 
-#  define RXf_PMf_STD_PMMOD     (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_NOCAPTURE)
+#  define RXf_PMf_STD_PMMOD	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_NOCAPTURE)
 
 #  define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, x_count)                       \
     case IGNORE_PAT_MOD:    *(pmfl) |= RXf_PMf_FOLD;       break;           \
@@ -372,7 +372,7 @@ and check for NULL.
 #  define INT_PAT_MODS    STD_PAT_MODS    KEEPCOPY_PAT_MODS
 
 #  define EXT_PAT_MODS    ONCE_PAT_MODS   KEEPCOPY_PAT_MODS  NOCAPTURE_PAT_MODS
-#  define QR_PAT_MODS     STD_PAT_MODS    EXT_PAT_MODS     CHARSET_PAT_MODS
+#  define QR_PAT_MODS     STD_PAT_MODS    EXT_PAT_MODS	   CHARSET_PAT_MODS
 #  define M_PAT_MODS      QR_PAT_MODS     LOOP_PAT_MODS
 #  define S_PAT_MODS      M_PAT_MODS      EXEC_PAT_MODS      NONDESTRUCT_PAT_MODS
 
@@ -425,38 +425,38 @@ and check for NULL.
 
 /* What we have seen */
 #  define RXf_NO_INPLACE_SUBST  (1U<<(RXf_BASE_SHIFT+2))
-#  define RXf_EVAL_SEEN         (1U<<(RXf_BASE_SHIFT+3))
+#  define RXf_EVAL_SEEN   	(1U<<(RXf_BASE_SHIFT+3))
 
 /* Special */
 #  define RXf_UNBOUNDED_QUANTIFIER_SEEN   (1U<<(RXf_BASE_SHIFT+4))
-#  define RXf_CHECK_ALL         (1U<<(RXf_BASE_SHIFT+5))
+#  define RXf_CHECK_ALL   	(1U<<(RXf_BASE_SHIFT+5))
 
 /* UTF8 related */
-#  define RXf_MATCH_UTF8        (1U<<(RXf_BASE_SHIFT+6)) /* $1 etc are utf8 */
+#  define RXf_MATCH_UTF8  	(1U<<(RXf_BASE_SHIFT+6)) /* $1 etc are utf8 */
 
 /* Intuit related */
-#  define RXf_USE_INTUIT_NOML   (1U<<(RXf_BASE_SHIFT+7))
-#  define RXf_USE_INTUIT_ML     (1U<<(RXf_BASE_SHIFT+8))
-#  define RXf_INTUIT_TAIL       (1U<<(RXf_BASE_SHIFT+9))
+#  define RXf_USE_INTUIT_NOML	(1U<<(RXf_BASE_SHIFT+7))
+#  define RXf_USE_INTUIT_ML	(1U<<(RXf_BASE_SHIFT+8))
+#  define RXf_INTUIT_TAIL 	(1U<<(RXf_BASE_SHIFT+9))
 #  define RXf_USE_INTUIT        (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
 
 /* Do we have some sort of anchor? */
 #  define RXf_IS_ANCHORED       (1U<<(RXf_BASE_SHIFT+10))
 
 /* Copy and tainted info */
-#  define RXf_COPY_DONE         (1U<<(RXf_BASE_SHIFT+11))
+#  define RXf_COPY_DONE   	(1U<<(RXf_BASE_SHIFT+11))
 
 /* post-execution: $1 et al are tainted */
-#  define RXf_TAINTED_SEEN      (1U<<(RXf_BASE_SHIFT+12))
+#  define RXf_TAINTED_SEEN	(1U<<(RXf_BASE_SHIFT+12))
 /* this pattern was tainted during compilation */
-#  define RXf_TAINTED           (1U<<(RXf_BASE_SHIFT+13))
+#  define RXf_TAINTED		(1U<<(RXf_BASE_SHIFT+13))
 
 /* Flags indicating special patterns */
 #  define RXf_START_ONLY        (1U<<(RXf_BASE_SHIFT+14)) /* Pattern is /^/ */
 #  define RXf_SKIPWHITE         (1U<<(RXf_BASE_SHIFT+15)) /* Pattern is for a */
                                                           /* split " " */
-#  define RXf_WHITE             (1U<<(RXf_BASE_SHIFT+16)) /* Pattern is /\s+/ */
-#  define RXf_NULL              (1U<<(RXf_BASE_SHIFT+17)) /* Pattern is // */
+#  define RXf_WHITE		(1U<<(RXf_BASE_SHIFT+16)) /* Pattern is /\s+/ */
+#  define RXf_NULL		(1U<<(RXf_BASE_SHIFT+17)) /* Pattern is // */
 
 /* See comments at the beginning of these defines about adding bits.  The
  * highest bit position should be used, so that if RXf_BASE_SHIFT gets
@@ -619,35 +619,35 @@ and check for NULL.
                                         on second iteration */
 
 #if defined(PERL_USE_GCC_BRACE_GROUPS)
-#  define ReREFCNT_inc(re)                                              \
-    ({                                                                  \
-        /* This is here to generate a casting warning if incorrect.  */ \
-        REGEXP *const _rerefcnt_inc = (re);                             \
-        assert(SvTYPE(_rerefcnt_inc) == SVt_REGEXP);                    \
-        SvREFCNT_inc(_rerefcnt_inc);                                    \
-        _rerefcnt_inc;                                                  \
+#  define ReREFCNT_inc(re)						\
+    ({									\
+        /* This is here to generate a casting warning if incorrect.  */	\
+        REGEXP *const _rerefcnt_inc = (re);				\
+        assert(SvTYPE(_rerefcnt_inc) == SVt_REGEXP);			\
+        SvREFCNT_inc(_rerefcnt_inc);					\
+        _rerefcnt_inc;							\
     })
-#  define ReREFCNT_dec(re)                                              \
-    ({                                                                  \
-        /* This is here to generate a casting warning if incorrect.  */ \
-        REGEXP *const _rerefcnt_dec = (re);                             \
-        SvREFCNT_dec(_rerefcnt_dec);                                    \
+#  define ReREFCNT_dec(re)						\
+    ({									\
+        /* This is here to generate a casting warning if incorrect.  */	\
+        REGEXP *const _rerefcnt_dec = (re);				\
+        SvREFCNT_dec(_rerefcnt_dec);					\
     })
 #else
-#  define ReREFCNT_dec(re)      SvREFCNT_dec(re)
-#  define ReREFCNT_inc(re)      ((REGEXP *) SvREFCNT_inc(re))
+#  define ReREFCNT_dec(re)	SvREFCNT_dec(re)
+#  define ReREFCNT_inc(re)	((REGEXP *) SvREFCNT_inc(re))
 #endif
-#define ReANY(re)               Perl_ReANY((const REGEXP *)(re))
+#define ReANY(re)		Perl_ReANY((const REGEXP *)(re))
 
 /* FIXME for plugins. */
 
-#define FBMcf_TAIL_DOLLAR       1
-#define FBMcf_TAIL_DOLLARM      2
-#define FBMcf_TAIL_Z            4
-#define FBMcf_TAIL_z            8
-#define FBMcf_TAIL              (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_DOLLARM|FBMcf_TAIL_Z|FBMcf_TAIL_z)
+#define FBMcf_TAIL_DOLLAR	1
+#define FBMcf_TAIL_DOLLARM	2
+#define FBMcf_TAIL_Z		4
+#define FBMcf_TAIL_z		8
+#define FBMcf_TAIL		(FBMcf_TAIL_DOLLAR|FBMcf_TAIL_DOLLARM|FBMcf_TAIL_Z|FBMcf_TAIL_z)
 
-#define FBMrf_MULTILINE 1
+#define FBMrf_MULTILINE	1
 
 struct regmatch_state;
 struct regmatch_slab;
@@ -681,7 +681,7 @@ typedef struct {
     regmatch_info_aux_eval *info_aux_eval;
     struct regmatch_state *old_regmatch_state; /* saved PL_regmatch_state */
     struct regmatch_slab  *old_regmatch_slab;  /* saved PL_regmatch_slab */
-    char *poscache;     /* S-L cache of fail positions of WHILEMs */
+    char *poscache;	/* S-L cache of fail positions of WHILEMs */
 } regmatch_info_aux;
 
 
@@ -753,8 +753,8 @@ struct next_matchable_info {
 typedef I32 CHECKPOINT;
 
 typedef struct regmatch_state {
-    int resume_state;           /* where to jump to on return */
-    char *locinput;             /* where to backtrack in string on failure */
+    int resume_state;		/* where to jump to on return */
+    char *locinput;		/* where to backtrack in string on failure */
     char *loceol;
     U8 *sr0;                    /* position of start of script run, or NULL */
 
@@ -819,14 +819,14 @@ typedef struct regmatch_state {
             U32 lastcloseparen;
             CHECKPOINT cp;
 
-            U32         accepted; /* how many accepting states left */
-            bool        longfold;/* saw a fold with a 1->n char mapping */
+            U32		accepted; /* how many accepting states left */
+            bool	longfold;/* saw a fold with a 1->n char mapping */
             U16         *jump;  /* positive offsets from me */
-            regnode     *me;    /* Which node am I - needed for jump tries*/
-            U8          *firstpos;/* pos in string of first trie match */
-            U32         firstchars;/* len in chars of firstpos from start */
-            U16         nextword;/* next word to try */
-            U16         topword; /* longest accepted word */
+            regnode	*me;	/* Which node am I - needed for jump tries*/
+            U8		*firstpos;/* pos in string of first trie match */
+            U32		firstchars;/* len in chars of firstpos from start */
+            U16		nextword;/* next word to try */
+            U16		topword; /* longest accepted word */
         } trie;
 
         /* special types - these members are used to store state for special
@@ -836,11 +836,11 @@ typedef struct regmatch_state {
             struct regmatch_state *prev_yes_state;
             struct regmatch_state *prev_curlyx;
             struct regmatch_state *prev_eval;
-            REGEXP      *prev_rex;
-            CHECKPOINT  cp;     /* remember current savestack indexes */
-            CHECKPOINT  lastcp;
+            REGEXP	*prev_rex;
+            CHECKPOINT	cp;	/* remember current savestack indexes */
+            CHECKPOINT	lastcp;
             U32         close_paren; /* which close bracket is our end (+1) */
-            regnode     *B;     /* the node following us  */
+            regnode	*B;	/* the node following us  */
             char        *prev_recurse_locinput;
         } eval;
 
@@ -848,7 +848,7 @@ typedef struct regmatch_state {
             /* this first element must match u.yes */
             struct regmatch_state *prev_yes_state;
             I32 wanted;
-            I32 logical;        /* saved copy of 'logical' var */
+            I32 logical;	/* saved copy of 'logical' var */
             U8  count;          /* number of beginning positions */
             char *start;
             char *end;
@@ -874,26 +874,26 @@ typedef struct regmatch_state {
             /* this first element must match u.yes */
             struct regmatch_state *prev_yes_state;
             struct regmatch_state *prev_curlyx; /* previous cur_curlyx */
-            regnode     *me;    /* the CURLYX node  */
-            regnode     *B;     /* the B node in /A*B/  */
-            CHECKPOINT  cp;     /* remember current savestack index */
-            bool        minmod;
-            int         parenfloor;/* how far back to strip paren data */
+            regnode	*me;	/* the CURLYX node  */
+            regnode	*B;	/* the B node in /A*B/  */
+            CHECKPOINT	cp;	/* remember current savestack index */
+            bool	minmod;
+            int		parenfloor;/* how far back to strip paren data */
 
             /* these two are modified by WHILEM */
-            int         count;  /* how many instances of A we've matched */
-            char        *lastloc;/* where previous A matched (0-len detect) */
+            int		count;	/* how many instances of A we've matched */
+            char	*lastloc;/* where previous A matched (0-len detect) */
         } curlyx;
 
         struct {
             /* this first element must match u.yes */
             struct regmatch_state *prev_yes_state;
             struct regmatch_state *save_curlyx;
-            CHECKPOINT  cp;     /* remember current savestack indexes */
-            CHECKPOINT  lastcp;
-            char        *save_lastloc;  /* previous curlyx.lastloc */
-            I32         cache_offset;
-            I32         cache_mask;
+            CHECKPOINT	cp;	/* remember current savestack indexes */
+            CHECKPOINT	lastcp;
+            char	*save_lastloc;	/* previous curlyx.lastloc */
+            I32		cache_offset;
+            I32		cache_mask;
         } whilem;
 
         struct {
@@ -902,11 +902,11 @@ typedef struct regmatch_state {
             CHECKPOINT cp;
             U32 lastparen;
             U32 lastcloseparen;
-            I32 alen;           /* length of first-matched A string */
+            I32 alen;		/* length of first-matched A string */
             I32 count;
             bool minmod;
-            regnode *A, *B;     /* the nodes corresponding to /A*B/  */
-            regnode *me;        /* the curlym node */
+            regnode *A, *B;	/* the nodes corresponding to /A*B/  */
+            regnode *me;	/* the curlym node */
             struct next_matchable_info Binfo;
         } curlym;
 
@@ -915,11 +915,11 @@ typedef struct regmatch_state {
             CHECKPOINT cp;
             U32 lastparen;
             U32 lastcloseparen;
-            char *maxpos;       /* highest possible point in string to match */
-            char *oldloc;       /* the previous locinput */
+            char *maxpos;	/* highest possible point in string to match */
+            char *oldloc;	/* the previous locinput */
             int count;
-            int min, max;       /* {m,n} */
-            regnode *A, *B;     /* the nodes corresponding to /A*B/  */
+            int min, max;	/* {m,n} */
+            regnode *A, *B;	/* the nodes corresponding to /A*B/  */
             struct next_matchable_info Binfo;
         } curly; /* and CURLYN/PLUS/STAR */
 
diff --git a/unicode_constants.h b/unicode_constants.h
index 1b295e0dda..0f176f279a 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -103,7 +103,7 @@ bytes.
 #      define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\xAB\xBB"
 #   endif
 
-#   ifdef PERL_IN_REGCOMP_C
+#   ifdef PERL_IN_REGCOMP_ANY
 #     define MAX_PRINT_A  0x7E   /* The max code point that isPRINT_A */
 #   endif
 #endif	/* ASCII/Latin1 */
@@ -164,7 +164,7 @@ bytes.
 #      define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\x8A\x8B"
 #   endif
 
-#   ifdef PERL_IN_REGCOMP_C
+#   ifdef PERL_IN_REGCOMP_ANY
 #     define MAX_PRINT_A  0xF9   /* The max code point that isPRINT_A */
 #   endif
 #endif	/* EBCDIC 1047 */
@@ -225,13 +225,13 @@ bytes.
 #      define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\x8A\x8B"
 #   endif
 
-#   ifdef PERL_IN_REGCOMP_C
+#   ifdef PERL_IN_REGCOMP_ANY
 #     define MAX_PRINT_A  0xF9   /* The max code point that isPRINT_A */
 #   endif
 #endif	/* EBCDIC 037 */
 
 /* The number of code points not matching \pC */
-#ifdef PERL_IN_REGCOMP_C
+#ifdef PERL_IN_REGCOMP_ANY
 #  define NON_OTHER_COUNT  149016
 #endif
 
diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template
index ba0d6ab6d3..e8e1c685f6 100644
--- a/vms/descrip_mms.template
+++ b/vms/descrip_mms.template
@@ -211,25 +211,27 @@ FULLLIBS2 = $(LIBS2)|$(THRLIBS1)|$(THRLIBS2)
 
 c0 = $(MALLOC_C) av.c builtin.c caretx.c deb.c doio.c doop.c dquote.c dump.c globals.c gv.c hv.c mro_core.c
 c1 = mg.c locale.c mathoms.c miniperlmain.c numeric.c op.c pad.c peep.c perl.c perlio.c
-c2 = perly.c pp.c pp_ctl.c pp_hot.c pp_pack.c pp_sort.c pp_sys.c regcomp.c regexec.c reentr.c
-c3 = run.c scope.c sv.c taint.c time64.c toke.c universal.c utf8.c util.c vms.c keywords.c
-c = $(c0) $(c1) $(c2) $(c3)
+c2 = perly.c pp.c pp_ctl.c pp_hot.c pp_pack.c pp_sort.c pp_sys.c regcomp.c regcomp_debug.c
+c3 = regcomp_invlist.c regcomp_study.c regcomp_trie.c regexec.c reentr.c
+c4 = run.c scope.c sv.c taint.c time64.c toke.c universal.c utf8.c util.c vms.c keywords.c
+c = $(c0) $(c1) $(c2) $(c3) $(c4)
 
 obj0 = perl$(O)
 obj1 = $(MALLOC_O) av$(O) builtin$(O) caretx$(O) deb$(O) doio$(O) doop$(O) dquote$(O) dump$(O) mro_core$(O) globals$(O) gv$(O) hv$(O)
 obj2 = keywords$(O) locale$(O) mathoms$(O) mg$(O) miniperlmain$(O) numeric$(O) op$(O) pad$(O) peep$(O) perlio$(O) 
 obj3 = perly$(O) pp$(O) pp_ctl$(O) pp_hot$(O) reentr$(O) pp_pack$(O) pp_sort$(O) pp_sys$(O) regcomp$(O) 
-obj4 = regexec$(O) run$(O) scope$(O) sv$(O) taint$(O) time64$(O) toke$(O) universal$(O) utf8$(O) util$(O) vms$(O)
+obj4 = regcomp_debug$(O) regcomp_invlist$(O) regcomp_study$(O) regcomp_trie$(O)
+obj5 = regexec$(O) run$(O) scope$(O) sv$(O) taint$(O) time64$(O) toke$(O) universal$(O) utf8$(O) util$(O) vms$(O)
 
-mini_obj = perlmini$(O) $(obj1) $(obj2) $(obj3) $(obj4)
-obj = $(obj0) $(obj1) $(obj2) $(obj3) $(obj4)
+mini_obj = perlmini$(O) $(obj1) $(obj2) $(obj3) $(obj4) $(obj5)
+obj = $(obj0) $(obj1) $(obj2) $(obj3) $(obj4) $(obj5)
 
 h0 = av.h config.h cop.h cv.h embed.h embedvar.h
 h1 = EXTERN.h form.h gv.h handy.h hv.h l1_char_class_tab.h INTERN.h intrpvar.h
 h2 = iperlsys.h keywords.h mydtrace.h mg.h mg_vtable.h nostdio.h op.h 
 h3 = op_reg_common.h opcode.h opnames.h overload.h pad.h parser.h patchlevel.h 
 h4 = perl.h perlapi.h perlio.h perlsdio.h perlvars.h perly.h
-h5 = pp.h pp_proto.h proto.h regcomp.h regexp.h regnodes.h scope.h
+h5 = pp.h pp_proto.h proto.h regcomp.h regcomp_internal.h regexp.h regnodes.h scope.h
 h6 = sv.h thread.h utf8.h util.h vmsish.h warnings.h xsub.h
 h = $(h0) $(h1) $(h2) $(h3) $(h4) $(h5) $(h6)
 
@@ -672,6 +674,14 @@ reentr$(O) : reentr.c $(h)
 	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
 regcomp$(O) : regcomp.c $(h)
 	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
+regcomp_debug$(O) : regcomp_debug.c $(h)
+	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
+regcomp_invlist$(O) : regcomp_invlist.c $(h)
+	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
+regcomp_study$(O) : regcomp_study.c $(h)
+	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
+regcomp_trie$(O) : regcomp_trie.c $(h)
+	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
 regexec$(O) : regexec.c $(h)
 	$(CC) $(CORECFLAGS) $(MMS$SOURCE)
 run$(O) : run.c $(h)
diff --git a/win32/GNUmakefile b/win32/GNUmakefile
index 6dd59e069e..7ba37332f3 100644
--- a/win32/GNUmakefile
+++ b/win32/GNUmakefile
@@ -941,6 +941,10 @@ NOOP		= @rem
 MICROCORE_SRC	=		\
 		..\toke.c	\
 		..\regcomp.c	\
+		..\regcomp_trie.c	\
+		..\regcomp_debug.c	\
+		..\regcomp_invlist.c	\
+		..\regcomp_study.c	\
 		..\regexec.c	\
 		..\op.c		\
 		..\sv.c		\
@@ -1017,6 +1021,7 @@ CORE_NOCFG_H	=		\
 		..\pp.h		\
 		..\proto.h	\
 		..\regcomp.h	\
+		..\regcomp_internal.h	\
 		..\regexp.h	\
 		..\scope.h	\
 		..\sv.h		\
@@ -1154,7 +1159,15 @@ ifeq ($(CCTYPE),)
 endif
 
 
-..\regcomp$(o) : ..\regnodes.h ..\regcharclass.h
+..\regcomp$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_debug$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_invlist$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_study$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_trie$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
 
 ..\regexec$(o) : ..\regnodes.h ..\regcharclass.h
 
@@ -1894,4 +1907,3 @@ nok: utils $(PERLEXE) $(PERLDLL) Extensions_nonxs Extensions
 
 nokfile: utils $(PERLEXE) $(PERLDLL) Extensions_nonxs Extensions
 	$(PERLEXE) ..\utils\perlbug -nok -s "(UNINSTALLED)" -F perl.nok
-
diff --git a/win32/Makefile b/win32/Makefile
index 4b347bde9f..b403767586 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -699,6 +699,10 @@ MICROCORE_SRC	=		\
 		..\pp_sys.c	\
 		..\reentr.c	\
 		..\regcomp.c	\
+		..\regcomp_trie.c	\
+		..\regcomp_debug.c	\
+		..\regcomp_invlist.c	\
+		..\regcomp_study.c	\
 		..\regexec.c	\
 		..\run.c	\
 		..\scope.c	\
@@ -747,6 +751,7 @@ CORE_NOCFG_H	=		\
 		..\pp.h		\
 		..\proto.h	\
 		..\regcomp.h	\
+		..\regcomp_internal.h	\
 		..\regexp.h	\
 		..\scope.h	\
 		..\sv.h		\
@@ -848,7 +853,15 @@ all : ..\git_version.h $(GLOBEXE) $(CONFIGPM) \
 
 regnodes : ..\regnodes.h
 
-..\regcomp$(o) : ..\regnodes.h ..\regcharclass.h
+..\regcomp$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_debug$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_invlist$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_study$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
+
+..\regcomp_trie$(o) : ..\regcomp.h ..\regcomp_internal.h ..\regnodes.h ..\regcharclass.h
 
 ..\regexec$(o) : ..\regnodes.h ..\regcharclass.h