10 files changed, 1341 insertions, 834 deletions
diff --git a/doop.c b/doop.c
index e0d63f13d1..3cb1354540 100644
--- a/doop.c
+++ b/doop.c
@@ -22,6 +22,7 @@
 #include "EXTERN.h"
 #define PERL_IN_DOOP_C
 #include "perl.h"
+#include "invlist_inline.h"
 
 #ifndef PERL_MICRO
 #include <signal.h>
@@ -297,328 +298,240 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
 
 
 /* Helper function for do_trans().
- * Handles utf8 cases(*) not involving the /c, /d, /s flags,
- * and where search and replacement charlists aren't identical.
- * (*) i.e. where the search or replacement charlists are utf8. sv may
- * or may not be utf8.
+ * Handles cases where an inversion map implementation is to be used and the
+ * search and replacement charlists are identical: so the string isn't
+ * modified, and only a count of modifiable chars is needed.
+ *
+ * Note that it doesn't handle /d nor /s, since these modify the string
+ * even if the replacement charlist is empty.
+ *
+ * sv may or may not be utf8.
  */
 
 STATIC Size_t
-S_do_trans_simple_utf8(pTHX_ SV * const sv)
+S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
 {
     U8 *s;
     U8 *send;
-    U8 *d;
-    U8 *start;
-    U8 *dstart, *dend;
     Size_t matches = 0;
-    const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
     STRLEN len;
-    SV* const  rv =
-#ifdef USE_ITHREADS
-		    PAD_SVl(cPADOP->op_padix);
-#else
-		    MUTABLE_SV(cSVOP->op_sv);
-#endif
-    HV* const  hv = MUTABLE_HV(SvRV(rv));
-    SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
-    const UV none = svp ? SvUV(*svp) : 0x7fffffff;
-    const UV extra = none + 1;
-    UV final = 0;
-    U8 hibit = 0;
+    SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
+    SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
+    SV* from_invlist = *from_invlist_ptr;
+    SV* to_invmap_sv = *to_invmap_ptr;
+    UV* map = (UV *) SvPVX(to_invmap_sv);
 
-    PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8;
+    PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP;
 
     s = (U8*)SvPV_nomg(sv, len);
-    if (!SvUTF8(sv)) {
-        hibit = ! is_utf8_invariant_string(s, len);
-        if (hibit) {
-            s = bytes_to_utf8(s, &len);
-	}
-    }
+
     send = s + len;
-    start = s;
 
-    svp = hv_fetchs(hv, "FINAL", FALSE);
-    if (svp)
-	final = SvUV(*svp);
+    while (s < send) {
+        UV from;
+        SSize_t i;
+        STRLEN s_len;
+
+        /* Get the code point of the next character in the string */
+        if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
+            from = *s;
+            s_len = 1;
+        }
+        else {
+            from = utf8_to_uvchr_buf(s, send, &s_len);
+            if (from == 0 && *s != '\0') {
+                _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
+            }
+        }
 
-    if (grows) {
-	/* d needs to be bigger than s, in case e.g. upgrading is required */
-	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
-	dend = d + len * 3;
-	dstart = d;
-    }
-    else {
-	dstart = d = s;
-	dend = d + len;
-    }
+        /* Look the code point up in the data structure for this tr/// to get
+         * what it maps to */
+        i = _invlist_search(from_invlist, from);
+        assert(i >= 0);
 
-    while (s < send) {
-	const UV uv = swash_fetch(rv, s, TRUE);
-	if (uv < none) {
-	    s += UTF8SKIP(s);
-	    matches++;
-	    d = uvchr_to_utf8(d, uv);
-	}
-	else if (uv == none) {
-	    const int i = UTF8SKIP(s);
-	    Move(s, d, i, U8);
-	    d += i;
-	    s += i;
-	}
-	else if (uv == extra) {
-	    s += UTF8SKIP(s);
-	    matches++;
-	    d = uvchr_to_utf8(d, final);
-	}
-	else
-	    s += UTF8SKIP(s);
-
-	if (d > dend) {
-	    const STRLEN clen = d - dstart;
-	    const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
-	    if (!grows)
-		Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
-	    Renew(dstart, nlen + UTF8_MAXBYTES, U8);
-	    d = dstart + clen;
-	    dend = dstart + nlen;
-	}
-    }
-    if (grows || hibit) {
-	sv_setpvn(sv, (char*)dstart, d - dstart);
-	Safefree(dstart);
-	if (grows && hibit)
-	    Safefree(start);
-    }
-    else {
-	*d = '\0';
-	SvCUR_set(sv, d - dstart);
+        if (map[i] != (UV) TR_UNLISTED) {
+            matches++;
+        }
+
+        s += s_len;
     }
-    SvSETMAGIC(sv);
-    SvUTF8_on(sv);
 
     return matches;
 }
 
 
 /* Helper function for do_trans().
- * Handles utf8 cases(*) where search and replacement charlists are
- * identical: so the string isn't modified, and only a count of modifiable
- * chars is needed.
- * Note that it doesn't handle /d or /s, since these modify the string
- * even if the replacement charlist is empty.
- * (*) i.e. where the search or replacement charlists are utf8. sv may
- * or may not be utf8.
+ * Handles cases where an inversion map implementation is to be used and the
+ * search and replacement charlists are either not identical or flags are
+ * present.
+ *
+ * sv may or may not be utf8.
  */
 
 STATIC Size_t
-S_do_trans_count_utf8(pTHX_ SV * const sv)
+S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
 {
-    const U8 *s;
-    const U8 *start = NULL;
-    const U8 *send;
+    U8 *s;
+    U8 *send;
+    U8 *d;
+    U8 *s0;
+    U8 *d0;
     Size_t matches = 0;
     STRLEN len;
-    SV* const  rv =
-#ifdef USE_ITHREADS
-		    PAD_SVl(cPADOP->op_padix);
-#else
-		    MUTABLE_SV(cSVOP->op_sv);
-#endif
-    HV* const hv = MUTABLE_HV(SvRV(rv));
-    SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
-    const UV none = svp ? SvUV(*svp) : 0x7fffffff;
-    const UV extra = none + 1;
-    U8 hibit = 0;
-
-    PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8;
+    SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
+    SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
+    SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE);
+    NV max_expansion = SvNV(*to_expansion_ptr);
+    SV* from_invlist = *from_invlist_ptr;
+    SV* to_invmap_sv = *to_invmap_ptr;
+    UV* map = (UV *) SvPVX(to_invmap_sv);
+    UV previous_map = TR_OOB;
+    const bool squash         = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
+    const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE);
+    bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS);
+    const UV* from_array = invlist_array(from_invlist);
+    UV final_map;
+    bool out_is_utf8 = SvUTF8(sv);
+    STRLEN s_len;
+
+    PERL_ARGS_ASSERT_DO_TRANS_INVMAP;
+
+    /* A third element in the array indicates that the replacement list was
+     * shorter than the search list, and this element contains the value to use
+     * for the items that don't correspond */
+    if (av_top_index(invmap) >= 3) {
+        SV** const final_map_ptr = av_fetch(invmap, 3, TRUE);
+        SV*  const final_map_sv = *final_map_ptr;
+        final_map = SvUV(final_map_sv);
+    }
 
-    s = (const U8*)SvPV_nomg_const(sv, len);
-    if (!SvUTF8(sv)) {
-        hibit = ! is_utf8_invariant_string(s, len);
-        if (hibit) {
-            start = s = bytes_to_utf8(s, &len);
-	}
+    /* If there is something in the transliteration that could force the input
+     * to be changed to UTF-8, we don't know if we can do it in place, so
+     * assume cannot */
+    if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) {
+        inplace = FALSE;
+        if (max_expansion < 2) {
+            max_expansion = 2;
+        }
     }
+
+    s = (U8*)SvPV_nomg(sv, len);
     send = s + len;
+    s0 = s;
 
-    while (s < send) {
-	const UV uv = swash_fetch(rv, s, TRUE);
-	if (uv < none || uv == extra)
-	    matches++;
-	s += UTF8SKIP(s);
+    /* We know by now if there are some possible input strings whose
+     * transliterations are longer than the input.  If none can, we just edit
+     * in place. */
+    if (inplace) {
+	d0 = d = s;
+    }
+    else {
+        /* Here, we can't edit in place.  We have no idea how much, if any,
+         * this particular input string will grow.  However, the compilation
+         * calculated the maximum expansion possible.  Use that to allocale
+         * based on the worst case scenario. */
+	Newx(d, len * max_expansion + 1, U8);
+	d0 = d;
     }
-    if (hibit)
-        Safefree(start);
 
-    return matches;
-}
+  restart:
 
+    /* Do the actual transliteration */
+    while (s < send) {
+        UV from;
+        UV to;
+        SSize_t i;
+        STRLEN s_len;
+
+        /* Get the code point of the next character in the string */
+        if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
+            from = *s;
+            s_len = 1;
+        }
+        else {
+            from = utf8_to_uvchr_buf(s, send, &s_len);
+            if (from == 0 && *s != '\0') {
+                _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
+            }
+        }
 
-/* Helper function for do_trans().
- * Handles utf8 cases(*) involving the /c, /d, /s flags,
- * and where search and replacement charlists aren't identical.
- * (*) i.e. where the search or replacement charlists are utf8. sv may
- * or may not be utf8.
- */
+        /* Look the code point up in the data structure for this tr/// to get
+         * what it maps to */
+        i = _invlist_search(from_invlist, from);
+        assert(i >= 0);
 
-STATIC Size_t
-S_do_trans_complex_utf8(pTHX_ SV * const sv)
-{
-    U8 *start, *send;
-    U8 *d;
-    Size_t matches = 0;
-    const bool squash   = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
-    const bool del      = cBOOL(PL_op->op_private & OPpTRANS_DELETE);
-    const bool grows    = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
-    SV* const  rv =
-#ifdef USE_ITHREADS
-		    PAD_SVl(cPADOP->op_padix);
-#else
-		    MUTABLE_SV(cSVOP->op_sv);
-#endif
-    HV * const hv = MUTABLE_HV(SvRV(rv));
-    SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
-    const UV none = svp ? SvUV(*svp) : 0x7fffffff;
-    const UV extra = none + 1;
-    UV final = 0;
-    bool havefinal = FALSE;
-    STRLEN len;
-    U8 *dstart, *dend;
-    U8 hibit = 0;
-    U8 *s = (U8*)SvPV_nomg(sv, len);
+        to = map[i];
 
-    PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8;
+        if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */
+            if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) {
+                *d++ = from;
+            }
+            else if (SvUTF8(sv)) {
+                Move(s, d, s_len, U8);
+                d += s_len;
+            }
+            else {  /* Convert to UTF-8 */
+                append_utf8_from_native_byte(*s, &d);
+            }
 
-    if (!SvUTF8(sv)) {
-        hibit = ! is_utf8_invariant_string(s, len);
-        if (hibit) {
-            s = bytes_to_utf8(s, &len);
+            previous_map = to;
+            s += s_len;
+            continue;
 	}
-    }
-    send = s + len;
-    start = s;
 
-    svp = hv_fetchs(hv, "FINAL", FALSE);
-    if (svp) {
-	final = SvUV(*svp);
-	havefinal = TRUE;
-    }
+        /* Everything else is counted as a match */
+        matches++;
 
-    if (grows) {
-	/* d needs to be bigger than s, in case e.g. upgrading is required */
-	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
-	dend = d + len * 3;
-	dstart = d;
-    }
-    else {
-	dstart = d = s;
-	dend = d + len;
-    }
+        if (to == (UV) TR_SPECIAL_HANDLING) {
+            if (delete_unfound) {
+                previous_map = to;
+                s += s_len;
+                continue;
+            }
 
-    if (squash) {
-	UV puv = 0xfeedface;
-	while (s < send) {
-	    UV uv = swash_fetch(rv, s, TRUE);
-
-	    if (d > dend) {
-		const STRLEN clen = d - dstart;
-		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
-		if (!grows)
-		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
-		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
-		d = dstart + clen;
-		dend = dstart + nlen;
-	    }
-	    if (uv < none) {
-		matches++;
-		s += UTF8SKIP(s);
-		if (uv != puv) {
-		    d = uvchr_to_utf8(d, uv);
-		    puv = uv;
-		}
-		continue;
-	    }
-	    else if (uv == none) {	/* "none" is unmapped character */
-		const int i = UTF8SKIP(s);
-		Move(s, d, i, U8);
-		d += i;
-		s += i;
-		puv = 0xfeedface;
-		continue;
-	    }
-	    else if (uv == extra && !del) {
-		matches++;
-		if (havefinal) {
-		    s += UTF8SKIP(s);
-		    if (puv != final) {
-			d = uvchr_to_utf8(d, final);
-			puv = final;
-		    }
-		}
-		else {
-		    STRLEN len;
-		    uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT);
-		    if (uv != puv) {
-			Move(s, d, len, U8);
-			d += len;
-			puv = uv;
-		    }
-		    s += len;
-		}
-		continue;
-	    }
-	    matches++;			/* "none+1" is delete character */
-	    s += UTF8SKIP(s);
-	}
-    }
-    else {
-	while (s < send) {
-	    const UV uv = swash_fetch(rv, s, TRUE);
-	    if (d > dend) {
-	        const STRLEN clen = d - dstart;
-		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
-		if (!grows)
-		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
-		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
-		d = dstart + clen;
-		dend = dstart + nlen;
-	    }
-	    if (uv < none) {
-		matches++;
-		s += UTF8SKIP(s);
-		d = uvchr_to_utf8(d, uv);
-		continue;
-	    }
-	    else if (uv == none) {	/* "none" is unmapped character */
-		const int i = UTF8SKIP(s);
-		Move(s, d, i, U8);
-		d += i;
-		s += i;
-		continue;
-	    }
-	    else if (uv == extra && !del) {
-		matches++;
-		s += UTF8SKIP(s);
-		d = uvchr_to_utf8(d, final);
-		continue;
-	    }
-	    matches++;			/* "none+1" is delete character */
-	    s += UTF8SKIP(s);
-	}
+            /* Use the final character in the replacement list */
+            to = final_map;
+        }
+        else { /* Here the input code point is to be remapped.  The actual
+                  value is offset from the base of this entry */
+            to += from - from_array[i];
+        }
+
+        /* If copying all occurrences, or this is the first occurrence, copy it
+         * to the output */
+        if (! squash || to != previous_map) {
+            if (out_is_utf8) {
+                d = uvchr_to_utf8(d, to);
+            }
+            else {
+                if (to >= 256) {    /* If need to convert to UTF-8, restart */
+                    out_is_utf8 = TRUE;
+                    s = s0;
+                    d = d0;
+                    matches = 0;
+                    goto restart;
+                }
+                *d++ = to;
+            }
+        }
+
+        previous_map = to;
+        s += s_len;
     }
-    if (grows || hibit) {
-	sv_setpvn(sv, (char*)dstart, d - dstart);
-	Safefree(dstart);
-	if (grows && hibit)
-	    Safefree(start);
+
+    s_len = 0;
+    s += s_len;
+    if (! inplace) {
+	sv_setpvn(sv, (char*)d0, d - d0);
     }
     else {
 	*d = '\0';
-	SvCUR_set(sv, d - dstart);
+	SvCUR_set(sv, d - d0);
+    }
+
+    if (! SvUTF8(sv) && out_is_utf8) {
+        SvUTF8_on(sv);
     }
-    SvUTF8_on(sv);
     SvSETMAGIC(sv);
 
     return matches;
@@ -627,7 +540,8 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
 
 /* Execute a tr//. sv is the value to be translated, while PL_op
  * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a
- * translation table or whose op_sv field contains a swash.
+ * translation table or whose op_sv field contains an inversion map.
+ *
  * Returns a count of number of characters translated
  */
 
@@ -636,31 +550,49 @@ Perl_do_trans(pTHX_ SV *sv)
 {
     STRLEN len;
     const U8 flags = PL_op->op_private;
-    const U8 hasutf = flags & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF);
+    bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP);
+    bool identical     = cBOOL(flags & OPpTRANS_IDENTICAL);
 
     PERL_ARGS_ASSERT_DO_TRANS;
 
-    if (SvREADONLY(sv) && !(flags & OPpTRANS_IDENTICAL)) {
+    if (SvREADONLY(sv) && ! identical) {
         Perl_croak_no_modify();
     }
     (void)SvPV_const(sv, len);
     if (!len)
 	return 0;
-    if (!(flags & OPpTRANS_IDENTICAL)) {
+    if (! identical) {
 	if (!SvPOKp(sv) || SvTHINKFIRST(sv))
 	    (void)SvPV_force_nomg(sv, len);
 	(void)SvPOK_only_UTF8(sv);
     }
 
-    /* If we use only OPpTRANS_IDENTICAL to bypass the READONLY check,
-     * we must also rely on it to choose the readonly strategy.
-     */
-    if (flags & OPpTRANS_IDENTICAL) {
-        return hasutf ? do_trans_count_utf8(sv) : do_trans_count(sv, (OPtrans_map*)cPVOP->op_pv);
-    } else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
-        return hasutf ? do_trans_complex_utf8(sv) : do_trans_complex(sv, (OPtrans_map*)cPVOP->op_pv);
-    } else {
-        return hasutf ? do_trans_simple_utf8(sv) : do_trans_simple(sv, (OPtrans_map*)cPVOP->op_pv);
+    if (use_utf8_fcns) {
+        SV* const map =
+#ifdef USE_ITHREADS
+                        PAD_SVl(cPADOP->op_padix);
+#else
+                        MUTABLE_SV(cSVOP->op_sv);
+#endif
+
+        if (identical) {
+            return do_trans_count_invmap(sv, (AV *) map);
+        }
+        else {
+            return do_trans_invmap(sv, (AV *) map);
+        }
+    }
+    else {
+        const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv;
+
+        if (identical) {
+            return do_trans_count(sv, map);
+        }
+        else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
+            return do_trans_complex(sv, map);
+        }
+        else
+            return do_trans_simple(sv, map);
     }
 }
 
diff --git a/dump.c b/dump.c
index 78c151da16..f03c3f6153 100644
--- a/dump.c
+++ b/dump.c
@@ -1305,13 +1305,13 @@ S_do_op_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const OP *o)
 
     case OP_TRANS:
     case OP_TRANSR:
-        if (o->op_private & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF)) {
-            /* utf8: table stored as a swash */
+        if (o->op_private & OPpTRANS_USE_SVOP) {
+            /* utf8: table stored as an inversion map */
 #ifndef USE_ITHREADS
-	/* with ITHREADS, swash is stored in the pad, and the right pad
+	/* with ITHREADS, it is stored in the pad, and the right pad
 	 * may not be active here, so skip */
             S_opdump_indent(aTHX_ o, level, bar, file,
-                            "SWASH = 0x%" UVxf "\n",
+                            "INVMAP = 0x%" UVxf "\n",
                             PTR2UV(MUTABLE_SV(cSVOPo->op_sv)));
 #endif
         }
@@ -2986,11 +2986,10 @@ Perl_op_class(pTHX_ const OP *o)
          * pointer to a table of shorts used to look up translations.
          * Under utf8, however, a simple table isn't practical; instead,
          * the OP is an SVOP (or, under threads, a PADOP),
-         * and the SV is a reference to a swash
-         * (i.e., an RV pointing to an HV).
+         * and the SV is an AV.
          */
 	return (!custom &&
-		   (o->op_private & (OPpTRANS_TO_UTF|OPpTRANS_FROM_UTF))
+		   (o->op_private & OPpTRANS_USE_SVOP)
 	       )
 #if  defined(USE_ITHREADS)
 		? OPclass_PADOP : OPclass_PVOP;
diff --git a/embed.fnc b/embed.fnc
index 87c5159b8c..76ec0c0909 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1870,7 +1870,7 @@ Apd	|void	|sv_vsetpvfn	|NN SV *const sv|NN const char *const pat|const STRLEN pa
 ApR	|NV	|str_to_version	|NN SV *sv
 EXpR	|SV*	|swash_init	|NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits|I32 none
 EXp	|UV	|swash_fetch	|NN SV *swash|NN const U8 *ptr|bool do_utf8
-#if defined(PERL_IN_REGCOMP_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
 EiR	|SV*	|add_cp_to_invlist	|NULLOK SV* invlist|const UV cp
 Ei	|void	|invlist_extend    |NN SV* const invlist|const UV len
 Ei	|void	|invlist_set_len|NN SV* const invlist|const UV len|const bool offset
@@ -1922,7 +1922,8 @@ EpX	|SV*	|invlist_clone	|NN SV* const invlist|NULLOK SV* newlist
 #endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	\
  || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		\
- || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)
+ || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		\
+ || defined(PERL_IN_DOOP_C)
 EiRT	|UV*	|invlist_array	|NN SV* const invlist
 EiRT	|bool	|is_invlist	|NULLOK SV* const invlist
 EiRT	|bool*	|get_invlist_offset_addr|NN SV* invlist
@@ -2308,9 +2309,8 @@ p	|void	|init_constants
 SR	|Size_t	|do_trans_simple	|NN SV * const sv|NN const OPtrans_map * const tbl
 SR	|Size_t	|do_trans_count		|NN SV * const sv|NN const OPtrans_map * const tbl
 SR	|Size_t	|do_trans_complex	|NN SV * const sv|NN const OPtrans_map * const tbl
-SR	|Size_t	|do_trans_simple_utf8	|NN SV * const sv
-SR	|Size_t	|do_trans_count_utf8	|NN SV * const sv
-SR	|Size_t	|do_trans_complex_utf8	|NN SV * const sv
+SR	|Size_t	|do_trans_invmap	|NN SV * const sv|NN AV * const map
+SR	|Size_t	|do_trans_count_invmap  |NN SV * const sv|NN AV * const map
 #endif
 
 #if defined(PERL_IN_GV_C)
diff --git a/embed.h b/embed.h
index 425ba304da..5a1c6fe190 100644
--- a/embed.h
+++ b/embed.h
@@ -1014,7 +1014,6 @@
 #  endif
 #  if defined(PERL_IN_REGCOMP_C)
 #define add_above_Latin1_folds(a,b,c)	S_add_above_Latin1_folds(aTHX_ a,b,c)
-#define add_cp_to_invlist(a,b)	S_add_cp_to_invlist(aTHX_ a,b)
 #define add_data		S_add_data
 #define add_multi_match(a,b,c)	S_add_multi_match(aTHX_ a,b,c)
 #define change_engine_size(a,b)	S_change_engine_size(aTHX_ a,b)
@@ -1024,20 +1023,13 @@
 #define edit_distance		S_edit_distance
 #define get_ANYOFM_contents(a)	S_get_ANYOFM_contents(aTHX_ a)
 #define get_ANYOF_cp_list_for_ssc(a,b)	S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
-#define get_invlist_iter_addr	S_get_invlist_iter_addr
 #define grok_bslash_N(a,b,c,d,e,f,g)	S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
 #define handle_named_backref(a,b,c,d)	S_handle_named_backref(aTHX_ a,b,c,d)
 #define handle_possible_posix(a,b,c,d,e)	S_handle_possible_posix(aTHX_ a,b,c,d,e)
 #define handle_regex_sets(a,b,c,d,e)	S_handle_regex_sets(aTHX_ a,b,c,d,e)
 #define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j)	Perl_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
 #define invlist_contents(a,b)	S_invlist_contents(aTHX_ a,b)
-#define invlist_extend(a,b)	S_invlist_extend(aTHX_ a,b)
-#define invlist_highest		S_invlist_highest
 #define invlist_is_iterating	S_invlist_is_iterating
-#define invlist_iterfinish	S_invlist_iterfinish
-#define invlist_iterinit	S_invlist_iterinit
-#define invlist_iternext	S_invlist_iternext
-#define invlist_set_len(a,b,c)	S_invlist_set_len(aTHX_ a,b,c)
 #define is_ssc_worth_it		S_is_ssc_worth_it
 #define join_exact(a,b,c,d,e,f,g)	S_join_exact(aTHX_ a,b,c,d,e,f,g)
 #define make_exactf_invlist(a,b)	S_make_exactf_invlist(aTHX_ a,b)
@@ -1083,6 +1075,16 @@
 #  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
 #define _invlist_dump(a,b,c,d)	Perl__invlist_dump(aTHX_ a,b,c,d)
 #  endif
+#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
+#define add_cp_to_invlist(a,b)	S_add_cp_to_invlist(aTHX_ a,b)
+#define get_invlist_iter_addr	S_get_invlist_iter_addr
+#define invlist_extend(a,b)	S_invlist_extend(aTHX_ a,b)
+#define invlist_highest		S_invlist_highest
+#define invlist_iterfinish	S_invlist_iterfinish
+#define invlist_iterinit	S_invlist_iterinit
+#define invlist_iternext	S_invlist_iternext
+#define invlist_set_len(a,b,c)	S_invlist_set_len(aTHX_ a,b,c)
+#  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
 #define _invlistEQ(a,b,c)	Perl__invlistEQ(aTHX_ a,b,c)
 #define _new_invlist_C_array(a)	Perl__new_invlist_C_array(aTHX_ a)
@@ -1094,7 +1096,7 @@
 #endif
 #define regprop(a,b,c,d,e)	Perl_regprop(aTHX_ a,b,c,d,e)
 #  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)
+#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_DOOP_C)
 #define _invlist_contains_cp	S__invlist_contains_cp
 #define _invlist_len		S__invlist_len
 #define _invlist_search		Perl__invlist_search
@@ -1603,11 +1605,10 @@
 #  endif
 #  if defined(PERL_IN_DOOP_C)
 #define do_trans_complex(a,b)	S_do_trans_complex(aTHX_ a,b)
-#define do_trans_complex_utf8(a)	S_do_trans_complex_utf8(aTHX_ a)
 #define do_trans_count(a,b)	S_do_trans_count(aTHX_ a,b)
-#define do_trans_count_utf8(a)	S_do_trans_count_utf8(aTHX_ a)
+#define do_trans_count_invmap(a,b)	S_do_trans_count_invmap(aTHX_ a,b)
+#define do_trans_invmap(a,b)	S_do_trans_invmap(aTHX_ a,b)
 #define do_trans_simple(a,b)	S_do_trans_simple(aTHX_ a,b)
-#define do_trans_simple_utf8(a)	S_do_trans_simple_utf8(aTHX_ a)
 #  endif
 #  if defined(PERL_IN_DUMP_C)
 #define deb_curcv(a)		S_deb_curcv(aTHX_ a)
diff --git a/invlist_inline.h b/invlist_inline.h
index 76d6dda998..33f8aee385 100644
--- a/invlist_inline.h
+++ b/invlist_inline.h
@@ -14,7 +14,8 @@
  || defined(PERL_IN_REGEXEC_C)          \
  || defined(PERL_IN_TOKE_C)             \
  || defined(PERL_IN_PP_C)               \
- || defined(PERL_IN_OP_C)
+ || defined(PERL_IN_OP_C)               \
+ || defined(PERL_IN_DOOP_C)
 
 /* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
  * etc */
@@ -92,7 +93,7 @@ S_invlist_array(SV* const invlist)
 }
 
 #endif
-#if defined(PERL_IN_REGCOMP_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
 
 PERL_STATIC_INLINE void
 S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm
index 88555ffbe3..1ae4619d5d 100644
--- a/lib/B/Deparse.pm
+++ b/lib/B/Deparse.pm
@@ -279,6 +279,8 @@ BEGIN { for (qw[ const stringify rv2sv list glob pushmark null aelem
 # _pessimise_walk(): recursively walk the optree of a sub,
 # possibly undoing optimisations along the way.
 
+sub DEBUG { 0 }
+
 sub _pessimise_walk {
     my ($self, $startop) = @_;
 
@@ -5714,100 +5716,81 @@ sub tr_chr {
     }
 }
 
-# XXX This doesn't yet handle all cases correctly either
+sub tr_invmap {
+    my ($invlist_ref, $map_ref) = @_;
 
-sub tr_decode_utf8 {
-    my($swash_hv, $flags) = @_;
-    my %swash = $swash_hv->ARRAY;
-    my $final = undef;
-    $final = $swash{'FINAL'}->IV if exists $swash{'FINAL'};
-    my $none = $swash{"NONE"}->IV;
-    my $extra = $none + 1;
-    my(@from, @delfrom, @to);
-    my $line;
-    foreach $line (split /\n/, $swash{'LIST'}->PV) {
-	my($min, $max, $result) = split(/\t/, $line);
-	$min = hex $min;
-	if (length $max) {
-	    $max = hex $max;
-	} else {
-	    $max = $min;
-	}
-	$result = hex $result;
-	if ($result == $extra) {
-	    push @delfrom, [$min, $max];
-	} else {
-	    push @from, [$min, $max];
-	    push @to, [$result, $result + $max - $min];
-	}
-    }
-    for my $i (0 .. $#from) {
-	if ($from[$i][0] == ord '-') {
-	    unshift @from, splice(@from, $i, 1);
-	    unshift @to, splice(@to, $i, 1);
-	    last;
-	} elsif ($from[$i][1] == ord '-') {
-	    $from[$i][1]--;
-	    $to[$i][1]--;
-	    unshift @from, ord '-';
-	    unshift @to, ord '-';
-	    last;
-	}
-    }
-    for my $i (0 .. $#delfrom) {
-	if ($delfrom[$i][0] == ord '-') {
-	    push @delfrom, splice(@delfrom, $i, 1);
-	    last;
-	} elsif ($delfrom[$i][1] == ord '-') {
-	    $delfrom[$i][1]--;
-	    push @delfrom, ord '-';
-	    last;
-	}
+    my $infinity = ~0 >> 1;     # IV_MAX
+    my $from = "";
+    my $to = "";
+
+    for my $i (0.. @$invlist_ref - 1) {
+        my $this_from = $invlist_ref->[$i];
+        my $map = $map_ref->[$i];
+        my $upper = ($i < @$invlist_ref - 1)
+                     ? $invlist_ref->[$i+1]
+                     : $infinity;
+        my $range = $upper - $this_from - 1;
+        if (DEBUG) {
+            print STDERR "i=$i, from=$this_from, upper=$upper, range=$range\n";
+        }
+        next if $map == ~0;
+        next if $map == ~0 - 1;
+        $from .= tr_chr($this_from);
+        $to .= tr_chr($map);
+        next if $range == 0;    # Single code point
+        if ($range == 1) {      # Adjacent code points
+            $from .= tr_chr($this_from + 1);
+            $to   .= tr_chr($map + 1);
+        }
+        elsif ($upper != $infinity) {
+            $from .= "-" . tr_chr($this_from + $range);
+            $to   .= "-" . tr_chr($map + $range);
+        }
+        else {
+            $from .= "-INFTY";
+            $to   .= "-INFTY";
+        }
     }
-    if (defined $final and $to[$#to][1] != $final) {
-	push @to, [$final, $final];
+
+    return ($from, $to);
+}
+
+sub tr_decode_utf8 {
+    my($tr_av, $flags) = @_;
+    printf STDERR "flags=0x%x\n", $flags if DEBUG;
+    my $invlist = $tr_av->ARRAYelt(0);
+    my @invlist = unpack("J*", $invlist->PV);
+    my @map = unpack("J*", $tr_av->ARRAYelt(1)->PV);
+
+    if (DEBUG) {
+        for my $i (0 .. @invlist - 1) {
+            printf STDERR "[%d]\t%x\t", $i, $invlist[$i];
+            my $map = $map[$i];
+            if ($map == ~0) {
+                print STDERR "TR_UNMAPPED\n";
+            }
+            elsif ($map == ~0 - 1) {
+                print STDERR "TR_SPECIAL\n";
+            }
+            else {
+                printf STDERR "%x\n", $map;
+            }
+        }
     }
-    push @from, @delfrom;
+
+    my ($from, $to) = tr_invmap(\@invlist, \@map);
+
     if ($flags & OPpTRANS_COMPLEMENT) {
-	my @newfrom;
-	my $next = 0;
-	for my $i (0 .. $#from) {
-	    push @newfrom, [$next, $from[$i][0] - 1];
-	    $next = $from[$i][1] + 1;
-	}
-	@from = ();
-	for my $range (@newfrom) {
-	    if ($range->[0] <= $range->[1]) {
-		push @from, $range;
-	    }
-	}
-    }
-    my($from, $to, $diff);
-    for my $chunk (@from) {
-	$diff = $chunk->[1] - $chunk->[0];
-	if ($diff > 1) {
-	    $from .= tr_chr($chunk->[0]) . "-" . tr_chr($chunk->[1]);
-	} elsif ($diff == 1) {
-	    $from .= tr_chr($chunk->[0]) . tr_chr($chunk->[1]);
-	} else {
-	    $from .= tr_chr($chunk->[0]);
-	}
+        shift @map;
+        pop @invlist;
+        my $throw_away;
+        ($from, $throw_away) = tr_invmap(\@invlist, \@map);
     }
-    for my $chunk (@to) {
-	$diff = $chunk->[1] - $chunk->[0];
-	if ($diff > 1) {
-	    $to .= tr_chr($chunk->[0]) . "-" . tr_chr($chunk->[1]);
-	} elsif ($diff == 1) {
-	    $to .= tr_chr($chunk->[0]) . tr_chr($chunk->[1]);
-	} else {
-	    $to .= tr_chr($chunk->[0]);
-	}
+
+    if (DEBUG) {
+        print STDERR "Returning ", escape_str($from), "/",
+                                   escape_str($to), "\n";
     }
-    #$final = sprintf("%04x", $final) if defined $final;
-    #$none = sprintf("%04x", $none) if defined $none;
-    #$extra = sprintf("%04x", $extra) if defined $extra;
-    #print STDERR "final: $final\n none: $none\nextra: $extra\n";
-    #print STDERR $swash{'LIST'}->PV;
     return (escape_str($from), escape_str($to));
 }
 
@@ -5821,9 +5804,9 @@ sub pp_trans {
 	($from, $to) = tr_decode_byte($op->pv, $priv_flags);
     } elsif ($class eq "PADOP") {
 	($from, $to)
-	  = tr_decode_utf8($self->padval($op->padix)->RV, $priv_flags);
+	  = tr_decode_utf8($self->padval($op->padix), $priv_flags);
     } else { # class($op) eq "SVOP"
-	($from, $to) = tr_decode_utf8($op->sv->RV, $priv_flags);
+	($from, $to) = tr_decode_utf8($op->sv, $priv_flags);
     }
     my $flags = "";
     $flags .= "c" if $priv_flags & OPpTRANS_COMPLEMENT;
diff --git a/op.c b/op.c
index 12ee52a453..91eb50aee9 100644
--- a/op.c
+++ b/op.c
@@ -1059,7 +1059,7 @@ Perl_op_clear(pTHX_ OP *o)
     case OP_TRANS:
     case OP_TRANSR:
 	if (   (o->op_type == OP_TRANS || o->op_type == OP_TRANSR)
-            && (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)))
+            && (o->op_private & OPpTRANS_USE_SVOP))
         {
 #ifdef USE_ITHREADS
 	    if (cPADOPo->op_padix > 0) {
@@ -6784,8 +6784,8 @@ static int uvcompare(const void *a, const void *b)
  *   OPpTRANS_SQUASH
  *   OPpTRANS_DELETE
  * flags as appropriate; this function may add
- *   OPpTRANS_FROM_UTF
- *   OPpTRANS_TO_UTF
+ *   OPpTRANS_USE_SVOP
+ *   OPpTRANS_CAN_FORCE_UTF8
  *   OPpTRANS_IDENTICAL
  *   OPpTRANS_GROWS
  * flags
@@ -6794,416 +6794,1028 @@ static int uvcompare(const void *a, const void *b)
 static OP *
 S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 {
+    /* This function compiles a tr///, from data gathered from toke.c, into a
+     * form suitable for use by do_trans() in doop.c at runtime.
+     *
+     * It first normalizes the data, while discarding extraneous inputs; then
+     * writes out the compiled data.  The normalization allows for complete
+     * analysis, and avoids some false negatives and positives earlier versions
+     * of this code had.
+     *
+     * The normalization form is an inversion map (described below in detail).
+     * This is essentially the compiled form for tr///'s that require UTF-8,
+     * and its easy to use it to write the 257-byte table for tr///'s that
+     * don't need UTF-8.  That table is identical to what's been in use for
+     * many perl versions, except that it doesn't handle some edge cases that
+     * it used to, involving code points above 255.  The UTF-8 form now handles
+     * these.  (This could be changed with extra coding should it shown to be
+     * desirable.)
+     *
+     * If the complement (/c) option is specified, the lhs string (tstr) is
+     * parsed into an inversion list.  Complementing these is trivial.  Then a
+     * complemented tstr is built from that, and used thenceforth.  This hides
+     * the fact that it was complemented from almost all successive code.
+     *
+     * One of the important characteristics to know about the input is whether
+     * the transliteration may be done in place, or does a temporary need to be
+     * allocated, then copied.  If the replacement for every character in every
+     * possible string takes up no more bytes than the the character it
+     * replaces, then it can be edited in place.  Otherwise the replacement
+     * could "grow", depending on the strings being processed.  Some inputs
+     * won't grow, and might even shrink under /d, but some inputs could grow,
+     * so we have to assume any given one might grow.  On very long inputs, the
+     * temporary could eat up a lot of memory, so we want to avoid it if
+     * possible.  For non-UTF-8 inputs, everything is single-byte, so can be
+     * edited in place, unless there is something in the pattern that could
+     * force it into UTF-8.  The inversion map makes it feasible to determine
+     * this.  Previous versions of this code pretty much punted on determining
+     * if UTF-8 could be edited in place.  Now, this code is rigorous in making
+     * that determination.
+     *
+     * Another characteristic we need to know is whether the lhs and rhs are
+     * identical.  If so, and no other flags are present, the only effect of
+     * the tr/// is to count the characters present in the input that are
+     * mentioned in the lhs string.  The implementation of that is easier and
+     * runs faster than the more general case.  Normalizing here allows for
+     * accurate determination of this.  Previously there were false negatives
+     * possible.
+     *
+     * Instead of 'transliterated', the comments here use 'unmapped' for the
+     * characters that are left unchanged by the operation; otherwise they are
+     * 'mapped'
+     *
+     * The lhs of the tr/// is here referred to as the t side.
+     * The rhs of the tr/// is here referred to as the r side.
+     */
+
     SV * const tstr = ((SVOP*)expr)->op_sv;
     SV * const rstr = ((SVOP*)repl)->op_sv;
     STRLEN tlen;
     STRLEN rlen;
-    const U8 *t = (U8*)SvPV_const(tstr, tlen);
-    const U8 *r = (U8*)SvPV_const(rstr, rlen);
-    Size_t i, j;
-    bool grows = FALSE;
-    OPtrans_map *tbl;
-    SSize_t struct_size; /* malloced size of table struct */
-
+    const U8 * t0 = (U8*)SvPV_const(tstr, tlen);
+    const U8 * r0 = (U8*)SvPV_const(rstr, rlen);
+    const U8 * t = t0;
+    const U8 * r = r0;
+    Size_t t_count = 0, r_count = 0;  /* Number of characters in search and
+                                         replacement lists */
+
+    /* khw thinks some of the private flags for this op are quaintly named.
+     * OPpTRANS_GROWS for example is TRUE if the replacement for some lhs
+     * character when represented in UTF-8 is longer than the original
+     * character's UTF-8 representation */
     const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT);
     const bool squash     = cBOOL(o->op_private & OPpTRANS_SQUASH);
     const bool del        = cBOOL(o->op_private & OPpTRANS_DELETE);
-    SV* swash;
+
+    /* Set to true if there is some character < 256 in the lhs that maps to >
+     * 255.  If so, a non-UTF-8 match string can be forced into requiring to be
+     * in UTF-8 by a tr/// operation. */
+    bool can_force_utf8 = FALSE;
+
+    /* What is the maximum expansion factor in UTF-8 transliterations.  If a
+     * 2-byte UTF-8 encoded character is to be replaced by a 3-byte one, its
+     * expansion factor is 1.5.  This number is used at runtime to calculate
+     * how much space to allocate for non-inplace transliterations.  Without
+     * this number, the worst case is 14, which is extremely unlikely to happen
+     * in real life, and would require significant memory overhead. */
+    NV max_expansion = 1.;
+
+    SSize_t t_range_count, r_range_count, min_range_count;
+    UV* t_array;
+    SV* t_invlist;
+    UV* r_map;
+    UV r_cp, t_cp;
+    IV t_cp_end = -1;
+    UV r_cp_end;
+    Size_t len;
+    AV* invmap;
+    UV final_map = TR_UNLISTED;    /* The final character in the replacement
+                                      list, updated as we go along.  Initialize
+                                      to something illegal */
+
+    bool rstr_utf8 = cBOOL(SvUTF8(rstr));
+    bool tstr_utf8 = cBOOL(SvUTF8(tstr));
+
+    const U8* tend = t + tlen;
+    const U8* rend = r + rlen;
+
+    SV * inverted_tstr = NULL;
+
+    Size_t i;
+    unsigned int pass2;
+
+    /* This routine implements detection of a transliteration having a longer
+     * UTF-8 representation than its source, by partitioning all the possible
+     * code points of the platform into equivalence classes of the same UTF-8
+     * byte length in the first pass.  As it constructs the mappings, it carves
+     * these up into smaller chunks, but doesn't merge any together.  This
+     * makes it easy to find the instances it's looking for.  A second pass is
+     * done after this has been determined which merges things together to
+     * shrink the table for runtime.  For ASCII platforms, the table is
+     * trivial, given below, and uses the fundamental characteristics of UTF-8
+     * to construct the values.  For EBCDIC, it isn't so, and we rely on a
+     * table constructed by the perl script that generates these kinds of
+     * things */
+#ifndef EBCDIC
+    UV PL_partition_by_byte_length[] = {
+        0,
+        0x80,
+        (32 * (1UL << (    UTF_ACCUMULATION_SHIFT))),
+        (32 * (1UL << (    UTF_ACCUMULATION_SHIFT))),
+        (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))),
+        ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))),
+        ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))),
+        ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT)))
+
+#  ifdef UV_IS_QUAD
+                                                    ,
+        (     (1UL << (6 * UTF_ACCUMULATION_SHIFT)))
+
+#  endif
+#endif
+    };
 
     PERL_ARGS_ASSERT_PMTRANS;
 
     PL_hints |= HINT_BLOCK_SCOPE;
 
-    if (SvUTF8(tstr))
-        o->op_private |= OPpTRANS_FROM_UTF;
+    /* If /c, the search list is sorted and complemented.  This is now done by
+     * creating an inversion list from it, and then trivially inverting that.
+     * The previous implementation used qsort, but creating the list
+     * automatically keeps it sorted as we go along */
+    if (complement) {
+        UV start, end;
+        SV * inverted_tlist = _new_invlist(tlen);
+        Size_t temp_len;
+
+        while (t < tend) {
+
+            /* Non-utf8 strings don't have ranges, so each character is listed
+             * out */
+            if (! tstr_utf8) {
+                inverted_tlist = add_cp_to_invlist(inverted_tlist, *t);
+                t++;
+            }
+            else {  /* But UTF-8 strings have been parsed in toke.c to have
+                 * ranges if appropriate. */
+                UV t_cp;
+                Size_t t_char_len;
+
+                /* Get the first character */
+                t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+                t += t_char_len;
+
+                /* If the next byte indicates that this wasn't the first
+                 * element of a range, the range is just this one */
+                if (t >= tend || *t != RANGE_INDICATOR) {
+                    inverted_tlist = add_cp_to_invlist(inverted_tlist, t_cp);
+                }
+                else { /* Otherwise, ignore the indicator byte, and get the
+                          final element, and add the whole range */
+                    t++;
+                    t_cp_end = valid_utf8_to_uvchr(t, &t_char_len);
+                    t += t_char_len;
+
+                    inverted_tlist = _add_range_to_invlist(inverted_tlist,
+                                                      t_cp, t_cp_end);
+                }
+            }
+        } /* End of parse through tstr */
+
+        /* The inversion list is done; now invert it */
+        _invlist_invert(inverted_tlist);
+
+        /* Now go through the inverted list and create a new tstr for the rest
+         * of the routine to use.  Since the UTF-8 version can have ranges, and
+         * can be much more compact than the non-UTF-8 version, we create the
+         * string in UTF-8 even if not necessary.  (This is just an intermediate
+         * value that gets thrown away anyway.) */
+        invlist_iterinit(inverted_tlist);
+        inverted_tstr = newSVpvs("");
+        while (invlist_iternext(inverted_tlist, &start, &end)) {
+            U8 temp[UTF8_MAXBYTES];
+            U8 * temp_end_pos;
+
+            /* IV_MAX keeps things from going out of bounds */
+            start = MIN(IV_MAX, start);
+            end   = MIN(IV_MAX, end);
+
+            temp_end_pos = uvchr_to_utf8(temp, start);
+            sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);
+
+            if (start != end) {
+                Perl_sv_catpvf(aTHX_ inverted_tstr, "%c", RANGE_INDICATOR);
+                temp_end_pos = uvchr_to_utf8(temp, end);
+                sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);
+            }
+        }
+
+        /* Set up so the remainder of the routine uses this complement, instead
+         * of the actual input */
+        t0 = t = (U8*)SvPV_const(inverted_tstr, temp_len);
+        tend = t0 + temp_len;
+        tstr_utf8 = TRUE;
+
+        SvREFCNT_dec_NN(inverted_tlist);
+    }
+
+    /* For non-/d, an empty rhs means to use the lhs */
+    if (rlen == 0 && ! del) {
+        r0 = t0;
+        rend = tend;
+        rstr_utf8  = tstr_utf8;
+    }
+
+    t_invlist = _new_invlist(1);
+
+    /* Parse the (potentially adjusted) input, creating the inversion map.
+     * This is done in two passes.  The first pass is to determine if the
+     * transliteration can be done in place.  The inversion map it creates
+     * could be used, but generally would be larger and slower to run than the
+     * output of the second pass, which starts with a more compact table and
+     * allows more ranges to be merged */
+    for (pass2 = 0; pass2 < 2; pass2++) {
 
-    if (SvUTF8(rstr))
-        o->op_private |= OPpTRANS_TO_UTF;
+        /* Initialize to a single range */
+        t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
 
-    if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
+        /* In the second pass, we just have the single range */
 
-        /* for utf8 translations, op_sv will be set to point to a swash
-         * containing codepoint ranges. This is done by first assembling
-         * a textual representation of the ranges in listsv then compiling
-         * it using swash_init(). For more details of the textual format,
-         * see L<perlunicode.pod/"User-Defined Character Properties"> .
+        if (pass2) {
+            len = 1;
+            t_array = invlist_array(t_invlist);
+        }
+        else {
+
+            /* But in the first pass, the lhs is partitioned such that the
+             * number of UTF-8 bytes required to represent a code point in each
+             * partition is the same as the number for any other code point in
+             * that partion.  We copy the pre-compiled partion. */
+            len = C_ARRAY_LENGTH(PL_partition_by_byte_length);
+            invlist_extend(t_invlist, len);
+            t_array = invlist_array(t_invlist);
+            Copy(PL_partition_by_byte_length, t_array, len, UV);
+            invlist_set_len(t_invlist,
+                            len,
+                            *(get_invlist_offset_addr(t_invlist)));
+            Newx(r_map, len + 1, UV);
+        }
+
+        /* And the mapping of each of the ranges is initialized.  Initially,
+         * everything is TR_UNLISTED. */
+        for (i = 0; i < len; i++) {
+            r_map[i] = TR_UNLISTED;
+        }
+
+        t = t0;
+        t_count = 0;
+        r = r0;
+        r_count = 0;
+        t_range_count = r_range_count = 0;
+
+        /* Now go through the search list constructing an inversion map.  The
+         * input is not necessarily in any particular order.  Making it an
+         * inversion map orders it, potentially simplifying, and makes it easy
+         * to deal with at run time.  This is the only place in core that
+         * generates an inversion map; if others were introduced, it might be
+         * better to create general purpose routines to handle them.
+         * (Inversion maps are created in perl in other places.)
+         *
+         * An inversion map consists of two parallel arrays.  One is
+         * essentially an inversion list: an ordered list of code points such
+         * that each element gives the first code point of a range of
+         * consecutive code points that map to the element in the other array
+         * that has the same index as this one (in other words, the
+         * corresponding element).  Thus the range extends up to (but not
+         * including) the code point given by the next higher element.  In a
+         * true inversion map, the corresponding element in the other array
+         * gives the mapping of the first code point in the range, with the
+         * understanding that the next higher code point in the inversion
+         * list's range will map to the next higher code point in the map.
+         *
+         * So if at element [i], let's say we have:
+         *
+         *     t_invlist  r_map
+         * [i]    A         a
+         *
+         * This means that A => a, B => b, C => c....  Let's say that the
+         * situation is such that:
+         *
+         * [i+1]  L        -1
+         *
+         * This means the sequence that started at [i] stops at K => k.  This
+         * illustrates that you need to look at the next element to find where
+         * a sequence stops.  Except, the highest element in the inversion list
+         * begins a range that is understood to extend to the platform's
+         * infinity.
+         *
+         * This routine modifies traditional inversion maps to reserve two
+         * mappings:
+         *
+         *  TR_UNLISTED (or -1) indicates that the no code point in the range
+         *      is listed in the tr/// searchlist.  At runtime, these are
+         *      always passed through unchanged.  In the inversion map, all
+         *      points in the range are mapped to -1, instead of increasing,
+         *      like the 'L' in the example above.
+         *
+         *      We start the parse with every code point mapped to this, and as
+         *      we parse and find ones that are listed in the search list, we
+         *      carve out ranges as we go along that override that.
+         *
+         *  TR_SPECIAL_HANDLING (or -2) indicates that every code point in the
+         *      range needs special handling.  Again, all code points in the
+         *      range are mapped to -2, instead of increasing.
+         *
+         *      Under /d this value means the code point should be deleted from
+         *      the transliteration when encountered.
+         *
+         *      Otherwise, it marks that every code point in the range is to
+         *      map to the final character in the replacement list.  This
+         *      happens only when the replacement list is shorter than the
+         *      search one, so there are things in the search list that have no
+         *      correspondence in the replacement list.  For example, in
+         *      tr/a-z/A/, 'A' is the final value, and the inversion map
+         *      generated for this would be like this:
+         *          \0  =>  -1
+         *          a   =>   A
+         *          b-z =>  -2
+         *          z+1 =>  -1
+         *      'A' appears once, then the remainder of the range maps to -2.
+         *      The use of -2 isn't strictly necessary, as an inversion map is
+         *      capable of representing this situation, but not nearly so
+         *      compactly, and this is actually quite commonly encountered.
+         *      Indeed, the original design of this code used a full inversion
+         *      map for this.  But things like
+         *          tr/\0-\x{FFFF}/A/
+         *      generated huge data structures, slowly, and the execution was
+         *      also slow.  So the current scheme was implemented.
+         *
+         *  So, if the next element in our example is:
+         *
+         * [i+2]  Q        q
+         *
+         * Then all of L, M, N, O, and P map to TR_UNLISTED.  If the next
+         * elements are
+         *
+         * [i+3]  R        z
+         * [i+4]  S       TR_UNLISTED
+         *
+         * Then Q => q; R => z; and S => TR_UNLISTED.  If [i+4] (the 'S') is
+         * the final element in the arrays, every code point from S to infinity
+         * maps to TR_UNLISTED.
+         *
          */
+                           /* Finish up range started in what otherwise would
+                            * have been the final iteration */
+        while (t < tend || t_range_count > 0) {
+            bool adjacent_to_range_above = FALSE;
+            bool adjacent_to_range_below = FALSE;
+
+            bool merge_with_range_above = FALSE;
+            bool merge_with_range_below = FALSE;
+
+            SSize_t i, span, invmap_range_length_remaining;
+
+            /* If we are in the middle of processing a range in the 'target'
+             * side, the previous iteration has set us up.  Otherwise, look at
+             * the next character in the search list */
+            if (t_range_count <= 0) {
+                if (! tstr_utf8) {
+
+                    /* Here, not in the middle of a range, and not UTF-8.  The
+                     * next code point is the single byte where we're at */
+                    t_cp = *t;
+                    t_range_count = 1;
+                    t++;
+                }
+                else {
+                    Size_t t_char_len;
+
+                    /* Here, not in the middle of a range, and is UTF-8.  The
+                     * next code point is the next UTF-8 char in the input.  We
+                     * know the input is valid, because the toker constructed
+                     * it */
+                    t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+                    t += t_char_len;
+
+                    /* UTF-8 strings (only) have been parsed in toke.c to have
+                     * ranges.  See if the next byte indicates that this was
+                     * the first element of a range.  If so, get the final
+                     * element and calculate the range size.  If not, the range
+                     * size is 1 */
+                    if (t < tend && *t == RANGE_INDICATOR) {
+                        t++;
+                        t_range_count = valid_utf8_to_uvchr(t, &t_char_len)
+                                      - t_cp + 1;
+                        t += t_char_len;
+                    }
+                    else {
+                        t_range_count = 1;
+                    }
+                }
 
-	SV* const listsv = newSVpvs("# comment\n");
-	SV* transv = NULL;
-	const U8* tend = t + tlen;
-	const U8* rend = r + rlen;
-	STRLEN ulen;
-	UV tfirst = 1;
-	UV tlast = 0;
-	IV tdiff;
-	STRLEN tcount = 0;
-	UV rfirst = 1;
-	UV rlast = 0;
-	IV rdiff;
-	STRLEN rcount = 0;
-	IV diff;
-	I32 none = 0;
-	U32 max = 0;
-	I32 bits;
-	I32 havefinal = 0;
-	U32 final = 0;
-	const I32 from_utf  = o->op_private & OPpTRANS_FROM_UTF;
-	const I32 to_utf    = o->op_private & OPpTRANS_TO_UTF;
-	U8* tsave = NULL;
-	U8* rsave = NULL;
-	const U32 flags = UTF8_ALLOW_DEFAULT;
-
-	if (!from_utf) {
-	    STRLEN len = tlen;
-	    t = tsave = bytes_to_utf8(t, &len);
-	    tend = t + len;
-	}
-	if (!to_utf && rlen) {
-	    STRLEN len = rlen;
-	    r = rsave = bytes_to_utf8(r, &len);
-	    rend = r + len;
-	}
+                /* Count the total number of listed code points * */
+                t_count += t_range_count;
+            }
 
-/* There is a snag with this code on EBCDIC: scan_const() in toke.c has
- * encoded chars in native encoding which makes ranges in the EBCDIC 0..255
- * odd.  */
+            /* Similarly, get the next character in the replacement list */
+            if (r_range_count <= 0) {
+                if (r >= rend) {
 
-	if (complement) {
-            /* utf8 and /c:
-             * replace t/tlen/tend with a version that has the ranges
-             * complemented
-             */
-	    U8 tmpbuf[UTF8_MAXBYTES+1];
-	    UV *cp;
-	    UV nextmin = 0;
-	    Newx(cp, 2*tlen, UV);
-	    i = 0;
-	    transv = newSVpvs("");
-
-            /* convert search string into array of (start,end) range
-             * codepoint pairs stored in cp[]. Most "ranges" will start
-             * and end at the same char */
-	    while (t < tend) {
-		cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
-		t += ulen;
-                /* the toker converts X-Y into (X, RANGE_INDICATOR, Y) */
-		if (t < tend && *t == RANGE_INDICATOR) {
-		    t++;
-		    cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
-		    t += ulen;
-		}
-		else {
-		 cp[2*i+1] = cp[2*i];
-		}
-		i++;
-	    }
+                    /* But if we've exhausted the rhs, there is nothing to map
+                     * to, except the special handling one, and we make the
+                     * range the same size as the lhs one. */
+                    r_cp = TR_SPECIAL_HANDLING;
+                    r_range_count = t_range_count;
+                }
+                else {
+                    if (! rstr_utf8) {
+                        r_cp = *r;
+                        r_range_count = 1;
+                        r++;
+                    }
+                    else {
+                        Size_t r_char_len;
+
+                        r_cp = valid_utf8_to_uvchr(r, &r_char_len);
+                        r += r_char_len;
+                        if (r < rend && *r == RANGE_INDICATOR) {
+                            r++;
+                            r_range_count = valid_utf8_to_uvchr(r,
+                                                    &r_char_len) - r_cp + 1;
+                            r += r_char_len;
+                        }
+                        else {
+                            r_range_count = 1;
+                        }
+                    }
 
-            /* sort the ranges */
-	    qsort(cp, i, 2*sizeof(UV), uvcompare);
-
-            /* Create a utf8 string containing the complement of the
-             * codepoint ranges. For example if cp[] contains [A,B], [C,D],
-             * then transv will contain the equivalent of:
-             * join '', map chr, 0,     RANGE_INDICATOR, A - 1,
-             *                   B + 1, RANGE_INDICATOR, C - 1,
-             *                   D + 1, RANGE_INDICATOR, 0x7fffffff;
-             * A range of a single char skips the RANGE_INDICATOR and
-             * end cp.
-             */
-	    for (j = 0; j < i; j++) {
-		UV  val = cp[2*j];
-		diff = val - nextmin;
-		if (diff > 0) {
-		    t = uvchr_to_utf8(tmpbuf,nextmin);
-		    sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
-		    if (diff > 1) {
-			U8  range_mark = RANGE_INDICATOR;
-			t = uvchr_to_utf8(tmpbuf, val - 1);
-			sv_catpvn(transv, (char *)&range_mark, 1);
-			sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
-		    }
-	        }
-		val = cp[2*j+1];
-		if (val >= nextmin)
-		    nextmin = val + 1;
-	    }
+                    if (r_cp == TR_SPECIAL_HANDLING) {
+                        r_range_count = t_range_count;
+                    }
 
-	    t = uvchr_to_utf8(tmpbuf,nextmin);
-	    sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
-	    {
-		U8 range_mark = RANGE_INDICATOR;
-		sv_catpvn(transv, (char *)&range_mark, 1);
-	    }
-	    t = uvchr_to_utf8(tmpbuf, 0x7fffffff);
-	    sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
-	    t = (const U8*)SvPVX_const(transv);
-	    tlen = SvCUR(transv);
-	    tend = t + tlen;
-	    Safefree(cp);
-	}
-	else if (!rlen && !del) {
-	    r = t; rlen = tlen; rend = tend;
-	}
+                    /* This is the final character so far */
+                    final_map = r_cp + r_range_count - 1;
 
-	if (!squash) {
-		if ((!rlen && !del) || t == r ||
-		    (tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
-		{
-		    o->op_private |= OPpTRANS_IDENTICAL;
-		}
-	}
+                    r_count += r_range_count;
+                }
+            }
 
-        /* extract char ranges from t and r and append them to listsv */
-
-	while (t < tend || tfirst <= tlast) {
-	    /* see if we need more "t" chars */
-	    if (tfirst > tlast) {
-		tfirst = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
-		t += ulen;
-		if (t < tend && *t == RANGE_INDICATOR) {	/* illegal utf8 val indicates range */
-		    t++;
-		    tlast = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
-		    t += ulen;
-		}
-		else
-		    tlast = tfirst;
-	    }
+            /* Here, we have the next things ready in both sides.  They are
+             * potentially ranges.  We try to process as big a chunk as
+             * possible at once, but the lhs and rhs must be synchronized, so
+             * things like tr/A-Z/a-ij-z/ will need to be processed in 2 chunks
+             * */
+            min_range_count = MIN(t_range_count, r_range_count);
+
+            /* Search the inversion list for the entry that contains the input
+             * code point <cp>.  The inversion map was initialized to cover the
+             * entire range of possible inputs, so this should not fail.  So
+             * the return value is the index into the list's array of the range
+             * that contains <cp>, that is, 'i' such that array[i] <= cp <
+             * array[i+1] */
+            i = _invlist_search(t_invlist, t_cp);
+            assert(i >= 0);
+
+            /* Here, the data structure might look like:
+             *
+             * index    t   r     Meaning
+             * [i-1]    J   j   # J-L => j-l
+             * [i]      M  -1   # M => default; as do N, O, P, Q
+             * [i+1]    R   x   # R => x, S => x+1, T => x+2
+             * [i+2]    U   y   # U => y, V => y+1, ...
+             * ...
+             * [-1]     Z  -1   # Z => default; as do Z+1, ... infinity
+             *
+             * where 'x' and 'y' above are not to be taken literally.
+             *
+             * The maximum chunk we can handle in this loop iteration, is the
+             * smallest of the three components: the lhs 't_', the rhs 'r_',
+             * and the remainder of the range in element [i].  (In pass 1, that
+             * range will have everything in it be of the same class; we can't
+             * cross into another class.)  'min_range_count' already contains
+             * the smallest of the first two values.  The final one is
+             * irrelevant if the map is to the special indicator */
+
+            invmap_range_length_remaining = ((Size_t) i + 1 < len)
+                                            ? t_array[i+1] - t_cp
+                                            : IV_MAX - t_cp;
+            span = MAX(1, MIN(min_range_count, invmap_range_length_remaining));
+
+            /* The end point of this chunk is where we are, plus the span, but
+             * never larger than the platform's infinity */
+            t_cp_end = MIN(IV_MAX, t_cp + span - 1);
+
+            if (r_cp == TR_SPECIAL_HANDLING) {
+                r_cp_end = TR_SPECIAL_HANDLING;
+            }
+            else {
+                r_cp_end = MIN(IV_MAX, r_cp + span - 1);
+
+                /* If something on the lhs is below 256, and something on the
+                 * rhs is above, there is a potential mapping here across that
+                 * boundary.  Indeed the only way there isn't is if both sides
+                 * start at the same point.  That means they both cross at the
+                 * same time.  But otherwise one crosses before the other */
+                if (t_cp < 256 && r_cp_end > 255 && r_cp != t_cp) {
+                    can_force_utf8 = TRUE;
+                }
+            }
 
-	    /* now see if we need more "r" chars */
-	    if (rfirst > rlast) {
-		if (r < rend) {
-		    rfirst = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
-		    r += ulen;
-		    if (r < rend && *r == RANGE_INDICATOR) {	/* illegal utf8 val indicates range */
-			r++;
-			rlast = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
-			r += ulen;
-		    }
-		    else
-			rlast = rfirst;
-		}
-		else {
-		    if (!havefinal++)
-			final = rlast;
-		    rfirst = rlast = 0xffffffff;
-		}
-	    }
+            /* If a character appears in the search list more than once, the
+             * 2nd and succeeding occurrences are ignored, so only do this
+             * range if haven't already processed this character.  (The range
+             * has been set up so that all members in it will be of the same
+             * ilk) */
+            if (r_map[i] == TR_UNLISTED) {
+
+                /* This is the first definition for this chunk, hence is valid
+                 * and needs to be processed.  Here and in the comments below,
+                 * we use the above sample data.  The t_cp chunk must be any
+                 * contiguous subset of M, N, O, P, and/or Q.
+                 *
+                 * In the first pass, the t_invlist has been partitioned so
+                 * that all elements in any single range have the same number
+                 * of bytes in their UTF-8 representations.  And the r space is
+                 * either a single byte, or a range of strictly monotonically
+                 * increasing code points.  So the final element in the range
+                 * will be represented by no fewer bytes than the initial one.
+                 * That means that if the final code point in the t range has
+                 * at least as many bytes as the final code point in the r,
+                 * then all code points in the t range have at least as many
+                 * bytes as their corresponding r range element.  But if that's
+                 * not true, the transliteration of at least the final code
+                 * point grows in length.  As an example, suppose we had
+                 *      tr/\x{fff0}-\x{fff1}/\x{ffff}-\x{10000}/
+                 * The UTF-8 for all but 10000 occupies 3 bytes on ASCII
+                 * platforms.  We have deliberately set up the data structure
+                 * so that any range in the lhs gets split into chunks for
+                 * processing, such that every code point in a chunk has the
+                 * same number of UTF-8 bytes.  We only have to check the final
+                 * code point in the rhs against any code point in the lhs. */
+                if ( ! pass2
+                    && r_cp_end != TR_SPECIAL_HANDLING
+                    && UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end))
+                {
+                    NV ratio = UVCHR_SKIP(r_cp_end) / UVCHR_SKIP(t_cp);
 
-	    /* now see which range will peter out first, if either. */
-	    tdiff = tlast - tfirst;
-	    rdiff = rlast - rfirst;
-	    tcount += tdiff + 1;
-	    rcount += rdiff + 1;
+                    o->op_private |= OPpTRANS_GROWS;
 
-	    if (tdiff <= rdiff)
-		diff = tdiff;
-	    else
-		diff = rdiff;
+                    /* Now that we know it grows, we can keep track of the
+                     * largest ratio */
+                    if (ratio > max_expansion) {
+                        max_expansion = ratio;
+                    }
+                }
 
-	    if (rfirst == 0xffffffff) {
-		diff = tdiff;	/* oops, pretend rdiff is infinite */
-		if (diff > 0)
-		    Perl_sv_catpvf(aTHX_ listsv, "%04lx\t%04lx\tXXXX\n",
-				   (long)tfirst, (long)tlast);
-		else
-		    Perl_sv_catpvf(aTHX_ listsv, "%04lx\t\tXXXX\n", (long)tfirst);
-	    }
-	    else {
-		if (diff > 0)
-		    Perl_sv_catpvf(aTHX_ listsv, "%04lx\t%04lx\t%04lx\n",
-				   (long)tfirst, (long)(tfirst + diff),
-				   (long)rfirst);
-		else
-		    Perl_sv_catpvf(aTHX_ listsv, "%04lx\t\t%04lx\n",
-				   (long)tfirst, (long)rfirst);
-
-		if (rfirst + diff > max)
-		    max = rfirst + diff;
-		if (!grows)
-		    grows = (tfirst < rfirst &&
-			     UVCHR_SKIP(tfirst) < UVCHR_SKIP(rfirst + diff));
-		rfirst += diff + 1;
-	    }
-	    tfirst += diff + 1;
-	}
+                /* The very first range is marked as adjacent to the
+                 * non-existent range below it, as it causes things to "just
+                 * work" (TradeMark)
+                 *
+                 * If the lowest code point in this chunk is M, it adjoins the
+                 * J-L range */
+                if (t_cp == t_array[i]) {
+                    adjacent_to_range_below = TRUE;
+
+                    /* And if the map has the same offset from the beginning of
+                     * the range as does this new code point (or both are for
+                     * TR_SPECIAL_HANDLING), this chunk can be completely
+                     * merged with the range below.  EXCEPT, in the first pass,
+                     * we don't merge ranges whose UTF-8 byte representations
+                     * have different lengths, so that we can more easily
+                     * detect if a replacement is longer than the source, that
+                     * is if it 'grows'.  But in the 2nd pass, there's no
+                     * reason to not merge */
+                    if (   (i > 0 && (   pass2
+                                      || UVCHR_SKIP(t_array[i-1])
+                                                        == UVCHR_SKIP(t_cp)))
+                        && (   (   r_cp == TR_SPECIAL_HANDLING
+                                && r_map[i-1] == TR_SPECIAL_HANDLING)
+                            || (   r_cp != TR_SPECIAL_HANDLING
+                                && r_cp - r_map[i-1] == t_cp - t_array[i-1])))
+                    {
+                        merge_with_range_below = TRUE;
+                    }
+                }
 
-        /* compile listsv into a swash and attach to o */
+                /* Similarly, if the highest code point in this chunk is 'Q',
+                 * it adjoins the range above, and if the map is suitable, can
+                 * be merged with it */
+                if (    t_cp_end >= IV_MAX - 1
+                    || (   (Size_t) i + 1 < len
+                        && (Size_t) t_cp_end + 1 == t_array[i+1]))
+                {
+                    adjacent_to_range_above = TRUE;
+                    if ((Size_t) i + 1 < len)
+                    if (    (   pass2
+                             || UVCHR_SKIP(t_cp) == UVCHR_SKIP(t_array[i+1]))
+                        && (   (   r_cp == TR_SPECIAL_HANDLING
+                                && r_map[i+1] == (UV) TR_SPECIAL_HANDLING)
+                            || (   r_cp != TR_SPECIAL_HANDLING
+                                && r_cp_end == r_map[i+1] - 1)))
+                    {
+                        merge_with_range_above = TRUE;
+                    }
+                }
 
-	none = ++max;
-	if (del)
-	    ++max;
+                if (merge_with_range_below && merge_with_range_above) {
+
+                    /* Here the new chunk looks like M => m, ... Q => q; and
+                     * the range above is like R => r, ....  Thus, the [i-1]
+                     * and [i+1] ranges should be seamlessly melded so the
+                     * result looks like
+                     *
+                     * [i-1]    J   j   # J-T => j-t
+                     * [i]      U   y   # U => y, V => y+1, ...
+                     * ...
+                     * [-1]     Z  -1   # Z => default; as do Z+1, ... infinity
+                     */
+                    Move(t_array + i + 2, t_array + i, len - i - 2, UV);
+                    Move(r_map   + i + 2, r_map   + i, len - i - 2, UV);
+                    len -= 2;
+                    invlist_set_len(t_invlist,
+                                    len,
+                                    *(get_invlist_offset_addr(t_invlist)));
+                }
+                else if (merge_with_range_below) {
+
+                    /* Here the new chunk looks like M => m, .... But either
+                     * (or both) it doesn't extend all the way up through Q; or
+                     * the range above doesn't start with R => r. */
+                    if (! adjacent_to_range_above) {
+
+                        /* In the first case, let's say the new chunk extends
+                         * through O.  We then want:
+                         *
+                         * [i-1]    J   j   # J-O => j-o
+                         * [i]      P  -1   # P => -1, Q => -1
+                         * [i+1]    R   x   # R => x, S => x+1, T => x+2
+                         * [i+2]    U   y   # U => y, V => y+1, ...
+                         * ...
+                         * [-1]     Z  -1   # Z => default; as do Z+1, ...
+                         *                                            infinity
+                         */
+                        t_array[i] = t_cp_end + 1;
+                        r_map[i] = TR_UNLISTED;
+                    }
+                    else { /* Adjoins the range above, but can't merge with it
+                              (because 'x' is not the next map after q) */
+                        /*
+                         * [i-1]    J   j   # J-Q => j-q
+                         * [i]      R   x   # R => x, S => x+1, T => x+2
+                         * [i+1]    U   y   # U => y, V => y+1, ...
+                         * ...
+                         * [-1]     Z  -1   # Z => default; as do Z+1, ...
+                         *                                          infinity
+                         */
 
-	if (max > 0xffff)
-	    bits = 32;
-	else if (max > 0xff)
-	    bits = 16;
-	else
-	    bits = 8;
+                        Move(t_array + i + 1, t_array + i, len - i - 1, UV);
+                        Move(r_map + i + 1, r_map + i, len - i - 1, UV);
+                        len--;
+                        invlist_set_len(t_invlist, len,
+                                        *(get_invlist_offset_addr(t_invlist)));
+                    }
+                }
+                else if (merge_with_range_above) {
+
+                    /* Here the new chunk ends with Q => q, and the range above
+                     * must start with R => r, so the two can be merged. But
+                     * either (or both) the new chunk doesn't extend all the
+                     * way down to M; or the mapping of the final code point
+                     * range below isn't m */
+                    if (! adjacent_to_range_below) {
+
+                        /* In the first case, let's assume the new chunk starts
+                         * with P => p.  Then, because it's merge-able with the
+                         * range above, that range must be R => r.  We want:
+                         *
+                         * [i-1]    J   j   # J-L => j-l
+                         * [i]      M  -1   # M => -1, N => -1
+                         * [i+1]    P   p   # P-T => p-t
+                         * [i+2]    U   y   # U => y, V => y+1, ...
+                         * ...
+                         * [-1]     Z  -1   # Z => default; as do Z+1, ...
+                         *                                          infinity
+                         */
+                        t_array[i+1] = t_cp;
+                        r_map[i+1] = r_cp;
+                    }
+                    else { /* Adjoins the range below, but can't merge with it
+                            */
+                        /*
+                         * [i-1]    J   j   # J-L => j-l
+                         * [i]      M   x   # M-T => x-5 .. x+2
+                         * [i+1]    U   y   # U => y, V => y+1, ...
+                         * ...
+                         * [-1]     Z  -1   # Z => default; as do Z+1, ...
+                         *                                          infinity
+                         */
+                        Move(t_array + i + 1, t_array + i, len - i - 1, UV);
+                        Move(r_map   + i + 1, r_map   + i, len - i - 1, UV);
+                        len--;
+                        t_array[i] = t_cp;
+                        r_map[i] = r_cp;
+                        invlist_set_len(t_invlist, len,
+                                        *(get_invlist_offset_addr(t_invlist)));
+                    }
+                }
+                else if (adjacent_to_range_below && adjacent_to_range_above) {
+                    /* The new chunk completely fills the gap between the
+                     * ranges on either side, but can't merge with either of
+                     * them.
+                     *
+                     * [i-1]    J   j   # J-L => j-l
+                     * [i]      M   z   # M => z, N => z+1 ... Q => z+4
+                     * [i+1]    R   x   # R => x, S => x+1, T => x+2
+                     * [i+2]    U   y   # U => y, V => y+1, ...
+                     * ...
+                     * [-1]     Z  -1   # Z => default; as do Z+1, ... infinity
+                     */
+                    r_map[i] = r_cp;
+                }
+                else if (adjacent_to_range_below) {
+                    /* The new chunk adjoins the range below, but not the range
+                     * above, and can't merge.  Let's assume the chunk ends at
+                     * O.
+                     *
+                     * [i-1]    J   j   # J-L => j-l
+                     * [i]      M   z   # M => z, N => z+1, O => z+2
+                     * [i+1]    P   -1  # P => -1, Q => -1
+                     * [i+2]    R   x   # R => x, S => x+1, T => x+2
+                     * [i+3]    U   y   # U => y, V => y+1, ...
+                     * ...
+                     * [-w]     Z  -1   # Z => default; as do Z+1, ... infinity
+                     */
+                    invlist_extend(t_invlist, len + 1);
+                    t_array = invlist_array(t_invlist);
+                    Renew(r_map, len + 1, UV);
+
+                    Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV);
+                    Move(r_map + i + 1,   r_map   + i + 2, len - i - 1, UV);
+                    r_map[i] = r_cp;
+                    t_array[i+1] = t_cp_end + 1;
+                    r_map[i+1] = TR_UNLISTED;
+                    len++;
+                    invlist_set_len(t_invlist, len,
+                                    *(get_invlist_offset_addr(t_invlist)));
+                }
+                else if (adjacent_to_range_above) {
+                    /* The new chunk adjoins the range above, but not the range
+                     * below, and can't merge.  Let's assume the new chunk
+                     * starts at O
+                     *
+                     * [i-1]    J   j   # J-L => j-l
+                     * [i]      M  -1   # M => default, N => default
+                     * [i+1]    O   z   # O => z, P => z+1, Q => z+2
+                     * [i+2]    R   x   # R => x, S => x+1, T => x+2
+                     * [i+3]    U   y   # U => y, V => y+1, ...
+                     * ...
+                     * [-1]     Z  -1   # Z => default; as do Z+1, ... infinity
+                     */
+                    invlist_extend(t_invlist, len + 1);
+                    t_array = invlist_array(t_invlist);
+                    Renew(r_map, len + 1, UV);
+
+                    Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV);
+                    Move(r_map   + i + 1, r_map   + i + 2, len - i - 1, UV);
+                    t_array[i+1] = t_cp;
+                    r_map[i+1] = r_cp;
+                    len++;
+                    invlist_set_len(t_invlist, len,
+                                    *(get_invlist_offset_addr(t_invlist)));
+                }
+                else {
+                    /* The new chunk adjoins neither the range above, nor the
+                     * range below.  Lets assume it is N..P => n..p
+                     *
+                     * [i-1]    J   j   # J-L => j-l
+                     * [i]      M  -1   # M => default
+                     * [i+1]    N   n   # N..P => n..p
+                     * [i+2]    Q  -1   # Q => default
+                     * [i+3]    R   x   # R => x, S => x+1, T => x+2
+                     * [i+4]    U   y   # U => y, V => y+1, ...
+                     * ...
+                     * [-1]     Z  -1   # Z => default; as do Z+1, ... infinity
+                     */
 
-	swash = MUTABLE_SV(swash_init("utf8", "", listsv, bits, none));
-#ifdef USE_ITHREADS
-	cPADOPo->op_padix = pad_alloc(OP_TRANS, SVf_READONLY);
-	SvREFCNT_dec(PAD_SVl(cPADOPo->op_padix));
-	PAD_SETSV(cPADOPo->op_padix, swash);
-	SvPADTMP_on(swash);
-	SvREADONLY_on(swash);
-#else
-	cSVOPo->op_sv = swash;
-#endif
-	SvREFCNT_dec(listsv);
-	SvREFCNT_dec(transv);
+                    invlist_extend(t_invlist, len + 2);
+                    t_array = invlist_array(t_invlist);
+                    Renew(r_map, len + 2, UV);
+
+                    Move(t_array + i + 1,
+                         t_array + i + 2 + 1, len - i - (2 - 1), UV);
+                    Move(r_map   + i + 1,
+                         r_map   + i + 2 + 1, len - i - (2 - 1), UV);
+
+                    len += 2;
+                    invlist_set_len(t_invlist, len,
+                                    *(get_invlist_offset_addr(t_invlist)));
+
+                    t_array[i+1] = t_cp;
+                    r_map[i+1] = r_cp;
+
+                    t_array[i+2] = t_cp_end + 1;
+                    r_map[i+2] = TR_UNLISTED;
+                }
+            } /* End of this chunk needs to be processed */
+
+            /* Done with this chunk. */
+            t_cp += span;
+            if (t_cp >= IV_MAX) {
+                break;
+            }
+            t_range_count -= span;
+            if (r_cp != TR_SPECIAL_HANDLING) {
+                r_cp += span;
+                r_range_count -= span;
+            }
+            else {
+                r_range_count = 0;
+            }
+
+        } /* End of loop through the search list */
+
+        /* We don't need an exact count, but we do need to know if there is
+         * anything left over in the replacement list.  So, just assume it's
+         * one byte per character */
+        if (rend > r) {
+            r_count++;
+        }
+    } /* End of passes */
+
+    SvREFCNT_dec(inverted_tstr);
 
-	if (!del && havefinal && rlen)
-	    (void)hv_store(MUTABLE_HV(SvRV(swash)), "FINAL", 5,
-			   newSVuv((UV)final), 0);
+    /* We now have normalized the input into an inversion map.
+     *
+     * See if the lhs and rhs are equivalent.  If so, this tr/// is a no-op
+     * except for the count, and streamlined runtime code can be used */
+    if (!del && !squash) {
+
+        /* They are identical if they point to same address, or if everything
+         * maps to UNLISTED or to itself.  This catches things that not looking
+         * at the normalized inversion map doesn't catch, like tr/aa/ab/ or
+         * tr/\x{100}-\x{104}/\x{100}-\x{102}\x{103}-\x{104}  */
+        if (r0 != t0) {
+            for (i = 0; i < len; i++) {
+                if (r_map[i] != TR_UNLISTED && r_map[i] != t_array[i]) {
+                    goto done_identical_check;
+                }
+            }
+        }
+
+        /* Here have gone through entire list, and didn't find any
+         * non-identical mappings */
+        o->op_private |= OPpTRANS_IDENTICAL;
+
+      done_identical_check: ;
+    }
+
+    t_array = invlist_array(t_invlist);
+
+    /* If has components above 255, we generally need to use the inversion map
+     * implementation */
+    if (   can_force_utf8
+        || (   len > 0
+            && t_array[len-1] > 255
+                 /* If the final range is 0x100-INFINITY and is a special
+                  * mapping, the table implementation can handle it */
+            && ! (   t_array[len-1] == 256
+                  && (   r_map[len-1] == TR_UNLISTED
+                      || r_map[len-1] == TR_SPECIAL_HANDLING))))
+    {
+        SV* r_map_sv;
+
+        /* A UTF-8 op is generated, indicated by this flag.  This op is an
+         * sv_op */
+        o->op_private |= OPpTRANS_USE_SVOP;
+
+        if (can_force_utf8) {
+            o->op_private |= OPpTRANS_CAN_FORCE_UTF8;
+        }
+
+        /* The inversion map is pushed; first the list. */
+	invmap = MUTABLE_AV(newAV());
+        av_push(invmap, t_invlist);
 
-	Safefree(tsave);
-	Safefree(rsave);
+        /* 2nd is the mapping */
+        r_map_sv = newSVpvn((char *) r_map, len * sizeof(UV));
+        av_push(invmap, r_map_sv);
 
-	tlen = tcount;
-	rlen = rcount;
-	if (r < rend)
-	    rlen++;
-	else if (rlast == 0xffffffff)
-	    rlen = 0;
+        /* 3rd is the max possible expansion factor */
+        av_push(invmap, newSVnv(max_expansion));
+
+        /* Characters that are in the search list, but not in the replacement
+         * list are mapped to the final character in the replacement list */
+        if (! del && r_count < t_count) {
+            av_push(invmap, newSVuv(final_map));
+        }
+
+#ifdef USE_ITHREADS
+        cPADOPo->op_padix = pad_alloc(OP_TRANS, SVf_READONLY);
+        SvREFCNT_dec(PAD_SVl(cPADOPo->op_padix));
+        PAD_SETSV(cPADOPo->op_padix, (SV *) invmap);
+        SvPADTMP_on(invmap);
+        SvREADONLY_on(invmap);
+#else
+        cSVOPo->op_sv = (SV *) invmap;
+#endif
 
-	goto warnins;
     }
+    else {
+        OPtrans_map *tbl;
+        Size_t i;
+
+        /* The OPtrans_map struct already contains one slot; hence the -1. */
+        SSize_t struct_size = sizeof(OPtrans_map)
+                            + (256 - 1 + 1)*sizeof(short);
 
         /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup
         * table. Entries with the value TR_UNMAPPED indicate chars not to be
         * translated, while TR_DELETE indicates a search char without a
         * corresponding replacement char under /d.
         *
-        * Normally, the table has 256 slots. However, in the presence of
-        * /c, the search charlist has an implicit \x{100}-\x{7fffffff}
-        * added, and if there are enough replacement chars to start pairing
-        * with the \x{100},... search chars, then a larger (> 256) table
-        * is allocated.
-        *
-        * In addition, regardless of whether under /c, an extra slot at the
-        * end is used to store the final repeating char, or TR_R_EMPTY under an
-        * empty replacement list, or TR_DELETE under /d; which makes the
-        * runtime code easier.
-        *
-        * The toker will have already expanded char ranges in t and r.
+        * In addition, an extra slot at the end is used to store the final
+        * repeating char, or TR_R_EMPTY under an empty replacement list, or
+        * TR_DELETE under /d; which makes the runtime code easier.
         */
 
-        /* Initially allocate 257-slot table: 256 for basic (non /c) usage,
-        * plus final slot for repeat/TR_DELETE/TR_R_EMPTY. Later we realloc if
-        * excess > * 0.  The OPtrans_map struct already contains one slot;
-        * hence the -1.
-        */
-        struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short);
+        /* Indicate this is an op_pv */
+        o->op_private &= ~OPpTRANS_USE_SVOP;
+
         tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1);
         tbl->size = 256;
         cPVOPo->op_pv = (char*)tbl;
 
-        if (complement) {
-            Size_t excess;
+        for (i = 0; i < len; i++) {
+            STATIC_ASSERT_DECL(TR_SPECIAL_HANDLING == TR_DELETE);
+            short upper = i >= len - 1 ? 256 : t_array[i+1];
+            short to = r_map[i];
+            short j;
+            bool do_increment = TRUE;
 
-            /* in this branch, j is a count of 'consumed' (i.e. paired off
-            * with a search char) replacement chars (so j <= rlen always)
-            */
-            for (i = 0; i < tlen; i++)
-                tbl->map[t[i]] = (short) TR_UNMAPPED;
-
-            for (i = 0, j = 0; i < 256; i++) {
-                if (!tbl->map[i]) {
-                    if (j == rlen) {
-                        if (del)
-                            tbl->map[i] = (short) TR_DELETE;
-                        else if (rlen)
-                            tbl->map[i] = r[j-1];
-                        else
-                            tbl->map[i] = (short)i;
-                    }
-                    else {
-                        tbl->map[i] = r[j++];
-                    }
-                    if (   tbl->map[i] >= 0
-                        &&  UVCHR_IS_INVARIANT((UV)i)
-                        && !UVCHR_IS_INVARIANT((UV)(tbl->map[i]))
-                    )
-                        grows = TRUE;
-                }
+            /* Any code points above our limit should be irrelevant */
+            if (t_array[i] >= tbl->size) break;
+
+            /* Set up the map */
+            if (to == (short) TR_SPECIAL_HANDLING && ! del) {
+                to = final_map;
+                do_increment = FALSE;
+            }
+            else if (to < 0) {
+                do_increment = FALSE;
             }
 
-            ASSUME(j <= rlen);
-            excess = rlen - j;
+            /* Create a map for everything in this range.  The value increases
+             * except for the special cases */
+            for (j = t_array[i]; j < upper; j++) {
+                tbl->map[j] = to;
+                if (do_increment) to++;
+            }
+        }
 
-            if (excess) {
-                /* More replacement chars than search chars:
-                * store excess replacement chars at end of main table.
-                */
+        tbl->map[tbl->size] = del
+                              ? (short) TR_DELETE
+                              : rlen
+                                ? final_map
+                                : (short) TR_R_EMPTY;
 
-                struct_size += excess * sizeof(short);
-                tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, struct_size);
-                tbl->size += excess;
-                cPVOPo->op_pv = (char*)tbl;
+        SvREFCNT_dec(t_invlist);
 
-                for (i = 0; i < excess; i++)
-                    tbl->map[i + 256] = r[j+i];
-            }
-            else {
-                /* no more replacement chars than search chars */
-                if (!rlen && !del && !squash)
-                    o->op_private |= OPpTRANS_IDENTICAL;
-            }
+#if 0   /* code that added excess above-255 chars at the end of the table, in
+           case we ever want to not use the inversion map implementation for
+           this */
 
-            tbl->map[tbl->size] = del
-                                  ? (short) TR_DELETE
-                                  : rlen
-                                    ? r[rlen - 1]
-                                    : (short) TR_R_EMPTY;
+        ASSUME(j <= rlen);
+        excess = rlen - j;
+
+        if (excess) {
+            /* More replacement chars than search chars:
+             * store excess replacement chars at end of main table.
+             */
+
+            struct_size += excess;
+            tbl = (OPtrans_map*)PerlMemShared_realloc(tbl,
+                        struct_size + excess * sizeof(short));
+            tbl->size += excess;
+            cPVOPo->op_pv = (char*)tbl;
+
+            for (i = 0; i < excess; i++)
+                tbl->map[i + 256] = r[j+i];
         }
         else {
-            if (!rlen && !del) {
-                r = t; rlen = tlen;
-                if (!squash)
-                    o->op_private |= OPpTRANS_IDENTICAL;
-            }
-            else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) {
-                o->op_private |= OPpTRANS_IDENTICAL;
-            }
+            /* no more replacement chars than search chars */
+#endif
 
-            for (i = 0; i < 256; i++)
-                tbl->map[i] = (short) TR_UNMAPPED;
-            for (i = 0, j = 0; i < tlen; i++,j++) {
-                if (j >= rlen) {
-                    if (del) {
-                        if (tbl->map[t[i]] == (short) TR_UNMAPPED)
-                            tbl->map[t[i]] = (short) TR_DELETE;
-                        continue;
-                    }
-                    --j;
-                }
-                if (tbl->map[t[i]] == (short) TR_UNMAPPED) {
-                    if (     UVCHR_IS_INVARIANT(t[i])
-                        && ! UVCHR_IS_INVARIANT(r[j]))
-                        grows = TRUE;
-                    tbl->map[t[i]] = r[j];
-                }
-            }
-            tbl->map[tbl->size] = del
-                                  ? (short) TR_UNMAPPED
-                                  : rlen
-                                    ? (short) TR_UNMAPPED
-                                    : (short) TR_R_EMPTY;
-        }
+    }
 
-    /* both non-utf8 and utf8 code paths end up here */
+    Safefree(r_map);
 
-  warnins:
-    if(del && rlen == tlen) {
+    if(del && rlen != 0 && r_count == t_count) {
 	Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator");
-    } else if(rlen > tlen && !complement) {
+    } else if(r_count > t_count) {
 	Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Replacement list is longer than search list");
     }
 
-    if (grows)
-	o->op_private |= OPpTRANS_GROWS;
     op_free(expr);
     op_free(repl);
 
diff --git a/op.h b/op.h
index 3781960e19..537b2ef0d3 100644
--- a/op.h
+++ b/op.h
@@ -1110,7 +1110,7 @@ C<sib> is non-null. For a higher-level interface, see C<L</op_sibling_splice>>.
       "Use of strings with code points over 0xFF as arguments to "  \
       "%s operator is not allowed"
 #endif
-#if defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
+#if defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C) || defined(PERL_IN_PERL_C)
 #  define TR_UNMAPPED           (UV)-1
 #  define TR_DELETE             (UV)-2
 #  define TR_R_EMPTY            (UV)-3  /* rhs (replacement) is empty */
diff --git a/proto.h b/proto.h
index 20e4b0e511..51c316e070 100644
--- a/proto.h
+++ b/proto.h
@@ -4859,31 +4859,26 @@ STATIC Size_t	S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const
 #define PERL_ARGS_ASSERT_DO_TRANS_COMPLEX	\
 	assert(sv); assert(tbl)
 
-STATIC Size_t	S_do_trans_complex_utf8(pTHX_ SV * const sv)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8	\
-	assert(sv)
-
 STATIC Size_t	S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_DO_TRANS_COUNT	\
 	assert(sv); assert(tbl)
 
-STATIC Size_t	S_do_trans_count_utf8(pTHX_ SV * const sv)
+STATIC Size_t	S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const map)
 			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8	\
-	assert(sv)
+#define PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP	\
+	assert(sv); assert(map)
+
+STATIC Size_t	S_do_trans_invmap(pTHX_ SV * const sv, AV * const map)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_DO_TRANS_INVMAP	\
+	assert(sv); assert(map)
 
 STATIC Size_t	S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_DO_TRANS_SIMPLE	\
 	assert(sv); assert(tbl)
 
-STATIC Size_t	S_do_trans_simple_utf8(pTHX_ SV * const sv)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8	\
-	assert(sv)
-
 #endif
 #if defined(PERL_IN_DUMP_C)
 STATIC CV*	S_deb_curcv(pTHX_ I32 ix);
@@ -5540,12 +5535,6 @@ STATIC SV *	S_space_join_names_mortal(pTHX_ char *const *array);
 STATIC void	S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist);
 #define PERL_ARGS_ASSERT_ADD_ABOVE_LATIN1_FOLDS	\
 	assert(pRExC_state); assert(invlist)
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE SV*	S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_ADD_CP_TO_INVLIST
-#endif
-
 STATIC U32	S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_ADD_DATA	\
@@ -5582,13 +5571,6 @@ STATIC SV *	S_get_ANYOFM_contents(pTHX_ const regnode * n)
 STATIC SV*	S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, const regnode_charclass* const node);
 #define PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC	\
 	assert(pRExC_state); assert(node)
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE STRLEN*	S_get_invlist_iter_addr(SV* invlist)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR	\
-	assert(invlist)
-#endif
-
 STATIC bool	S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
 #define PERL_ARGS_ASSERT_GROK_BSLASH_N	\
 	assert(pRExC_state); assert(flagp)
@@ -5614,46 +5596,12 @@ PERL_STATIC_INLINE SV*	S_invlist_contents(pTHX_ SV* const invlist, const bool tr
 #endif
 
 #ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE void	S_invlist_extend(pTHX_ SV* const invlist, const UV len);
-#define PERL_ARGS_ASSERT_INVLIST_EXTEND	\
-	assert(invlist)
-#endif
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE UV	S_invlist_highest(SV* const invlist)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_INVLIST_HIGHEST	\
-	assert(invlist)
-#endif
-
-#ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE bool	S_invlist_is_iterating(SV* const invlist)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_INVLIST_IS_ITERATING	\
 	assert(invlist)
 #endif
 
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE void	S_invlist_iterfinish(SV* invlist);
-#define PERL_ARGS_ASSERT_INVLIST_ITERFINISH	\
-	assert(invlist)
-#endif
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE void	S_invlist_iterinit(SV* invlist);
-#define PERL_ARGS_ASSERT_INVLIST_ITERINIT	\
-	assert(invlist)
-#endif
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE bool	S_invlist_iternext(SV* invlist, UV* start, UV* end)
-			__attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_INVLIST_ITERNEXT	\
-	assert(invlist); assert(start); assert(end)
-#endif
-
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE void	S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset);
-#define PERL_ARGS_ASSERT_INVLIST_SET_LEN	\
-	assert(invlist)
-#endif
 STATIC bool	S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc);
 #define PERL_ARGS_ASSERT_IS_SSC_WORTH_IT	\
 	assert(pRExC_state); assert(ssc)
@@ -5811,6 +5759,55 @@ PERL_CALLCONV void	Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, const char*
 #define PERL_ARGS_ASSERT__INVLIST_DUMP	\
 	assert(file); assert(indent); assert(invlist)
 #endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE SV*	S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_ADD_CP_TO_INVLIST
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE STRLEN*	S_get_invlist_iter_addr(SV* invlist)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR	\
+	assert(invlist)
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE void	S_invlist_extend(pTHX_ SV* const invlist, const UV len);
+#define PERL_ARGS_ASSERT_INVLIST_EXTEND	\
+	assert(invlist)
+#endif
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE UV	S_invlist_highest(SV* const invlist)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_INVLIST_HIGHEST	\
+	assert(invlist)
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE void	S_invlist_iterfinish(SV* invlist);
+#define PERL_ARGS_ASSERT_INVLIST_ITERFINISH	\
+	assert(invlist)
+#endif
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE void	S_invlist_iterinit(SV* invlist);
+#define PERL_ARGS_ASSERT_INVLIST_ITERINIT	\
+	assert(invlist)
+#endif
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE bool	S_invlist_iternext(SV* invlist, UV* start, UV* end)
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_INVLIST_ITERNEXT	\
+	assert(invlist); assert(start); assert(end)
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE void	S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset);
+#define PERL_ARGS_ASSERT_INVLIST_SET_LEN	\
+	assert(invlist)
+#endif
+#endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
 PERL_CALLCONV bool	Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b);
 #define PERL_ARGS_ASSERT__INVLISTEQ	\
@@ -5832,7 +5829,7 @@ PERL_CALLCONV void	Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode*
 #define PERL_ARGS_ASSERT_REGPROP	\
 	assert(sv); assert(o)
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)	 || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)		 || defined(PERL_IN_PP_C) || defined(PERL_IN_OP_C)		 || defined(PERL_IN_DOOP_C)
 #ifndef PERL_NO_INLINE_FUNCTIONS
 PERL_STATIC_INLINE bool	S__invlist_contains_cp(SV* const invlist, const UV cp)
 			__attribute__warn_unused_result__;
diff --git a/toke.c b/toke.c
index 0fdcadc78c..2c448eb6de 100644
--- a/toke.c
+++ b/toke.c
@@ -2905,12 +2905,12 @@ S_scan_const(pTHX_ char *start)
     bool dorange = FALSE;               /* are we in a translit range? */
     bool didrange = FALSE;              /* did we just finish a range? */
     bool in_charclass = FALSE;          /* within /[...]/ */
-    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
                                            UTF8?  But, this can show as true
                                            when the source isn't utf8, as for
                                            example when it is entirely composed
                                            of hex constants */
+    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
                                            number of characters found so far
                                            that will expand (into 2 bytes)
@@ -2951,11 +2951,6 @@ S_scan_const(pTHX_ char *start)
     PERL_ARGS_ASSERT_SCAN_CONST;
 
     assert(PL_lex_inwhat != OP_TRANSR);
-    if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
-	/* If we are doing a trans and we know we want UTF8, set expectation */
-	d_is_utf8  = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
-	s_is_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
-    }
 
     /* Protect sv from errors and fatal warnings. */
     ENTER_with_name("scan_const");
@@ -3646,13 +3641,6 @@ S_scan_const(pTHX_ char *start)
                         }
 
 		        d = (char*)uvchr_to_utf8((U8*)d, uv);
-			if (PL_lex_inwhat == OP_TRANS
-                            && PL_parser->lex_sub_op)
-                        {
-			    PL_parser->lex_sub_op->op_private |=
-				(PL_lex_repl ? OPpTRANS_FROM_UTF
-					     : OPpTRANS_TO_UTF);
-			}
 		    }
 		}
 #ifdef EBCDIC
@@ -4133,10 +4121,6 @@ S_scan_const(pTHX_ char *start)
     SvPOK_on(sv);
     if (d_is_utf8) {
 	SvUTF8_on(sv);
-	if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
-	    PL_parser->lex_sub_op->op_private |=
-		    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
-	}
     }
 
     /* shrink the sv if we allocated more than we used */
@@ -10297,9 +10281,7 @@ S_scan_trans(pTHX_ char *start)
 
     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
     o->op_private &= ~OPpTRANS_ALL;
-    o->op_private |= del|squash|complement|
-      (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
-      (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
+    o->op_private |= del|squash|complement;
 
     PL_lex_op = o;
     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;