10 files changed, 27 insertions, 235 deletions
diff --git a/embed.fnc b/embed.fnc
index ab2cdec4b3..756f7c14c7 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -661,16 +661,9 @@ ApR	|bool	|is_utf8_punct	|NN const U8 *p
 ApR	|bool	|is_utf8_xdigit	|NN const U8 *p
 ApR	|bool	|is_utf8_mark	|NN const U8 *p
 EXpR	|bool	|is_utf8_X_extend	|NN const U8 *p
-EXpR	|bool	|is_utf8_X_prepend	|NN const U8 *p
 EXpR	|bool	|is_utf8_X_regular_begin|NN const U8 *p
-EXpR	|bool	|is_utf8_X_special_begin|NN const U8 *p
-EXpR	|bool	|is_utf8_X_L		|NN const U8 *p
-EXpR	|bool	|is_utf8_X_RI		|NN const U8 *p
 :not currently used EXpR	|bool	|is_utf8_X_LV		|NN const U8 *p
 EXpR	|bool	|is_utf8_X_LVT		|NN const U8 *p
-EXpR	|bool	|is_utf8_X_LV_LVT_V	|NN const U8 *p
-EXpR	|bool	|is_utf8_X_T		|NN const U8 *p
-EXpR	|bool	|is_utf8_X_V		|NN const U8 *p
 : Used in perly.y
 p	|OP*	|jmaybe		|NN OP *o
 : Used in pp.c 
diff --git a/embed.h b/embed.h
index 45291f0983..33d732d54d 100644
--- a/embed.h
+++ b/embed.h
@@ -855,16 +855,9 @@
 #define _is_utf8__perl_idstart(a)	Perl__is_utf8__perl_idstart(aTHX_ a)
 #define av_reify(a)		Perl_av_reify(aTHX_ a)
 #define current_re_engine()	Perl_current_re_engine(aTHX)
-#define is_utf8_X_L(a)		Perl_is_utf8_X_L(aTHX_ a)
 #define is_utf8_X_LVT(a)	Perl_is_utf8_X_LVT(aTHX_ a)
-#define is_utf8_X_LV_LVT_V(a)	Perl_is_utf8_X_LV_LVT_V(aTHX_ a)
-#define is_utf8_X_RI(a)		Perl_is_utf8_X_RI(aTHX_ a)
-#define is_utf8_X_T(a)		Perl_is_utf8_X_T(aTHX_ a)
-#define is_utf8_X_V(a)		Perl_is_utf8_X_V(aTHX_ a)
 #define is_utf8_X_extend(a)	Perl_is_utf8_X_extend(aTHX_ a)
-#define is_utf8_X_prepend(a)	Perl_is_utf8_X_prepend(aTHX_ a)
 #define is_utf8_X_regular_begin(a)	Perl_is_utf8_X_regular_begin(aTHX_ a)
-#define is_utf8_X_special_begin(a)	Perl_is_utf8_X_special_begin(aTHX_ a)
 #define op_clear(a)		Perl_op_clear(aTHX_ a)
 #define qerror(a)		Perl_qerror(aTHX_ a)
 #define reg_named_buff(a,b,c,d)	Perl_reg_named_buff(aTHX_ a,b,c,d)
diff --git a/embedvar.h b/embedvar.h
index 877e81161f..d3eeaf03a0 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -353,16 +353,9 @@
 #define PL_unitcheckav_save	(vTHX->Iunitcheckav_save)
 #define PL_unlockhook		(vTHX->Iunlockhook)
 #define PL_unsafe		(vTHX->Iunsafe)
-#define PL_utf8_X_L		(vTHX->Iutf8_X_L)
 #define PL_utf8_X_LVT		(vTHX->Iutf8_X_LVT)
-#define PL_utf8_X_LV_LVT_V	(vTHX->Iutf8_X_LV_LVT_V)
-#define PL_utf8_X_RI		(vTHX->Iutf8_X_RI)
-#define PL_utf8_X_T		(vTHX->Iutf8_X_T)
-#define PL_utf8_X_V		(vTHX->Iutf8_X_V)
 #define PL_utf8_X_extend	(vTHX->Iutf8_X_extend)
-#define PL_utf8_X_prepend	(vTHX->Iutf8_X_prepend)
 #define PL_utf8_X_regular_begin	(vTHX->Iutf8_X_regular_begin)
-#define PL_utf8_X_special_begin	(vTHX->Iutf8_X_special_begin)
 #define PL_utf8_alnum		(vTHX->Iutf8_alnum)
 #define PL_utf8_alpha		(vTHX->Iutf8_alpha)
 #define PL_utf8_blank		(vTHX->Iutf8_blank)
diff --git a/intrpvar.h b/intrpvar.h
index 94b7425c10..641cac6268 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -628,14 +628,7 @@ PERLVAR(I, utf8_xdigit,	SV *)
 PERLVAR(I, utf8_mark,	SV *)
 PERLVAR(I, utf8_X_regular_begin, SV *)
 PERLVAR(I, utf8_X_extend, SV *)
-PERLVAR(I, utf8_X_prepend, SV *)
-PERLVAR(I, utf8_X_special_begin, SV *)
-PERLVAR(I, utf8_X_L,	SV *)
 PERLVAR(I, utf8_X_LVT,	SV *)
-PERLVAR(I, utf8_X_RI,	SV *)
-PERLVAR(I, utf8_X_T,	SV *)
-PERLVAR(I, utf8_X_V,	SV *)
-PERLVAR(I, utf8_X_LV_LVT_V, SV *)
 PERLVAR(I, utf8_toupper, SV *)
 PERLVAR(I, utf8_totitle, SV *)
 PERLVAR(I, utf8_tolower, SV *)
diff --git a/proto.h b/proto.h
index f97fe1fbae..187b0ae5aa 100644
--- a/proto.h
+++ b/proto.h
@@ -1764,66 +1764,24 @@ PERL_CALLCONV bool	Perl_is_uni_xdigit_lc(pTHX_ UV c)
 			__attribute__warn_unused_result__
 			__attribute__pure__;
 
-PERL_CALLCONV bool	Perl_is_utf8_X_L(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_L	\
-	assert(p)
-
 PERL_CALLCONV bool	Perl_is_utf8_X_LVT(pTHX_ const U8 *p)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_IS_UTF8_X_LVT	\
 	assert(p)
 
-PERL_CALLCONV bool	Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_LV_LVT_V	\
-	assert(p)
-
-PERL_CALLCONV bool	Perl_is_utf8_X_RI(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_RI	\
-	assert(p)
-
-PERL_CALLCONV bool	Perl_is_utf8_X_T(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_T	\
-	assert(p)
-
-PERL_CALLCONV bool	Perl_is_utf8_X_V(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_V	\
-	assert(p)
-
 PERL_CALLCONV bool	Perl_is_utf8_X_extend(pTHX_ const U8 *p)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_IS_UTF8_X_EXTEND	\
 	assert(p)
 
-PERL_CALLCONV bool	Perl_is_utf8_X_prepend(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND	\
-	assert(p)
-
 PERL_CALLCONV bool	Perl_is_utf8_X_regular_begin(pTHX_ const U8 *p)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_IS_UTF8_X_REGULAR_BEGIN	\
 	assert(p)
 
-PERL_CALLCONV bool	Perl_is_utf8_X_special_begin(pTHX_ const U8 *p)
-			__attribute__warn_unused_result__
-			__attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_SPECIAL_BEGIN	\
-	assert(p)
-
 PERL_CALLCONV bool	Perl_is_utf8_alnum(pTHX_ const U8 *p)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1);
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 73ec4ae0bf..56e53491b1 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -127,9 +127,6 @@ __DATA__
 03C5 first
 03C5 tail
 
-1100
-1160
-11A8
 2010 string
 
 007F native
diff --git a/regexec.c b/regexec.c
index f51d50d612..4e9b80cc54 100644
--- a/regexec.c
+++ b/regexec.c
@@ -145,14 +145,7 @@
         /* No asserts are done for some of these, in case called on a   */  \
         /* Unicode version in which they map to nothing */                  \
 	LOAD_UTF8_CHARCLASS(X_regular_begin, HYPHEN_UTF8);                          \
-	LOAD_UTF8_CHARCLASS_NO_CHECK(X_special_begin);                      \
 	LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8);         \
-	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* empty in most releases*/ \
-	LOAD_UTF8_CHARCLASS(X_L, HANGUL_CHOSEONG_KIYEOK_UTF8);	            \
-	LOAD_UTF8_CHARCLASS(X_LV_LVT_V, HANGUL_JUNGSEONG_FILLER_UTF8);      \
-	LOAD_UTF8_CHARCLASS_NO_CHECK(X_RI);    /* empty in many releases */ \
-	LOAD_UTF8_CHARCLASS(X_T, HANGUL_JONGSEONG_KIYEOK_UTF8);             \
-	LOAD_UTF8_CHARCLASS(X_V, HANGUL_JUNGSEONG_FILLER_UTF8)
 
 #define PLACEHOLDER	/* Something for the preprocessor to grab onto */
 
@@ -4058,6 +4051,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		    locinput += 2;
 		}
 		else {
+                    STRLEN len;
+
 		    /* In case have to backtrack to beginning, then match '.' */
 		    char *starting = locinput;
 
@@ -4066,16 +4061,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
 		    LOAD_UTF8_CHARCLASS_GCB();
 
-                    /* Match (prepend)*, but don't bother trying if empty (as
-                     * being set to _undef indicates) */
-                    if (PL_utf8_X_prepend != &PL_sv_undef) {
-                        while (locinput < PL_regeol
-                               && swash_fetch(PL_utf8_X_prepend,
-                                              (U8*)locinput, utf8_target))
-                        {
-                            previous_prepend = locinput;
-                            locinput += UTF8SKIP(locinput);
-                        }
+                    /* Match (prepend)*   */
+                    while (locinput < PL_regeol
+                           && (len = is_GCB_Prepend_utf8(locinput)))
+                    {
+                        previous_prepend = locinput;
+                        locinput += len;
                     }
 
 		    /* As noted above, if we matched a prepend character, but
@@ -4085,8 +4076,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 			&& (locinput >=  PL_regeol
 			    || (! swash_fetch(PL_utf8_X_regular_begin,
 					     (U8*)locinput, utf8_target)
-			         && ! swash_fetch(PL_utf8_X_special_begin,
-					     (U8*)locinput, utf8_target)))
+			         && ! is_GCB_SPECIAL_BEGIN_utf8(locinput)))
                         )
 		    {
 			locinput = previous_prepend;
@@ -4101,9 +4091,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                                     (U8*)locinput, utf8_target)) {
                         locinput += UTF8SKIP(locinput);
                     }
-                    else if (! swash_fetch(PL_utf8_X_special_begin,
-					(U8*)locinput, utf8_target))
-			{
+                    else if (! is_GCB_SPECIAL_BEGIN_utf8(locinput)) {
 
 			/* Here did not match the required 'Begin' in the
 			 * second term.  So just match the very first
@@ -4115,26 +4103,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                         /* Here is a special begin.  It can be composed of
                          * several individual characters.  One possibility is
                          * RI+ */
-                        if (swash_fetch(PL_utf8_X_RI,
-                                        (U8*)locinput, utf8_target))
-                        {
-                            locinput += UTF8SKIP(locinput);
+                        if ((len = is_GCB_RI_utf8(locinput))) {
+                            locinput += len;
                             while (locinput < PL_regeol
-                                    && swash_fetch(PL_utf8_X_RI,
-                                                    (U8*)locinput, utf8_target))
+                                   && (len = is_GCB_RI_utf8(locinput)))
                             {
-                                locinput += UTF8SKIP(locinput);
+                                locinput += len;
                             }
-                        } else /* Another possibility is T+ */
-                               if (swash_fetch(PL_utf8_X_T,
-                                               (U8*)locinput, utf8_target))
-                        {
-                            locinput += UTF8SKIP(locinput);
+                        } else if ((len = is_GCB_T_utf8(locinput))) {
+                            /* Another possibility is T+ */
+                            locinput += len;
                             while (locinput < PL_regeol
-                                    && swash_fetch(PL_utf8_X_T,
-                                                    (U8*)locinput, utf8_target))
+                                && (len = is_GCB_T_utf8(locinput)))
                             {
-                                locinput += UTF8SKIP(locinput);
+                                locinput += len;
                             }
                         } else {
 
@@ -4145,10 +4127,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
                             /* Match L*           */
                             while (locinput < PL_regeol
-                                    && swash_fetch(PL_utf8_X_L,
-                                                    (U8*)locinput, utf8_target))
+                                   && (len = is_GCB_L_utf8(locinput)))
                             {
-                                locinput += UTF8SKIP(locinput);
+                                locinput += len;
                             }
 
                             /* Here, have exhausted L*.  If the next character
@@ -4158,8 +4139,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                              * Are done. */
 
                             if (locinput < PL_regeol
-                                && swash_fetch(PL_utf8_X_LV_LVT_V,
-                                                (U8*)locinput, utf8_target))
+                                && is_GCB_LV_LVT_V_utf8(locinput))
                             {
 
                                 /* Otherwise keep going.  Must be LV, LVT or V.
@@ -4172,22 +4152,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                                      * V*     */
                                     locinput += UTF8SKIP(locinput);
                                     while (locinput < PL_regeol
-                                            && swash_fetch(PL_utf8_X_V,
-                                                           (U8*)locinput,
-                                                           utf8_target))
+                                           && (len = is_GCB_V_utf8(locinput)))
                                     {
-                                        locinput += UTF8SKIP(locinput);
+                                        locinput += len;
                                     }
                                 }
 
                                 /* And any of LV, LVT, or V can be followed
-                                    * by T*            */
+                                 * by T*            */
                                 while (locinput < PL_regeol
-                                        && swash_fetch(PL_utf8_X_T,
-                                                        (U8*)locinput,
-                                                        utf8_target))
+                                       && (len = is_GCB_T_utf8(locinput)))
                                 {
-                                    locinput += UTF8SKIP(locinput);
+                                    locinput += len;
                                 }
                             }
                         }
diff --git a/sv.c b/sv.c
index 497417c53a..a757ad2b0e 100644
--- a/sv.c
+++ b/sv.c
@@ -13376,15 +13376,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_utf8_mark	= sv_dup_inc(proto_perl->Iutf8_mark, param);
     PL_utf8_X_regular_begin	= sv_dup_inc(proto_perl->Iutf8_X_regular_begin, param);
     PL_utf8_X_extend	= sv_dup_inc(proto_perl->Iutf8_X_extend, param);
-    PL_utf8_X_prepend	= sv_dup_inc(proto_perl->Iutf8_X_prepend, param);
-    PL_utf8_X_special_begin	= sv_dup_inc(proto_perl->Iutf8_X_special_begin, param);
-    PL_utf8_X_L	= sv_dup_inc(proto_perl->Iutf8_X_L, param);
-    /*not currently used: PL_utf8_X_LV	= sv_dup_inc(proto_perl->Iutf8_X_LV, param);*/
     PL_utf8_X_LVT	= sv_dup_inc(proto_perl->Iutf8_X_LVT, param);
-    PL_utf8_X_RI	= sv_dup_inc(proto_perl->Iutf8_X_RI, param);
-    PL_utf8_X_T	= sv_dup_inc(proto_perl->Iutf8_X_T, param);
-    PL_utf8_X_V	= sv_dup_inc(proto_perl->Iutf8_X_V, param);
-    PL_utf8_X_LV_LVT_V	= sv_dup_inc(proto_perl->Iutf8_X_LV_LVT_V, param);
     PL_utf8_toupper	= sv_dup_inc(proto_perl->Iutf8_toupper, param);
     PL_utf8_totitle	= sv_dup_inc(proto_perl->Iutf8_totitle, param);
     PL_utf8_tolower	= sv_dup_inc(proto_perl->Iutf8_tolower, param);
diff --git a/unicode_constants.h b/unicode_constants.h
index f915d59b8e..82081d910a 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -30,9 +30,6 @@
 #define GREEK_SMALL_LETTER_UPSILON_UTF8_FIRST_BYTE 0xCF    /* U+03C5 */
 #define GREEK_SMALL_LETTER_UPSILON_UTF8_TAIL "\x85"    /* U+03C5 */
 
-#define HANGUL_CHOSEONG_KIYEOK_UTF8 "\xE1\x84\x80"    /* U+1100 */
-#define HANGUL_JUNGSEONG_FILLER_UTF8 "\xE1\x85\xA0"    /* U+1160 */
-#define HANGUL_JONGSEONG_KIYEOK_UTF8 "\xE1\x86\xA8"    /* U+11A8 */
 #define HYPHEN_UTF8 "\xE2\x80\x90"    /* U+2010 */
 
 #define DELETE_NATIVE 0x007F    /* U+007F */
diff --git a/utf8.c b/utf8.c
index 2172d311b4..88ca041eee 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2229,76 +2229,6 @@ Perl_is_utf8_X_extend(pTHX_ const U8 *p)
     return is_utf8_common(p, &PL_utf8_X_extend, "_X_Extend");
 }
 
-bool
-Perl_is_utf8_X_prepend(pTHX_ const U8 *p)
-{
-    /* If no code points in the Unicode version being worked on match
-     * GCB=Prepend, this will set PL_utf8_X_prepend to &PL_sv_undef during its
-     * first call.  Otherwise, it will set it to a swash created for it.
-     * swash_fetch() hence can't be used without checking first if it is valid
-     * to do so. */
-
-    dVAR;
-    bool initialized = cBOOL(PL_utf8_X_prepend);
-    bool ret;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND;
-
-    if (PL_utf8_X_prepend == &PL_sv_undef) {
-        return FALSE;
-    }
-
-    if ((ret = is_utf8_common(p, &PL_utf8_X_prepend, "_X_GCB_Prepend"))
-        || initialized)
-    {
-        return ret;
-    }
-
-    /* Here the code point being checked was not a prepend, and we hadn't
-     * initialized PL_utf8_X_prepend, so we don't know if it is just this
-     * particular input code point that didn't match, or if the table is
-     * completely empty. The is_utf8_common() call did the initialization, so
-     * we can inspect the swash's inversion list to find out.  If there are no
-     * elements in its inversion list, it's empty, and nothing will ever match,
-     * so set things up so we can skip the check in future calls. */
-    if (_invlist_len(_get_swash_invlist(PL_utf8_X_prepend)) == 0) {
-        SvREFCNT_dec(PL_utf8_X_prepend);
-        PL_utf8_X_prepend = &PL_sv_undef;
-    }
-
-    return FALSE;
-}
-
-bool
-Perl_is_utf8_X_special_begin(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_SPECIAL_BEGIN;
-
-    return is_utf8_common(p, &PL_utf8_X_special_begin, "_X_Special_Begin");
-}
-
-bool
-Perl_is_utf8_X_L(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_L;
-
-    return is_utf8_common(p, &PL_utf8_X_L, "_X_GCB_L");
-}
-
-bool
-Perl_is_utf8_X_RI(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_RI;
-
-    return is_utf8_common(p, &PL_utf8_X_RI, "_X_RI");
-}
-
 /* These constants are for finding GCB=LV and GCB=LVT.  These are for the
  * pre-composed Hangul syllables, which are all in a contiguous block and
  * arranged there in such a way so as to facilitate alorithmic determination of
@@ -2367,35 +2297,6 @@ Perl_is_utf8_X_LVT(pTHX_ const U8 *p)
             && (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
 }
 
-bool
-Perl_is_utf8_X_T(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_T;
-
-    return is_utf8_common(p, &PL_utf8_X_T, "_X_GCB_T");
-}
-
-bool
-Perl_is_utf8_X_V(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_V;
-
-    return is_utf8_common(p, &PL_utf8_X_V, "_X_GCB_V");
-}
-
-bool
-Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_LV_LVT_V;
-
-    return is_utf8_common(p, &PL_utf8_X_LV_LVT_V, "_X_LV_LVT_V");
-}
 
 bool
 Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
@@ -2408,7 +2309,6 @@ Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
 
     return is_utf8_common(p, &PL_utf8_quotemeta, "_Perl_Quotemeta");
 }
-
 /*
 =for apidoc to_utf8_case