Allow perl to work again with all Unicode releases

Compiling perl to work with early Unicode releases is a goal, but has been broken since 5.18. This sequence of commits gets this working again, actually better than before.
author: Karl Williamson <khw@cpan.org> 2015-07-28 22:17:01 -0600
committer: Karl Williamson <khw@cpan.org> 2015-07-28 22:17:01 -0600
commit: a0b05c4bde4f97301b663b8de70677551eacff4c (patch)
tree: e9329726efb7aa27f5fd08f1441943f95b68a594
parent: 16d89be8495ff4e0a9b99e837b5444df8a4a6cc6 (diff)
parent: 912fd71114648888551a4750534463f20ae16c7c (diff)
download: perl-a0b05c4bde4f97301b663b8de70677551eacff4c.tar.gz
24 files changed, 4282 insertions, 3313 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 076d223724..64406bfe78 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -312,7 +312,387 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */
 	0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */
+#endif	/* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for ASCII/Latin1 */
+	45,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x2BC,
+	0x2BD,
+	0x308,
+	0x309,
+	0x313,
+	0x314,
+	0x342,
+	0x343,
+	0x3AC,
+	0x3AD,
+	0x3AE,
+	0x3AF,
+	0x3B1,
+	0x3B2,
+	0x3B7,
+	0x3B8,
+	0x3B9,
+	0x3BA,
+	0x3C1,
+	0x3C2,
+	0x3C5,
+	0x3C6,
+	0x3C9,
+	0x3CA,
+	0x3CE,
+	0x3CF,
+	0x565,
+	0x566,
+	0x574,
+	0x575,
+	0x57E,
+	0x57F,
+	0x1F00,
+	0x1F08,
+	0x1F20,
+	0x1F28,
+	0x1F60,
+	0x1F68,
+	0x1F70,
+	0x1F71,
+	0x1F74,
+	0x1F75,
+	0x1F7C,
+	0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */
+	247,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x41,
+	0x5B,
+	0x61,
+	0x7B,
+	0xB5,
+	0xB6,
+	0xC0,
+	0xD7,
+	0xD8,
+	0xF7,
+	0xF8,
+	0x131,
+	0x132,
+	0x138,
+	0x139,
+	0x18D,
+	0x18E,
+	0x19B,
+	0x19C,
+	0x1AA,
+	0x1AC,
+	0x1BA,
+	0x1BC,
+	0x1BE,
+	0x1BF,
+	0x1C0,
+	0x1C4,
+	0x221,
+	0x222,
+	0x234,
+	0x23A,
+	0x255,
+	0x256,
+	0x258,
+	0x259,
+	0x25A,
+	0x25B,
+	0x25D,
+	0x260,
+	0x262,
+	0x263,
+	0x264,
+	0x265,
+	0x267,
+	0x268,
+	0x26A,
+	0x26B,
+	0x26D,
+	0x26F,
+	0x270,
+	0x271,
+	0x273,
+	0x275,
+	0x276,
+	0x27D,
+	0x27E,
+	0x280,
+	0x281,
+	0x283,
+	0x284,
+	0x287,
+	0x28D,
+	0x292,
+	0x293,
+	0x29D,
+	0x29F,
+	0x2BC,
+	0x2BD,
+	0x2BE,
+	0x2BF,
+	0x300,
+	0x302,
+	0x307,
+	0x309,
+	0x30A,
+	0x30B,
+	0x30C,
+	0x30D,
+	0x313,
+	0x314,
+	0x331,
+	0x332,
+	0x342,
+	0x343,
+	0x345,
+	0x346,
+	0x370,
+	0x374,
+	0x376,
+	0x378,
+	0x37B,
+	0x37E,
+	0x37F,
+	0x380,
+	0x386,
+	0x387,
+	0x388,
+	0x38B,
+	0x38C,
+	0x38D,
+	0x38E,
+	0x3A2,
+	0x3A3,
+	0x3D2,
+	0x3D5,
+	0x3F6,
+	0x3F7,
+	0x3FC,
+	0x3FD,
+	0x482,
+	0x48A,
+	0x530,
+	0x531,
+	0x557,
+	0x561,
+	0x588,
+	0x10A0,
+	0x10C6,
+	0x10C7,
+	0x10C8,
+	0x10CD,
+	0x10CE,
+	0x13A0,
+	0x13F6,
+	0x13F8,
+	0x13FE,
+	0x1D79,
+	0x1D7A,
+	0x1D7D,
+	0x1D7E,
+	0x1E00,
+	0x1E9C,
+	0x1E9E,
+	0x1E9F,
+	0x1EA0,
+	0x1F16,
+	0x1F18,
+	0x1F1E,
+	0x1F20,
+	0x1F46,
+	0x1F48,
+	0x1F4E,
+	0x1F50,
+	0x1F58,
+	0x1F59,
+	0x1F5A,
+	0x1F5B,
+	0x1F5C,
+	0x1F5D,
+	0x1F5E,
+	0x1F5F,
+	0x1F7E,
+	0x1F80,
+	0x1FB5,
+	0x1FB6,
+	0x1FBD,
+	0x1FBE,
+	0x1FBF,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FCD,
+	0x1FD0,
+	0x1FD4,
+	0x1FD6,
+	0x1FDC,
+	0x1FE0,
+	0x1FED,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FFD,
+	0x2126,
+	0x2127,
+	0x212A,
+	0x212C,
+	0x2132,
+	0x2133,
+	0x214E,
+	0x214F,
+	0x2160,
+	0x2180,
+	0x2183,
+	0x2185,
+	0x24B6,
+	0x24EA,
+	0x2C00,
+	0x2C2F,
+	0x2C30,
+	0x2C5F,
+	0x2C60,
+	0x2C71,
+	0x2C72,
+	0x2C74,
+	0x2C75,
+	0x2C77,
+	0x2C7E,
+	0x2CE4,
+	0x2CEB,
+	0x2CEF,
+	0x2CF2,
+	0x2CF4,
+	0x2D00,
+	0x2D26,
+	0x2D27,
+	0x2D28,
+	0x2D2D,
+	0x2D2E,
+	0xA640,
+	0xA66E,
+	0xA680,
+	0xA69C,
+	0xA722,
+	0xA730,
+	0xA732,
+	0xA770,
+	0xA779,
+	0xA788,
+	0xA78B,
+	0xA78E,
+	0xA790,
+	0xA794,
+	0xA796,
+	0xA7AE,
+	0xA7B0,
+	0xA7B8,
+	0xAB53,
+	0xAB54,
+	0xAB70,
+	0xABC0,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18,
+	0xFF21,
+	0xFF3B,
+	0xFF41,
+	0xFF5B,
+	0x10400,
+	0x10450,
+	0x10C80,
+	0x10CB3,
+	0x10CC0,
+	0x10CF3,
+	0x118A0,
+	0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for ASCII/Latin1 */
+	59,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0xDF,
+	0xE0,
+	0x130,
+	0x131,
+	0x149,
+	0x14A,
+	0x1F0,
+	0x1F1,
+	0x390,
+	0x391,
+	0x3B0,
+	0x3B1,
+	0x587,
+	0x588,
+	0x1E96,
+	0x1E9B,
+	0x1E9E,
+	0x1E9F,
+	0x1F50,
+	0x1F51,
+	0x1F52,
+	0x1F53,
+	0x1F54,
+	0x1F55,
+	0x1F56,
+	0x1F57,
+	0x1F80,
+	0x1FB0,
+	0x1FB2,
+	0x1FB5,
+	0x1FB6,
+	0x1FB8,
+	0x1FBC,
+	0x1FBD,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FC8,
+	0x1FCC,
+	0x1FCD,
+	0x1FD2,
+	0x1FD4,
+	0x1FD6,
+	0x1FD8,
+	0x1FE2,
+	0x1FE5,
+	0x1FE6,
+	0x1FE8,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FF8,
+	0x1FFC,
+	0x1FFD,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for ASCII/Latin1 */
 	1502,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -1844,7 +2224,7 @@ typedef enum {
 	GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for ASCII/Latin1 */
 	GCB_Control,
 	GCB_LF,
 	GCB_Control,
@@ -3351,382 +3731,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for ASCII/Latin1 */
-	45,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x2BC,
-	0x2BD,
-	0x308,
-	0x309,
-	0x313,
-	0x314,
-	0x342,
-	0x343,
-	0x3AC,
-	0x3AD,
-	0x3AE,
-	0x3AF,
-	0x3B1,
-	0x3B2,
-	0x3B7,
-	0x3B8,
-	0x3B9,
-	0x3BA,
-	0x3C1,
-	0x3C2,
-	0x3C5,
-	0x3C6,
-	0x3C9,
-	0x3CA,
-	0x3CE,
-	0x3CF,
-	0x565,
-	0x566,
-	0x574,
-	0x575,
-	0x57E,
-	0x57F,
-	0x1F00,
-	0x1F08,
-	0x1F20,
-	0x1F28,
-	0x1F60,
-	0x1F68,
-	0x1F70,
-	0x1F71,
-	0x1F74,
-	0x1F75,
-	0x1F7C,
-	0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */
-	247,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x41,
-	0x5B,
-	0x61,
-	0x7B,
-	0xB5,
-	0xB6,
-	0xC0,
-	0xD7,
-	0xD8,
-	0xF7,
-	0xF8,
-	0x131,
-	0x132,
-	0x138,
-	0x139,
-	0x18D,
-	0x18E,
-	0x19B,
-	0x19C,
-	0x1AA,
-	0x1AC,
-	0x1BA,
-	0x1BC,
-	0x1BE,
-	0x1BF,
-	0x1C0,
-	0x1C4,
-	0x221,
-	0x222,
-	0x234,
-	0x23A,
-	0x255,
-	0x256,
-	0x258,
-	0x259,
-	0x25A,
-	0x25B,
-	0x25D,
-	0x260,
-	0x262,
-	0x263,
-	0x264,
-	0x265,
-	0x267,
-	0x268,
-	0x26A,
-	0x26B,
-	0x26D,
-	0x26F,
-	0x270,
-	0x271,
-	0x273,
-	0x275,
-	0x276,
-	0x27D,
-	0x27E,
-	0x280,
-	0x281,
-	0x283,
-	0x284,
-	0x287,
-	0x28D,
-	0x292,
-	0x293,
-	0x29D,
-	0x29F,
-	0x2BC,
-	0x2BD,
-	0x2BE,
-	0x2BF,
-	0x300,
-	0x302,
-	0x307,
-	0x309,
-	0x30A,
-	0x30B,
-	0x30C,
-	0x30D,
-	0x313,
-	0x314,
-	0x331,
-	0x332,
-	0x342,
-	0x343,
-	0x345,
-	0x346,
-	0x370,
-	0x374,
-	0x376,
-	0x378,
-	0x37B,
-	0x37E,
-	0x37F,
-	0x380,
-	0x386,
-	0x387,
-	0x388,
-	0x38B,
-	0x38C,
-	0x38D,
-	0x38E,
-	0x3A2,
-	0x3A3,
-	0x3D2,
-	0x3D5,
-	0x3F6,
-	0x3F7,
-	0x3FC,
-	0x3FD,
-	0x482,
-	0x48A,
-	0x530,
-	0x531,
-	0x557,
-	0x561,
-	0x588,
-	0x10A0,
-	0x10C6,
-	0x10C7,
-	0x10C8,
-	0x10CD,
-	0x10CE,
-	0x13A0,
-	0x13F6,
-	0x13F8,
-	0x13FE,
-	0x1D79,
-	0x1D7A,
-	0x1D7D,
-	0x1D7E,
-	0x1E00,
-	0x1E9C,
-	0x1E9E,
-	0x1E9F,
-	0x1EA0,
-	0x1F16,
-	0x1F18,
-	0x1F1E,
-	0x1F20,
-	0x1F46,
-	0x1F48,
-	0x1F4E,
-	0x1F50,
-	0x1F58,
-	0x1F59,
-	0x1F5A,
-	0x1F5B,
-	0x1F5C,
-	0x1F5D,
-	0x1F5E,
-	0x1F5F,
-	0x1F7E,
-	0x1F80,
-	0x1FB5,
-	0x1FB6,
-	0x1FBD,
-	0x1FBE,
-	0x1FBF,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FCD,
-	0x1FD0,
-	0x1FD4,
-	0x1FD6,
-	0x1FDC,
-	0x1FE0,
-	0x1FED,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FFD,
-	0x2126,
-	0x2127,
-	0x212A,
-	0x212C,
-	0x2132,
-	0x2133,
-	0x214E,
-	0x214F,
-	0x2160,
-	0x2180,
-	0x2183,
-	0x2185,
-	0x24B6,
-	0x24EA,
-	0x2C00,
-	0x2C2F,
-	0x2C30,
-	0x2C5F,
-	0x2C60,
-	0x2C71,
-	0x2C72,
-	0x2C74,
-	0x2C75,
-	0x2C77,
-	0x2C7E,
-	0x2CE4,
-	0x2CEB,
-	0x2CEF,
-	0x2CF2,
-	0x2CF4,
-	0x2D00,
-	0x2D26,
-	0x2D27,
-	0x2D28,
-	0x2D2D,
-	0x2D2E,
-	0xA640,
-	0xA66E,
-	0xA680,
-	0xA69C,
-	0xA722,
-	0xA730,
-	0xA732,
-	0xA770,
-	0xA779,
-	0xA788,
-	0xA78B,
-	0xA78E,
-	0xA790,
-	0xA794,
-	0xA796,
-	0xA7AE,
-	0xA7B0,
-	0xA7B8,
-	0xAB53,
-	0xAB54,
-	0xAB70,
-	0xABC0,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18,
-	0xFF21,
-	0xFF3B,
-	0xFF41,
-	0xFF5B,
-	0x10400,
-	0x10450,
-	0x10C80,
-	0x10CB3,
-	0x10CC0,
-	0x10CF3,
-	0x118A0,
-	0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for ASCII/Latin1 */
-	59,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0xDF,
-	0xE0,
-	0x130,
-	0x131,
-	0x149,
-	0x14A,
-	0x1F0,
-	0x1F1,
-	0x390,
-	0x391,
-	0x3B0,
-	0x3B1,
-	0x587,
-	0x588,
-	0x1E96,
-	0x1E9B,
-	0x1E9E,
-	0x1E9F,
-	0x1F50,
-	0x1F51,
-	0x1F52,
-	0x1F53,
-	0x1F54,
-	0x1F55,
-	0x1F56,
-	0x1F57,
-	0x1F80,
-	0x1FB0,
-	0x1FB2,
-	0x1FB5,
-	0x1FB6,
-	0x1FB8,
-	0x1FBC,
-	0x1FBD,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FC8,
-	0x1FCC,
-	0x1FCD,
-	0x1FD2,
-	0x1FD4,
-	0x1FD6,
-	0x1FD8,
-	0x1FE2,
-	0x1FE5,
-	0x1FE6,
-	0x1FE8,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FF8,
-	0x1FFC,
-	0x1FFD,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */
@@ -6193,7 +6197,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */
+static const UV _Perl_SB_invlist[] = { /* for ASCII/Latin1 */
 	2896,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -9121,7 +9125,7 @@ typedef enum {
 	SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */
+static const SB_enum _Perl_SB_invmap[] = { /* for ASCII/Latin1 */
 	SB_Other,
 	SB_Sp,
 	SB_LF,
@@ -12022,37 +12026,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for ASCII/Latin1 */
-	3,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x80,
-	0x100
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for ASCII/Latin1 */
-	7,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0xA,
-	0xE,
-	0x85,
-	0x86,
-	0x2028,
-	0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */
+static const UV _Perl_WB_invlist[] = { /* for ASCII/Latin1 */
 	1524,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -13611,7 +13587,7 @@ typedef enum {
 	WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */
+static const WB_enum _Perl_WB_invmap[] = { /* for ASCII/Latin1 */
 	WB_Other,
 	WB_LF,
 	WB_Newline,
@@ -15140,8 +15116,36 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for ASCII/Latin1 */
+	3,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x80,
+	0x100
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for ASCII/Latin1 */
+	7,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0xA,
+	0xE,
+	0x85,
+	0x86,
+	0x2028,
+	0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for ASCII/Latin1 */
 	21,	/* Number of elements */
 	148565664, /* Version and data structure type */
@@ -24947,7 +24951,419 @@ static const UV Cased_invlist[] = { /* for EBCDIC 1047 */
 	0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC 1047 */
+#endif	/* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 1047 */
+	45,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x2BC,
+	0x2BD,
+	0x308,
+	0x309,
+	0x313,
+	0x314,
+	0x342,
+	0x343,
+	0x3AC,
+	0x3AD,
+	0x3AE,
+	0x3AF,
+	0x3B1,
+	0x3B2,
+	0x3B7,
+	0x3B8,
+	0x3B9,
+	0x3BA,
+	0x3C1,
+	0x3C2,
+	0x3C5,
+	0x3C6,
+	0x3C9,
+	0x3CA,
+	0x3CE,
+	0x3CF,
+	0x565,
+	0x566,
+	0x574,
+	0x575,
+	0x57E,
+	0x57F,
+	0x1F00,
+	0x1F08,
+	0x1F20,
+	0x1F28,
+	0x1F60,
+	0x1F68,
+	0x1F70,
+	0x1F71,
+	0x1F74,
+	0x1F75,
+	0x1F7C,
+	0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 1047 */
+	279,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x42,
+	0x4A,
+	0x51,
+	0x5A,
+	0x62,
+	0x6A,
+	0x70,
+	0x79,
+	0x80,
+	0x8A,
+	0x8C,
+	0x8F,
+	0x91,
+	0x9A,
+	0x9C,
+	0x9D,
+	0x9E,
+	0x9F,
+	0xA0,
+	0xA1,
+	0xA2,
+	0xAA,
+	0xAC,
+	0xAD,
+	0xAE,
+	0xAF,
+	0xBA,
+	0xBB,
+	0xC1,
+	0xCA,
+	0xCB,
+	0xD0,
+	0xD1,
+	0xDA,
+	0xDB,
+	0xE0,
+	0xE2,
+	0xEA,
+	0xEB,
+	0xF0,
+	0xFB,
+	0xFF,
+	0x100,
+	0x131,
+	0x132,
+	0x138,
+	0x139,
+	0x18D,
+	0x18E,
+	0x19B,
+	0x19C,
+	0x1AA,
+	0x1AC,
+	0x1BA,
+	0x1BC,
+	0x1BE,
+	0x1BF,
+	0x1C0,
+	0x1C4,
+	0x221,
+	0x222,
+	0x234,
+	0x23A,
+	0x255,
+	0x256,
+	0x258,
+	0x259,
+	0x25A,
+	0x25B,
+	0x25D,
+	0x260,
+	0x262,
+	0x263,
+	0x264,
+	0x265,
+	0x267,
+	0x268,
+	0x26A,
+	0x26B,
+	0x26D,
+	0x26F,
+	0x270,
+	0x271,
+	0x273,
+	0x275,
+	0x276,
+	0x27D,
+	0x27E,
+	0x280,
+	0x281,
+	0x283,
+	0x284,
+	0x287,
+	0x28D,
+	0x292,
+	0x293,
+	0x29D,
+	0x29F,
+	0x2BC,
+	0x2BD,
+	0x2BE,
+	0x2BF,
+	0x300,
+	0x302,
+	0x307,
+	0x309,
+	0x30A,
+	0x30B,
+	0x30C,
+	0x30D,
+	0x313,
+	0x314,
+	0x331,
+	0x332,
+	0x342,
+	0x343,
+	0x345,
+	0x346,
+	0x370,
+	0x374,
+	0x376,
+	0x378,
+	0x37B,
+	0x37E,
+	0x37F,
+	0x380,
+	0x386,
+	0x387,
+	0x388,
+	0x38B,
+	0x38C,
+	0x38D,
+	0x38E,
+	0x3A2,
+	0x3A3,
+	0x3D2,
+	0x3D5,
+	0x3F6,
+	0x3F7,
+	0x3FC,
+	0x3FD,
+	0x482,
+	0x48A,
+	0x530,
+	0x531,
+	0x557,
+	0x561,
+	0x588,
+	0x10A0,
+	0x10C6,
+	0x10C7,
+	0x10C8,
+	0x10CD,
+	0x10CE,
+	0x13A0,
+	0x13F6,
+	0x13F8,
+	0x13FE,
+	0x1D79,
+	0x1D7A,
+	0x1D7D,
+	0x1D7E,
+	0x1E00,
+	0x1E9C,
+	0x1E9E,
+	0x1E9F,
+	0x1EA0,
+	0x1F16,
+	0x1F18,
+	0x1F1E,
+	0x1F20,
+	0x1F46,
+	0x1F48,
+	0x1F4E,
+	0x1F50,
+	0x1F58,
+	0x1F59,
+	0x1F5A,
+	0x1F5B,
+	0x1F5C,
+	0x1F5D,
+	0x1F5E,
+	0x1F5F,
+	0x1F7E,
+	0x1F80,
+	0x1FB5,
+	0x1FB6,
+	0x1FBD,
+	0x1FBE,
+	0x1FBF,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FCD,
+	0x1FD0,
+	0x1FD4,
+	0x1FD6,
+	0x1FDC,
+	0x1FE0,
+	0x1FED,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FFD,
+	0x2126,
+	0x2127,
+	0x212A,
+	0x212C,
+	0x2132,
+	0x2133,
+	0x214E,
+	0x214F,
+	0x2160,
+	0x2180,
+	0x2183,
+	0x2185,
+	0x24B6,
+	0x24EA,
+	0x2C00,
+	0x2C2F,
+	0x2C30,
+	0x2C5F,
+	0x2C60,
+	0x2C71,
+	0x2C72,
+	0x2C74,
+	0x2C75,
+	0x2C77,
+	0x2C7E,
+	0x2CE4,
+	0x2CEB,
+	0x2CEF,
+	0x2CF2,
+	0x2CF4,
+	0x2D00,
+	0x2D26,
+	0x2D27,
+	0x2D28,
+	0x2D2D,
+	0x2D2E,
+	0xA640,
+	0xA66E,
+	0xA680,
+	0xA69C,
+	0xA722,
+	0xA730,
+	0xA732,
+	0xA770,
+	0xA779,
+	0xA788,
+	0xA78B,
+	0xA78E,
+	0xA790,
+	0xA794,
+	0xA796,
+	0xA7AE,
+	0xA7B0,
+	0xA7B8,
+	0xAB53,
+	0xAB54,
+	0xAB70,
+	0xABC0,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18,
+	0xFF21,
+	0xFF3B,
+	0xFF41,
+	0xFF5B,
+	0x10400,
+	0x10450,
+	0x10C80,
+	0x10CB3,
+	0x10CC0,
+	0x10CF3,
+	0x118A0,
+	0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 1047 */
+	59,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x59,
+	0x5A,
+	0x130,
+	0x131,
+	0x149,
+	0x14A,
+	0x1F0,
+	0x1F1,
+	0x390,
+	0x391,
+	0x3B0,
+	0x3B1,
+	0x587,
+	0x588,
+	0x1E96,
+	0x1E9B,
+	0x1E9E,
+	0x1E9F,
+	0x1F50,
+	0x1F51,
+	0x1F52,
+	0x1F53,
+	0x1F54,
+	0x1F55,
+	0x1F56,
+	0x1F57,
+	0x1F80,
+	0x1FB0,
+	0x1FB2,
+	0x1FB5,
+	0x1FB6,
+	0x1FB8,
+	0x1FBC,
+	0x1FBD,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FC8,
+	0x1FCC,
+	0x1FCD,
+	0x1FD2,
+	0x1FD4,
+	0x1FD6,
+	0x1FD8,
+	0x1FE2,
+	0x1FE5,
+	0x1FE6,
+	0x1FE8,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FF8,
+	0x1FFC,
+	0x1FFD,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC 1047 */
 	1502,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -26479,7 +26895,7 @@ typedef enum {
 	GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 1047 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 1047 */
 	GCB_Control,
 	GCB_CR,
 	GCB_Control,
@@ -27986,414 +28402,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 1047 */
-	45,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x2BC,
-	0x2BD,
-	0x308,
-	0x309,
-	0x313,
-	0x314,
-	0x342,
-	0x343,
-	0x3AC,
-	0x3AD,
-	0x3AE,
-	0x3AF,
-	0x3B1,
-	0x3B2,
-	0x3B7,
-	0x3B8,
-	0x3B9,
-	0x3BA,
-	0x3C1,
-	0x3C2,
-	0x3C5,
-	0x3C6,
-	0x3C9,
-	0x3CA,
-	0x3CE,
-	0x3CF,
-	0x565,
-	0x566,
-	0x574,
-	0x575,
-	0x57E,
-	0x57F,
-	0x1F00,
-	0x1F08,
-	0x1F20,
-	0x1F28,
-	0x1F60,
-	0x1F68,
-	0x1F70,
-	0x1F71,
-	0x1F74,
-	0x1F75,
-	0x1F7C,
-	0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 1047 */
-	279,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x42,
-	0x4A,
-	0x51,
-	0x5A,
-	0x62,
-	0x6A,
-	0x70,
-	0x79,
-	0x80,
-	0x8A,
-	0x8C,
-	0x8F,
-	0x91,
-	0x9A,
-	0x9C,
-	0x9D,
-	0x9E,
-	0x9F,
-	0xA0,
-	0xA1,
-	0xA2,
-	0xAA,
-	0xAC,
-	0xAD,
-	0xAE,
-	0xAF,
-	0xBA,
-	0xBB,
-	0xC1,
-	0xCA,
-	0xCB,
-	0xD0,
-	0xD1,
-	0xDA,
-	0xDB,
-	0xE0,
-	0xE2,
-	0xEA,
-	0xEB,
-	0xF0,
-	0xFB,
-	0xFF,
-	0x100,
-	0x131,
-	0x132,
-	0x138,
-	0x139,
-	0x18D,
-	0x18E,
-	0x19B,
-	0x19C,
-	0x1AA,
-	0x1AC,
-	0x1BA,
-	0x1BC,
-	0x1BE,
-	0x1BF,
-	0x1C0,
-	0x1C4,
-	0x221,
-	0x222,
-	0x234,
-	0x23A,
-	0x255,
-	0x256,
-	0x258,
-	0x259,
-	0x25A,
-	0x25B,
-	0x25D,
-	0x260,
-	0x262,
-	0x263,
-	0x264,
-	0x265,
-	0x267,
-	0x268,
-	0x26A,
-	0x26B,
-	0x26D,
-	0x26F,
-	0x270,
-	0x271,
-	0x273,
-	0x275,
-	0x276,
-	0x27D,
-	0x27E,
-	0x280,
-	0x281,
-	0x283,
-	0x284,
-	0x287,
-	0x28D,
-	0x292,
-	0x293,
-	0x29D,
-	0x29F,
-	0x2BC,
-	0x2BD,
-	0x2BE,
-	0x2BF,
-	0x300,
-	0x302,
-	0x307,
-	0x309,
-	0x30A,
-	0x30B,
-	0x30C,
-	0x30D,
-	0x313,
-	0x314,
-	0x331,
-	0x332,
-	0x342,
-	0x343,
-	0x345,
-	0x346,
-	0x370,
-	0x374,
-	0x376,
-	0x378,
-	0x37B,
-	0x37E,
-	0x37F,
-	0x380,
-	0x386,
-	0x387,
-	0x388,
-	0x38B,
-	0x38C,
-	0x38D,
-	0x38E,
-	0x3A2,
-	0x3A3,
-	0x3D2,
-	0x3D5,
-	0x3F6,
-	0x3F7,
-	0x3FC,
-	0x3FD,
-	0x482,
-	0x48A,
-	0x530,
-	0x531,
-	0x557,
-	0x561,
-	0x588,
-	0x10A0,
-	0x10C6,
-	0x10C7,
-	0x10C8,
-	0x10CD,
-	0x10CE,
-	0x13A0,
-	0x13F6,
-	0x13F8,
-	0x13FE,
-	0x1D79,
-	0x1D7A,
-	0x1D7D,
-	0x1D7E,
-	0x1E00,
-	0x1E9C,
-	0x1E9E,
-	0x1E9F,
-	0x1EA0,
-	0x1F16,
-	0x1F18,
-	0x1F1E,
-	0x1F20,
-	0x1F46,
-	0x1F48,
-	0x1F4E,
-	0x1F50,
-	0x1F58,
-	0x1F59,
-	0x1F5A,
-	0x1F5B,
-	0x1F5C,
-	0x1F5D,
-	0x1F5E,
-	0x1F5F,
-	0x1F7E,
-	0x1F80,
-	0x1FB5,
-	0x1FB6,
-	0x1FBD,
-	0x1FBE,
-	0x1FBF,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FCD,
-	0x1FD0,
-	0x1FD4,
-	0x1FD6,
-	0x1FDC,
-	0x1FE0,
-	0x1FED,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FFD,
-	0x2126,
-	0x2127,
-	0x212A,
-	0x212C,
-	0x2132,
-	0x2133,
-	0x214E,
-	0x214F,
-	0x2160,
-	0x2180,
-	0x2183,
-	0x2185,
-	0x24B6,
-	0x24EA,
-	0x2C00,
-	0x2C2F,
-	0x2C30,
-	0x2C5F,
-	0x2C60,
-	0x2C71,
-	0x2C72,
-	0x2C74,
-	0x2C75,
-	0x2C77,
-	0x2C7E,
-	0x2CE4,
-	0x2CEB,
-	0x2CEF,
-	0x2CF2,
-	0x2CF4,
-	0x2D00,
-	0x2D26,
-	0x2D27,
-	0x2D28,
-	0x2D2D,
-	0x2D2E,
-	0xA640,
-	0xA66E,
-	0xA680,
-	0xA69C,
-	0xA722,
-	0xA730,
-	0xA732,
-	0xA770,
-	0xA779,
-	0xA788,
-	0xA78B,
-	0xA78E,
-	0xA790,
-	0xA794,
-	0xA796,
-	0xA7AE,
-	0xA7B0,
-	0xA7B8,
-	0xAB53,
-	0xAB54,
-	0xAB70,
-	0xABC0,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18,
-	0xFF21,
-	0xFF3B,
-	0xFF41,
-	0xFF5B,
-	0x10400,
-	0x10450,
-	0x10C80,
-	0x10CB3,
-	0x10CC0,
-	0x10CF3,
-	0x118A0,
-	0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 1047 */
-	59,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x59,
-	0x5A,
-	0x130,
-	0x131,
-	0x149,
-	0x14A,
-	0x1F0,
-	0x1F1,
-	0x390,
-	0x391,
-	0x3B0,
-	0x3B1,
-	0x587,
-	0x588,
-	0x1E96,
-	0x1E9B,
-	0x1E9E,
-	0x1E9F,
-	0x1F50,
-	0x1F51,
-	0x1F52,
-	0x1F53,
-	0x1F54,
-	0x1F55,
-	0x1F56,
-	0x1F57,
-	0x1F80,
-	0x1FB0,
-	0x1FB2,
-	0x1FB5,
-	0x1FB6,
-	0x1FB8,
-	0x1FBC,
-	0x1FBD,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FC8,
-	0x1FCC,
-	0x1FCD,
-	0x1FD2,
-	0x1FD4,
-	0x1FD6,
-	0x1FD8,
-	0x1FE2,
-	0x1FE5,
-	0x1FE6,
-	0x1FE8,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FF8,
-	0x1FFC,
-	0x1FFD,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC 1047 */
@@ -30910,7 +30918,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC 1047 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC 1047 */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC 1047 */
 	2920,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -33862,7 +33870,7 @@ typedef enum {
 	SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 1047 */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 1047 */
 	SB_Other,
 	SB_Sp,
 	SB_Other,
@@ -36787,91 +36795,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC 1047 */
-	55,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x4,
-	0x5,
-	0x6,
-	0x7,
-	0x8,
-	0xB,
-	0x14,
-	0x15,
-	0x17,
-	0x18,
-	0x1A,
-	0x1C,
-	0x20,
-	0x26,
-	0x28,
-	0x2D,
-	0x30,
-	0x32,
-	0x33,
-	0x37,
-	0x38,
-	0x3C,
-	0x3E,
-	0x3F,
-	0x41,
-	0x4B,
-	0x51,
-	0x5A,
-	0x62,
-	0x6B,
-	0x70,
-	0x79,
-	0x80,
-	0x81,
-	0x8A,
-	0x91,
-	0x9A,
-	0xA1,
-	0xAA,
-	0xAD,
-	0xAE,
-	0xBD,
-	0xBE,
-	0xC0,
-	0xCA,
-	0xD0,
-	0xDA,
-	0xE0,
-	0xE1,
-	0xE2,
-	0xEA,
-	0xF0,
-	0xFA,
-	0x100
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC 1047 */
-	9,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0xB,
-	0xE,
-	0x15,
-	0x16,
-	0x25,
-	0x26,
-	0x2028,
-	0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC 1047 */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC 1047 */
 	1549,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -38455,7 +38381,7 @@ typedef enum {
 	WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 1047 */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 1047 */
 	WB_Other,
 	WB_Newline,
 	WB_CR,
@@ -40009,8 +39935,90 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC 1047 */
+	55,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x4,
+	0x5,
+	0x6,
+	0x7,
+	0x8,
+	0xB,
+	0x14,
+	0x15,
+	0x17,
+	0x18,
+	0x1A,
+	0x1C,
+	0x20,
+	0x26,
+	0x28,
+	0x2D,
+	0x30,
+	0x32,
+	0x33,
+	0x37,
+	0x38,
+	0x3C,
+	0x3E,
+	0x3F,
+	0x41,
+	0x4B,
+	0x51,
+	0x5A,
+	0x62,
+	0x6B,
+	0x70,
+	0x79,
+	0x80,
+	0x81,
+	0x8A,
+	0x91,
+	0x9A,
+	0xA1,
+	0xAA,
+	0xAD,
+	0xAE,
+	0xBD,
+	0xBE,
+	0xC0,
+	0xCA,
+	0xD0,
+	0xDA,
+	0xE0,
+	0xE1,
+	0xE2,
+	0xEA,
+	0xF0,
+	0xFA,
+	0x100
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC 1047 */
+	9,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0xB,
+	0xE,
+	0x15,
+	0x16,
+	0x25,
+	0x26,
+	0x2028,
+	0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC 1047 */
 	23,	/* Number of elements */
 	148565664, /* Version and data structure type */
@@ -49924,7 +49932,415 @@ static const UV Cased_invlist[] = { /* for EBCDIC 037 */
 	0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC 037 */
+#endif	/* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 037 */
+	45,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x2BC,
+	0x2BD,
+	0x308,
+	0x309,
+	0x313,
+	0x314,
+	0x342,
+	0x343,
+	0x3AC,
+	0x3AD,
+	0x3AE,
+	0x3AF,
+	0x3B1,
+	0x3B2,
+	0x3B7,
+	0x3B8,
+	0x3B9,
+	0x3BA,
+	0x3C1,
+	0x3C2,
+	0x3C5,
+	0x3C6,
+	0x3C9,
+	0x3CA,
+	0x3CE,
+	0x3CF,
+	0x565,
+	0x566,
+	0x574,
+	0x575,
+	0x57E,
+	0x57F,
+	0x1F00,
+	0x1F08,
+	0x1F20,
+	0x1F28,
+	0x1F60,
+	0x1F68,
+	0x1F70,
+	0x1F71,
+	0x1F74,
+	0x1F75,
+	0x1F7C,
+	0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 037 */
+	275,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x42,
+	0x4A,
+	0x51,
+	0x5A,
+	0x62,
+	0x6A,
+	0x70,
+	0x79,
+	0x80,
+	0x8A,
+	0x8C,
+	0x8F,
+	0x91,
+	0x9A,
+	0x9C,
+	0x9D,
+	0x9E,
+	0x9F,
+	0xA0,
+	0xA1,
+	0xA2,
+	0xAA,
+	0xAC,
+	0xAF,
+	0xC1,
+	0xCA,
+	0xCB,
+	0xD0,
+	0xD1,
+	0xDA,
+	0xDB,
+	0xE0,
+	0xE2,
+	0xEA,
+	0xEB,
+	0xF0,
+	0xFB,
+	0xFF,
+	0x100,
+	0x131,
+	0x132,
+	0x138,
+	0x139,
+	0x18D,
+	0x18E,
+	0x19B,
+	0x19C,
+	0x1AA,
+	0x1AC,
+	0x1BA,
+	0x1BC,
+	0x1BE,
+	0x1BF,
+	0x1C0,
+	0x1C4,
+	0x221,
+	0x222,
+	0x234,
+	0x23A,
+	0x255,
+	0x256,
+	0x258,
+	0x259,
+	0x25A,
+	0x25B,
+	0x25D,
+	0x260,
+	0x262,
+	0x263,
+	0x264,
+	0x265,
+	0x267,
+	0x268,
+	0x26A,
+	0x26B,
+	0x26D,
+	0x26F,
+	0x270,
+	0x271,
+	0x273,
+	0x275,
+	0x276,
+	0x27D,
+	0x27E,
+	0x280,
+	0x281,
+	0x283,
+	0x284,
+	0x287,
+	0x28D,
+	0x292,
+	0x293,
+	0x29D,
+	0x29F,
+	0x2BC,
+	0x2BD,
+	0x2BE,
+	0x2BF,
+	0x300,
+	0x302,
+	0x307,
+	0x309,
+	0x30A,
+	0x30B,
+	0x30C,
+	0x30D,
+	0x313,
+	0x314,
+	0x331,
+	0x332,
+	0x342,
+	0x343,
+	0x345,
+	0x346,
+	0x370,
+	0x374,
+	0x376,
+	0x378,
+	0x37B,
+	0x37E,
+	0x37F,
+	0x380,
+	0x386,
+	0x387,
+	0x388,
+	0x38B,
+	0x38C,
+	0x38D,
+	0x38E,
+	0x3A2,
+	0x3A3,
+	0x3D2,
+	0x3D5,
+	0x3F6,
+	0x3F7,
+	0x3FC,
+	0x3FD,
+	0x482,
+	0x48A,
+	0x530,
+	0x531,
+	0x557,
+	0x561,
+	0x588,
+	0x10A0,
+	0x10C6,
+	0x10C7,
+	0x10C8,
+	0x10CD,
+	0x10CE,
+	0x13A0,
+	0x13F6,
+	0x13F8,
+	0x13FE,
+	0x1D79,
+	0x1D7A,
+	0x1D7D,
+	0x1D7E,
+	0x1E00,
+	0x1E9C,
+	0x1E9E,
+	0x1E9F,
+	0x1EA0,
+	0x1F16,
+	0x1F18,
+	0x1F1E,
+	0x1F20,
+	0x1F46,
+	0x1F48,
+	0x1F4E,
+	0x1F50,
+	0x1F58,
+	0x1F59,
+	0x1F5A,
+	0x1F5B,
+	0x1F5C,
+	0x1F5D,
+	0x1F5E,
+	0x1F5F,
+	0x1F7E,
+	0x1F80,
+	0x1FB5,
+	0x1FB6,
+	0x1FBD,
+	0x1FBE,
+	0x1FBF,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FCD,
+	0x1FD0,
+	0x1FD4,
+	0x1FD6,
+	0x1FDC,
+	0x1FE0,
+	0x1FED,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FFD,
+	0x2126,
+	0x2127,
+	0x212A,
+	0x212C,
+	0x2132,
+	0x2133,
+	0x214E,
+	0x214F,
+	0x2160,
+	0x2180,
+	0x2183,
+	0x2185,
+	0x24B6,
+	0x24EA,
+	0x2C00,
+	0x2C2F,
+	0x2C30,
+	0x2C5F,
+	0x2C60,
+	0x2C71,
+	0x2C72,
+	0x2C74,
+	0x2C75,
+	0x2C77,
+	0x2C7E,
+	0x2CE4,
+	0x2CEB,
+	0x2CEF,
+	0x2CF2,
+	0x2CF4,
+	0x2D00,
+	0x2D26,
+	0x2D27,
+	0x2D28,
+	0x2D2D,
+	0x2D2E,
+	0xA640,
+	0xA66E,
+	0xA680,
+	0xA69C,
+	0xA722,
+	0xA730,
+	0xA732,
+	0xA770,
+	0xA779,
+	0xA788,
+	0xA78B,
+	0xA78E,
+	0xA790,
+	0xA794,
+	0xA796,
+	0xA7AE,
+	0xA7B0,
+	0xA7B8,
+	0xAB53,
+	0xAB54,
+	0xAB70,
+	0xABC0,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18,
+	0xFF21,
+	0xFF3B,
+	0xFF41,
+	0xFF5B,
+	0x10400,
+	0x10450,
+	0x10C80,
+	0x10CB3,
+	0x10CC0,
+	0x10CF3,
+	0x118A0,
+	0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 037 */
+	59,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x59,
+	0x5A,
+	0x130,
+	0x131,
+	0x149,
+	0x14A,
+	0x1F0,
+	0x1F1,
+	0x390,
+	0x391,
+	0x3B0,
+	0x3B1,
+	0x587,
+	0x588,
+	0x1E96,
+	0x1E9B,
+	0x1E9E,
+	0x1E9F,
+	0x1F50,
+	0x1F51,
+	0x1F52,
+	0x1F53,
+	0x1F54,
+	0x1F55,
+	0x1F56,
+	0x1F57,
+	0x1F80,
+	0x1FB0,
+	0x1FB2,
+	0x1FB5,
+	0x1FB6,
+	0x1FB8,
+	0x1FBC,
+	0x1FBD,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FC8,
+	0x1FCC,
+	0x1FCD,
+	0x1FD2,
+	0x1FD4,
+	0x1FD6,
+	0x1FD8,
+	0x1FE2,
+	0x1FE5,
+	0x1FE6,
+	0x1FE8,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FF8,
+	0x1FFC,
+	0x1FFD,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC 037 */
 	1502,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -51456,7 +51872,7 @@ typedef enum {
 	GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 037 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */
 	GCB_Control,
 	GCB_CR,
 	GCB_Control,
@@ -52963,410 +53379,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 037 */
-	45,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x2BC,
-	0x2BD,
-	0x308,
-	0x309,
-	0x313,
-	0x314,
-	0x342,
-	0x343,
-	0x3AC,
-	0x3AD,
-	0x3AE,
-	0x3AF,
-	0x3B1,
-	0x3B2,
-	0x3B7,
-	0x3B8,
-	0x3B9,
-	0x3BA,
-	0x3C1,
-	0x3C2,
-	0x3C5,
-	0x3C6,
-	0x3C9,
-	0x3CA,
-	0x3CE,
-	0x3CF,
-	0x565,
-	0x566,
-	0x574,
-	0x575,
-	0x57E,
-	0x57F,
-	0x1F00,
-	0x1F08,
-	0x1F20,
-	0x1F28,
-	0x1F60,
-	0x1F68,
-	0x1F70,
-	0x1F71,
-	0x1F74,
-	0x1F75,
-	0x1F7C,
-	0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 037 */
-	275,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x42,
-	0x4A,
-	0x51,
-	0x5A,
-	0x62,
-	0x6A,
-	0x70,
-	0x79,
-	0x80,
-	0x8A,
-	0x8C,
-	0x8F,
-	0x91,
-	0x9A,
-	0x9C,
-	0x9D,
-	0x9E,
-	0x9F,
-	0xA0,
-	0xA1,
-	0xA2,
-	0xAA,
-	0xAC,
-	0xAF,
-	0xC1,
-	0xCA,
-	0xCB,
-	0xD0,
-	0xD1,
-	0xDA,
-	0xDB,
-	0xE0,
-	0xE2,
-	0xEA,
-	0xEB,
-	0xF0,
-	0xFB,
-	0xFF,
-	0x100,
-	0x131,
-	0x132,
-	0x138,
-	0x139,
-	0x18D,
-	0x18E,
-	0x19B,
-	0x19C,
-	0x1AA,
-	0x1AC,
-	0x1BA,
-	0x1BC,
-	0x1BE,
-	0x1BF,
-	0x1C0,
-	0x1C4,
-	0x221,
-	0x222,
-	0x234,
-	0x23A,
-	0x255,
-	0x256,
-	0x258,
-	0x259,
-	0x25A,
-	0x25B,
-	0x25D,
-	0x260,
-	0x262,
-	0x263,
-	0x264,
-	0x265,
-	0x267,
-	0x268,
-	0x26A,
-	0x26B,
-	0x26D,
-	0x26F,
-	0x270,
-	0x271,
-	0x273,
-	0x275,
-	0x276,
-	0x27D,
-	0x27E,
-	0x280,
-	0x281,
-	0x283,
-	0x284,
-	0x287,
-	0x28D,
-	0x292,
-	0x293,
-	0x29D,
-	0x29F,
-	0x2BC,
-	0x2BD,
-	0x2BE,
-	0x2BF,
-	0x300,
-	0x302,
-	0x307,
-	0x309,
-	0x30A,
-	0x30B,
-	0x30C,
-	0x30D,
-	0x313,
-	0x314,
-	0x331,
-	0x332,
-	0x342,
-	0x343,
-	0x345,
-	0x346,
-	0x370,
-	0x374,
-	0x376,
-	0x378,
-	0x37B,
-	0x37E,
-	0x37F,
-	0x380,
-	0x386,
-	0x387,
-	0x388,
-	0x38B,
-	0x38C,
-	0x38D,
-	0x38E,
-	0x3A2,
-	0x3A3,
-	0x3D2,
-	0x3D5,
-	0x3F6,
-	0x3F7,
-	0x3FC,
-	0x3FD,
-	0x482,
-	0x48A,
-	0x530,
-	0x531,
-	0x557,
-	0x561,
-	0x588,
-	0x10A0,
-	0x10C6,
-	0x10C7,
-	0x10C8,
-	0x10CD,
-	0x10CE,
-	0x13A0,
-	0x13F6,
-	0x13F8,
-	0x13FE,
-	0x1D79,
-	0x1D7A,
-	0x1D7D,
-	0x1D7E,
-	0x1E00,
-	0x1E9C,
-	0x1E9E,
-	0x1E9F,
-	0x1EA0,
-	0x1F16,
-	0x1F18,
-	0x1F1E,
-	0x1F20,
-	0x1F46,
-	0x1F48,
-	0x1F4E,
-	0x1F50,
-	0x1F58,
-	0x1F59,
-	0x1F5A,
-	0x1F5B,
-	0x1F5C,
-	0x1F5D,
-	0x1F5E,
-	0x1F5F,
-	0x1F7E,
-	0x1F80,
-	0x1FB5,
-	0x1FB6,
-	0x1FBD,
-	0x1FBE,
-	0x1FBF,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FCD,
-	0x1FD0,
-	0x1FD4,
-	0x1FD6,
-	0x1FDC,
-	0x1FE0,
-	0x1FED,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FFD,
-	0x2126,
-	0x2127,
-	0x212A,
-	0x212C,
-	0x2132,
-	0x2133,
-	0x214E,
-	0x214F,
-	0x2160,
-	0x2180,
-	0x2183,
-	0x2185,
-	0x24B6,
-	0x24EA,
-	0x2C00,
-	0x2C2F,
-	0x2C30,
-	0x2C5F,
-	0x2C60,
-	0x2C71,
-	0x2C72,
-	0x2C74,
-	0x2C75,
-	0x2C77,
-	0x2C7E,
-	0x2CE4,
-	0x2CEB,
-	0x2CEF,
-	0x2CF2,
-	0x2CF4,
-	0x2D00,
-	0x2D26,
-	0x2D27,
-	0x2D28,
-	0x2D2D,
-	0x2D2E,
-	0xA640,
-	0xA66E,
-	0xA680,
-	0xA69C,
-	0xA722,
-	0xA730,
-	0xA732,
-	0xA770,
-	0xA779,
-	0xA788,
-	0xA78B,
-	0xA78E,
-	0xA790,
-	0xA794,
-	0xA796,
-	0xA7AE,
-	0xA7B0,
-	0xA7B8,
-	0xAB53,
-	0xAB54,
-	0xAB70,
-	0xABC0,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18,
-	0xFF21,
-	0xFF3B,
-	0xFF41,
-	0xFF5B,
-	0x10400,
-	0x10450,
-	0x10C80,
-	0x10CB3,
-	0x10CC0,
-	0x10CF3,
-	0x118A0,
-	0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 037 */
-	59,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x59,
-	0x5A,
-	0x130,
-	0x131,
-	0x149,
-	0x14A,
-	0x1F0,
-	0x1F1,
-	0x390,
-	0x391,
-	0x3B0,
-	0x3B1,
-	0x587,
-	0x588,
-	0x1E96,
-	0x1E9B,
-	0x1E9E,
-	0x1E9F,
-	0x1F50,
-	0x1F51,
-	0x1F52,
-	0x1F53,
-	0x1F54,
-	0x1F55,
-	0x1F56,
-	0x1F57,
-	0x1F80,
-	0x1FB0,
-	0x1FB2,
-	0x1FB5,
-	0x1FB6,
-	0x1FB8,
-	0x1FBC,
-	0x1FBD,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FC8,
-	0x1FCC,
-	0x1FCD,
-	0x1FD2,
-	0x1FD4,
-	0x1FD6,
-	0x1FD8,
-	0x1FE2,
-	0x1FE5,
-	0x1FE6,
-	0x1FE8,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FF8,
-	0x1FFC,
-	0x1FFD,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC 037 */
@@ -55875,7 +55887,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC 037 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC 037 */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC 037 */
 	2916,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -58823,7 +58835,7 @@ typedef enum {
 	SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 037 */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 037 */
 	SB_Other,
 	SB_Sp,
 	SB_Other,
@@ -61744,93 +61756,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC 037 */
-	57,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x4,
-	0x5,
-	0x6,
-	0x7,
-	0x8,
-	0xB,
-	0x14,
-	0x16,
-	0x17,
-	0x18,
-	0x1A,
-	0x1C,
-	0x20,
-	0x25,
-	0x28,
-	0x2D,
-	0x30,
-	0x32,
-	0x33,
-	0x37,
-	0x38,
-	0x3C,
-	0x3E,
-	0x3F,
-	0x41,
-	0x4B,
-	0x51,
-	0x5A,
-	0x5F,
-	0x60,
-	0x62,
-	0x6B,
-	0x70,
-	0x79,
-	0x80,
-	0x81,
-	0x8A,
-	0x91,
-	0x9A,
-	0xA1,
-	0xAA,
-	0xB0,
-	0xB1,
-	0xBA,
-	0xBC,
-	0xC0,
-	0xCA,
-	0xD0,
-	0xDA,
-	0xE0,
-	0xE1,
-	0xE2,
-	0xEA,
-	0xF0,
-	0xFA,
-	0x100
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC 037 */
-	9,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0xB,
-	0xE,
-	0x15,
-	0x16,
-	0x25,
-	0x26,
-	0x2028,
-	0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC 037 */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC 037 */
 	1545,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -63410,7 +63338,7 @@ typedef enum {
 	WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 037 */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 037 */
 	WB_Other,
 	WB_Newline,
 	WB_CR,
@@ -64960,8 +64888,92 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC 037 */
+	57,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x4,
+	0x5,
+	0x6,
+	0x7,
+	0x8,
+	0xB,
+	0x14,
+	0x16,
+	0x17,
+	0x18,
+	0x1A,
+	0x1C,
+	0x20,
+	0x25,
+	0x28,
+	0x2D,
+	0x30,
+	0x32,
+	0x33,
+	0x37,
+	0x38,
+	0x3C,
+	0x3E,
+	0x3F,
+	0x41,
+	0x4B,
+	0x51,
+	0x5A,
+	0x5F,
+	0x60,
+	0x62,
+	0x6B,
+	0x70,
+	0x79,
+	0x80,
+	0x81,
+	0x8A,
+	0x91,
+	0x9A,
+	0xA1,
+	0xAA,
+	0xB0,
+	0xB1,
+	0xBA,
+	0xBC,
+	0xC0,
+	0xCA,
+	0xD0,
+	0xDA,
+	0xE0,
+	0xE1,
+	0xE2,
+	0xEA,
+	0xF0,
+	0xFA,
+	0x100
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC 037 */
+	9,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0xB,
+	0xE,
+	0x15,
+	0x16,
+	0x25,
+	0x26,
+	0x2028,
+	0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC 037 */
 	23,	/* Number of elements */
 	148565664, /* Version and data structure type */
@@ -74865,7 +74877,417 @@ static const UV Cased_invlist[] = { /* for EBCDIC POSIX-BC */
 	0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC POSIX-BC */
+#endif	/* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
+	45,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x2BC,
+	0x2BD,
+	0x308,
+	0x309,
+	0x313,
+	0x314,
+	0x342,
+	0x343,
+	0x3AC,
+	0x3AD,
+	0x3AE,
+	0x3AF,
+	0x3B1,
+	0x3B2,
+	0x3B7,
+	0x3B8,
+	0x3B9,
+	0x3BA,
+	0x3C1,
+	0x3C2,
+	0x3C5,
+	0x3C6,
+	0x3C9,
+	0x3CA,
+	0x3CE,
+	0x3CF,
+	0x565,
+	0x566,
+	0x574,
+	0x575,
+	0x57E,
+	0x57F,
+	0x1F00,
+	0x1F08,
+	0x1F20,
+	0x1F28,
+	0x1F60,
+	0x1F68,
+	0x1F70,
+	0x1F71,
+	0x1F74,
+	0x1F75,
+	0x1F7C,
+	0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
+	277,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x42,
+	0x4A,
+	0x51,
+	0x5A,
+	0x62,
+	0x6A,
+	0x70,
+	0x79,
+	0x80,
+	0x8A,
+	0x8C,
+	0x8F,
+	0x91,
+	0x9A,
+	0x9C,
+	0x9D,
+	0x9E,
+	0x9F,
+	0xA0,
+	0xA1,
+	0xA2,
+	0xAA,
+	0xAC,
+	0xAF,
+	0xC0,
+	0xCA,
+	0xCB,
+	0xD0,
+	0xD1,
+	0xDA,
+	0xDB,
+	0xE1,
+	0xE2,
+	0xEA,
+	0xEB,
+	0xF0,
+	0xFC,
+	0xFD,
+	0xFE,
+	0xFF,
+	0x100,
+	0x131,
+	0x132,
+	0x138,
+	0x139,
+	0x18D,
+	0x18E,
+	0x19B,
+	0x19C,
+	0x1AA,
+	0x1AC,
+	0x1BA,
+	0x1BC,
+	0x1BE,
+	0x1BF,
+	0x1C0,
+	0x1C4,
+	0x221,
+	0x222,
+	0x234,
+	0x23A,
+	0x255,
+	0x256,
+	0x258,
+	0x259,
+	0x25A,
+	0x25B,
+	0x25D,
+	0x260,
+	0x262,
+	0x263,
+	0x264,
+	0x265,
+	0x267,
+	0x268,
+	0x26A,
+	0x26B,
+	0x26D,
+	0x26F,
+	0x270,
+	0x271,
+	0x273,
+	0x275,
+	0x276,
+	0x27D,
+	0x27E,
+	0x280,
+	0x281,
+	0x283,
+	0x284,
+	0x287,
+	0x28D,
+	0x292,
+	0x293,
+	0x29D,
+	0x29F,
+	0x2BC,
+	0x2BD,
+	0x2BE,
+	0x2BF,
+	0x300,
+	0x302,
+	0x307,
+	0x309,
+	0x30A,
+	0x30B,
+	0x30C,
+	0x30D,
+	0x313,
+	0x314,
+	0x331,
+	0x332,
+	0x342,
+	0x343,
+	0x345,
+	0x346,
+	0x370,
+	0x374,
+	0x376,
+	0x378,
+	0x37B,
+	0x37E,
+	0x37F,
+	0x380,
+	0x386,
+	0x387,
+	0x388,
+	0x38B,
+	0x38C,
+	0x38D,
+	0x38E,
+	0x3A2,
+	0x3A3,
+	0x3D2,
+	0x3D5,
+	0x3F6,
+	0x3F7,
+	0x3FC,
+	0x3FD,
+	0x482,
+	0x48A,
+	0x530,
+	0x531,
+	0x557,
+	0x561,
+	0x588,
+	0x10A0,
+	0x10C6,
+	0x10C7,
+	0x10C8,
+	0x10CD,
+	0x10CE,
+	0x13A0,
+	0x13F6,
+	0x13F8,
+	0x13FE,
+	0x1D79,
+	0x1D7A,
+	0x1D7D,
+	0x1D7E,
+	0x1E00,
+	0x1E9C,
+	0x1E9E,
+	0x1E9F,
+	0x1EA0,
+	0x1F16,
+	0x1F18,
+	0x1F1E,
+	0x1F20,
+	0x1F46,
+	0x1F48,
+	0x1F4E,
+	0x1F50,
+	0x1F58,
+	0x1F59,
+	0x1F5A,
+	0x1F5B,
+	0x1F5C,
+	0x1F5D,
+	0x1F5E,
+	0x1F5F,
+	0x1F7E,
+	0x1F80,
+	0x1FB5,
+	0x1FB6,
+	0x1FBD,
+	0x1FBE,
+	0x1FBF,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FCD,
+	0x1FD0,
+	0x1FD4,
+	0x1FD6,
+	0x1FDC,
+	0x1FE0,
+	0x1FED,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FFD,
+	0x2126,
+	0x2127,
+	0x212A,
+	0x212C,
+	0x2132,
+	0x2133,
+	0x214E,
+	0x214F,
+	0x2160,
+	0x2180,
+	0x2183,
+	0x2185,
+	0x24B6,
+	0x24EA,
+	0x2C00,
+	0x2C2F,
+	0x2C30,
+	0x2C5F,
+	0x2C60,
+	0x2C71,
+	0x2C72,
+	0x2C74,
+	0x2C75,
+	0x2C77,
+	0x2C7E,
+	0x2CE4,
+	0x2CEB,
+	0x2CEF,
+	0x2CF2,
+	0x2CF4,
+	0x2D00,
+	0x2D26,
+	0x2D27,
+	0x2D28,
+	0x2D2D,
+	0x2D2E,
+	0xA640,
+	0xA66E,
+	0xA680,
+	0xA69C,
+	0xA722,
+	0xA730,
+	0xA732,
+	0xA770,
+	0xA779,
+	0xA788,
+	0xA78B,
+	0xA78E,
+	0xA790,
+	0xA794,
+	0xA796,
+	0xA7AE,
+	0xA7B0,
+	0xA7B8,
+	0xAB53,
+	0xAB54,
+	0xAB70,
+	0xABC0,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18,
+	0xFF21,
+	0xFF3B,
+	0xFF41,
+	0xFF5B,
+	0x10400,
+	0x10450,
+	0x10C80,
+	0x10CB3,
+	0x10CC0,
+	0x10CF3,
+	0x118A0,
+	0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC POSIX-BC */
+	59,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x59,
+	0x5A,
+	0x130,
+	0x131,
+	0x149,
+	0x14A,
+	0x1F0,
+	0x1F1,
+	0x390,
+	0x391,
+	0x3B0,
+	0x3B1,
+	0x587,
+	0x588,
+	0x1E96,
+	0x1E9B,
+	0x1E9E,
+	0x1E9F,
+	0x1F50,
+	0x1F51,
+	0x1F52,
+	0x1F53,
+	0x1F54,
+	0x1F55,
+	0x1F56,
+	0x1F57,
+	0x1F80,
+	0x1FB0,
+	0x1FB2,
+	0x1FB5,
+	0x1FB6,
+	0x1FB8,
+	0x1FBC,
+	0x1FBD,
+	0x1FC2,
+	0x1FC5,
+	0x1FC6,
+	0x1FC8,
+	0x1FCC,
+	0x1FCD,
+	0x1FD2,
+	0x1FD4,
+	0x1FD6,
+	0x1FD8,
+	0x1FE2,
+	0x1FE5,
+	0x1FE6,
+	0x1FE8,
+	0x1FF2,
+	0x1FF5,
+	0x1FF6,
+	0x1FF8,
+	0x1FFC,
+	0x1FFD,
+	0xFB00,
+	0xFB07,
+	0xFB13,
+	0xFB18
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC POSIX-BC */
 	1502,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -76397,7 +76819,7 @@ typedef enum {
 	GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC POSIX-BC */
 	GCB_Control,
 	GCB_CR,
 	GCB_Control,
@@ -77904,412 +78326,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC POSIX-BC
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
-	45,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x2BC,
-	0x2BD,
-	0x308,
-	0x309,
-	0x313,
-	0x314,
-	0x342,
-	0x343,
-	0x3AC,
-	0x3AD,
-	0x3AE,
-	0x3AF,
-	0x3B1,
-	0x3B2,
-	0x3B7,
-	0x3B8,
-	0x3B9,
-	0x3BA,
-	0x3C1,
-	0x3C2,
-	0x3C5,
-	0x3C6,
-	0x3C9,
-	0x3CA,
-	0x3CE,
-	0x3CF,
-	0x565,
-	0x566,
-	0x574,
-	0x575,
-	0x57E,
-	0x57F,
-	0x1F00,
-	0x1F08,
-	0x1F20,
-	0x1F28,
-	0x1F60,
-	0x1F68,
-	0x1F70,
-	0x1F71,
-	0x1F74,
-	0x1F75,
-	0x1F7C,
-	0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
-	277,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x42,
-	0x4A,
-	0x51,
-	0x5A,
-	0x62,
-	0x6A,
-	0x70,
-	0x79,
-	0x80,
-	0x8A,
-	0x8C,
-	0x8F,
-	0x91,
-	0x9A,
-	0x9C,
-	0x9D,
-	0x9E,
-	0x9F,
-	0xA0,
-	0xA1,
-	0xA2,
-	0xAA,
-	0xAC,
-	0xAF,
-	0xC0,
-	0xCA,
-	0xCB,
-	0xD0,
-	0xD1,
-	0xDA,
-	0xDB,
-	0xE1,
-	0xE2,
-	0xEA,
-	0xEB,
-	0xF0,
-	0xFC,
-	0xFD,
-	0xFE,
-	0xFF,
-	0x100,
-	0x131,
-	0x132,
-	0x138,
-	0x139,
-	0x18D,
-	0x18E,
-	0x19B,
-	0x19C,
-	0x1AA,
-	0x1AC,
-	0x1BA,
-	0x1BC,
-	0x1BE,
-	0x1BF,
-	0x1C0,
-	0x1C4,
-	0x221,
-	0x222,
-	0x234,
-	0x23A,
-	0x255,
-	0x256,
-	0x258,
-	0x259,
-	0x25A,
-	0x25B,
-	0x25D,
-	0x260,
-	0x262,
-	0x263,
-	0x264,
-	0x265,
-	0x267,
-	0x268,
-	0x26A,
-	0x26B,
-	0x26D,
-	0x26F,
-	0x270,
-	0x271,
-	0x273,
-	0x275,
-	0x276,
-	0x27D,
-	0x27E,
-	0x280,
-	0x281,
-	0x283,
-	0x284,
-	0x287,
-	0x28D,
-	0x292,
-	0x293,
-	0x29D,
-	0x29F,
-	0x2BC,
-	0x2BD,
-	0x2BE,
-	0x2BF,
-	0x300,
-	0x302,
-	0x307,
-	0x309,
-	0x30A,
-	0x30B,
-	0x30C,
-	0x30D,
-	0x313,
-	0x314,
-	0x331,
-	0x332,
-	0x342,
-	0x343,
-	0x345,
-	0x346,
-	0x370,
-	0x374,
-	0x376,
-	0x378,
-	0x37B,
-	0x37E,
-	0x37F,
-	0x380,
-	0x386,
-	0x387,
-	0x388,
-	0x38B,
-	0x38C,
-	0x38D,
-	0x38E,
-	0x3A2,
-	0x3A3,
-	0x3D2,
-	0x3D5,
-	0x3F6,
-	0x3F7,
-	0x3FC,
-	0x3FD,
-	0x482,
-	0x48A,
-	0x530,
-	0x531,
-	0x557,
-	0x561,
-	0x588,
-	0x10A0,
-	0x10C6,
-	0x10C7,
-	0x10C8,
-	0x10CD,
-	0x10CE,
-	0x13A0,
-	0x13F6,
-	0x13F8,
-	0x13FE,
-	0x1D79,
-	0x1D7A,
-	0x1D7D,
-	0x1D7E,
-	0x1E00,
-	0x1E9C,
-	0x1E9E,
-	0x1E9F,
-	0x1EA0,
-	0x1F16,
-	0x1F18,
-	0x1F1E,
-	0x1F20,
-	0x1F46,
-	0x1F48,
-	0x1F4E,
-	0x1F50,
-	0x1F58,
-	0x1F59,
-	0x1F5A,
-	0x1F5B,
-	0x1F5C,
-	0x1F5D,
-	0x1F5E,
-	0x1F5F,
-	0x1F7E,
-	0x1F80,
-	0x1FB5,
-	0x1FB6,
-	0x1FBD,
-	0x1FBE,
-	0x1FBF,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FCD,
-	0x1FD0,
-	0x1FD4,
-	0x1FD6,
-	0x1FDC,
-	0x1FE0,
-	0x1FED,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FFD,
-	0x2126,
-	0x2127,
-	0x212A,
-	0x212C,
-	0x2132,
-	0x2133,
-	0x214E,
-	0x214F,
-	0x2160,
-	0x2180,
-	0x2183,
-	0x2185,
-	0x24B6,
-	0x24EA,
-	0x2C00,
-	0x2C2F,
-	0x2C30,
-	0x2C5F,
-	0x2C60,
-	0x2C71,
-	0x2C72,
-	0x2C74,
-	0x2C75,
-	0x2C77,
-	0x2C7E,
-	0x2CE4,
-	0x2CEB,
-	0x2CEF,
-	0x2CF2,
-	0x2CF4,
-	0x2D00,
-	0x2D26,
-	0x2D27,
-	0x2D28,
-	0x2D2D,
-	0x2D2E,
-	0xA640,
-	0xA66E,
-	0xA680,
-	0xA69C,
-	0xA722,
-	0xA730,
-	0xA732,
-	0xA770,
-	0xA779,
-	0xA788,
-	0xA78B,
-	0xA78E,
-	0xA790,
-	0xA794,
-	0xA796,
-	0xA7AE,
-	0xA7B0,
-	0xA7B8,
-	0xAB53,
-	0xAB54,
-	0xAB70,
-	0xABC0,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18,
-	0xFF21,
-	0xFF3B,
-	0xFF41,
-	0xFF5B,
-	0x10400,
-	0x10450,
-	0x10C80,
-	0x10CB3,
-	0x10CC0,
-	0x10CF3,
-	0x118A0,
-	0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC POSIX-BC */
-	59,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x59,
-	0x5A,
-	0x130,
-	0x131,
-	0x149,
-	0x14A,
-	0x1F0,
-	0x1F1,
-	0x390,
-	0x391,
-	0x3B0,
-	0x3B1,
-	0x587,
-	0x588,
-	0x1E96,
-	0x1E9B,
-	0x1E9E,
-	0x1E9F,
-	0x1F50,
-	0x1F51,
-	0x1F52,
-	0x1F53,
-	0x1F54,
-	0x1F55,
-	0x1F56,
-	0x1F57,
-	0x1F80,
-	0x1FB0,
-	0x1FB2,
-	0x1FB5,
-	0x1FB6,
-	0x1FB8,
-	0x1FBC,
-	0x1FBD,
-	0x1FC2,
-	0x1FC5,
-	0x1FC6,
-	0x1FC8,
-	0x1FCC,
-	0x1FCD,
-	0x1FD2,
-	0x1FD4,
-	0x1FD6,
-	0x1FD8,
-	0x1FE2,
-	0x1FE5,
-	0x1FE6,
-	0x1FE8,
-	0x1FF2,
-	0x1FF5,
-	0x1FF6,
-	0x1FF8,
-	0x1FFC,
-	0x1FFD,
-	0xFB00,
-	0xFB07,
-	0xFB13,
-	0xFB18
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC POSIX-BC */
@@ -80822,7 +80838,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC POSIX-BC */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC POSIX-BC */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC POSIX-BC */
 	2924,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -83778,7 +83794,7 @@ typedef enum {
 	SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC POSIX-BC */
 	SB_Other,
 	SB_Sp,
 	SB_Other,
@@ -86707,93 +86723,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC POSIX-BC */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC POSIX-BC */
-	57,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0x4,
-	0x5,
-	0x6,
-	0x7,
-	0x8,
-	0xB,
-	0x14,
-	0x15,
-	0x17,
-	0x18,
-	0x1A,
-	0x1C,
-	0x20,
-	0x26,
-	0x28,
-	0x2D,
-	0x30,
-	0x32,
-	0x33,
-	0x37,
-	0x38,
-	0x3C,
-	0x3E,
-	0x3F,
-	0x41,
-	0x4A,
-	0x51,
-	0x5A,
-	0x5F,
-	0x60,
-	0x62,
-	0x6A,
-	0x70,
-	0x7A,
-	0x80,
-	0x81,
-	0x8A,
-	0x91,
-	0x9A,
-	0xA2,
-	0xAA,
-	0xBB,
-	0xBE,
-	0xC1,
-	0xCA,
-	0xD1,
-	0xDA,
-	0xE2,
-	0xEA,
-	0xF0,
-	0xFA,
-	0xFB,
-	0xFC,
-	0xFD,
-	0xFE,
-	0xFF
-};
-
-#endif	/* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC POSIX-BC */
-	9,	/* Number of elements */
-	148565664, /* Version and data structure type */
-	1,	/* 0 if the list starts at 0;
-		   1 if it starts at the element beyond 0 */
-	0x0,
-	0xB,
-	0xE,
-	0x15,
-	0x16,
-	0x25,
-	0x26,
-	0x2028,
-	0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC POSIX-BC */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC POSIX-BC */
 	1547,	/* Number of elements */
 	148565664, /* Version and data structure type */
 	0,	/* 0 if the list starts at 0;
@@ -88375,7 +88307,7 @@ typedef enum {
 	WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC POSIX-BC */
 	WB_Other,
 	WB_Newline,
 	WB_CR,
@@ -89927,8 +89859,92 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC POSIX-BC */
 
 #endif	/* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC POSIX-BC */
+	57,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0x4,
+	0x5,
+	0x6,
+	0x7,
+	0x8,
+	0xB,
+	0x14,
+	0x15,
+	0x17,
+	0x18,
+	0x1A,
+	0x1C,
+	0x20,
+	0x26,
+	0x28,
+	0x2D,
+	0x30,
+	0x32,
+	0x33,
+	0x37,
+	0x38,
+	0x3C,
+	0x3E,
+	0x3F,
+	0x41,
+	0x4A,
+	0x51,
+	0x5A,
+	0x5F,
+	0x60,
+	0x62,
+	0x6A,
+	0x70,
+	0x7A,
+	0x80,
+	0x81,
+	0x8A,
+	0x91,
+	0x9A,
+	0xA2,
+	0xAA,
+	0xBB,
+	0xBE,
+	0xC1,
+	0xCA,
+	0xD1,
+	0xDA,
+	0xE2,
+	0xEA,
+	0xF0,
+	0xFA,
+	0xFB,
+	0xFC,
+	0xFD,
+	0xFE,
+	0xFF
+};
+
+#endif	/* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC POSIX-BC */
+	9,	/* Number of elements */
+	148565664, /* Version and data structure type */
+	1,	/* 0 if the list starts at 0;
+		   1 if it starts at the element beyond 0 */
+	0x0,
+	0xB,
+	0xE,
+	0x15,
+	0x16,
+	0x25,
+	0x26,
+	0x2028,
+	0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC POSIX-BC */
 	23,	/* Number of elements */
 	148565664, /* Version and data structure type */
@@ -99479,7 +99495,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
 #endif	/* EBCDIC POSIX-BC */
 
 /* Generated from:
- * 083180df694deb1fc173361406c1a75619fb8376403db3a76dc585c1e3951eca lib/Unicode/UCD.pm
+ * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
  * ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
  * 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
  * 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
@@ -99521,8 +99537,8 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * c9326eab8d7861c3543963e555d5b927348f4467c93071db23154dece7619654 lib/unicore/mktables
+ * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
- * f199f92c0b5f87882b0198936ea8ef3dc43627b57a77ac3eb9250bd2664bbd88 regen/mk_invlists.pl
+ * 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl
  * ex: set ro: */
diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t
index 9ebe0d3dd8..eea81e8a8f 100644
--- a/ext/XS-APItest/t/handy.t
+++ b/ext/XS-APItest/t/handy.t
@@ -42,7 +42,7 @@ my %properties = (
                    alnum => 'Word',
                    wordchar => 'Word',
                    alphanumeric => 'Alnum',
-                   alpha => 'Alpha',
+                   alpha => 'XPosixAlpha',
                    ascii => 'ASCII',
                    blank => 'Blank',
                    cntrl => 'Control',
@@ -50,14 +50,14 @@ my %properties = (
                    graph => 'Graph',
                    idfirst => '_Perl_IDStart',
                    idcont => '_Perl_IDCont',
-                   lower => 'Lower',
+                   lower => 'XPosixLower',
                    print => 'Print',
                    psxspc => 'XPosixSpace',
                    punct => 'XPosixPunct',
                    quotemeta => '_Perl_Quotemeta',
                    space => 'XPerlSpace',
                    vertws => 'VertSpace',
-                   upper => 'Upper',
+                   upper => 'XPosixUpper',
                    xdigit => 'XDigit',
                 );
 
@@ -69,8 +69,13 @@ foreach my $name (sort keys %properties) {
     my $property = $properties{$name};
     my @invlist = prop_invlist($property, '_perl_core_internal_ok');
     if (! @invlist) {
-        fail("No inversion list found for $property");
-        next;
+
+        # An empty return could mean an unknown property, or merely that it is
+        # empty.  Call in scalar context to differentiate
+        if (! prop_invlist($property, '_perl_core_internal_ok')) {
+            fail("No inversion list found for $property");
+            next;
+        }
     }
 
     # Include all the Latin1 code points, plus 0x100.
@@ -270,7 +275,7 @@ foreach my $name (sort keys %to_properties) {
         fail("No inversion map found for $property");
         next;
     }
-    if ($format ne "al") {
+    if ($format !~ / ^ a l? $ /x) {
         fail("Unexpected inversion map format ('$format') found for $property");
         next;
     }
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 06fbfd1143..1854982491 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -5,7 +5,7 @@ use warnings;
 no warnings 'surrogate';    # surrogates can be inputs to this
 use charnames ();
 
-our $VERSION = '0.61';
+our $VERSION = '0.62';
 
 require Exporter;
 
@@ -775,7 +775,6 @@ sub charprop ($$) {
     }
     else {
         croak __PACKAGE__, "::charprop: Internal error: unknown format '$format'.  Please perlbug this";
-        return undef;
     }
 }
 
@@ -877,6 +876,10 @@ sub _charblocks {
 	    local $_;
 	    local $/ = "\n";
 	    while (<$BLOCKSFH>) {
+
+                # Old versions used a different syntax to mark the range.
+                $_ =~ s/;\s+/../ if $v_unicode_version lt v3.1.0;
+
 		if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
 		    my ($lo, $hi) = (hex($1), hex($2));
 		    my $subrange = [ $lo, $hi, $3 ];
@@ -2652,9 +2655,11 @@ or even better, C<"Gc=LC">).
 
 Many Unicode properties have more than one name (or alias).  C<prop_invmap>
 understands all of these, including Perl extensions to them.  Ambiguities are
-resolved as described above for L</prop_aliases()>.  The Perl internal
-property "Perl_Decimal_Digit, described below, is also accepted.  An empty
-list is returned if the property name is unknown.
+resolved as described above for L</prop_aliases()> (except if a property has
+both a complete mapping, and a binary C<Y>/C<N> mapping, then specifying the
+property name prefixed by C<"is"> causes the binary one to be returned).  The
+Perl internal property "Perl_Decimal_Digit, described below, is also accepted.
+An empty list is returned if the property name is unknown.
 See L<perluniprops/Properties accessible through Unicode::UCD> for the
 properties acceptable as inputs to this function.
 
@@ -3253,8 +3258,8 @@ RETRY:
             # we need to also read in that table.  Create a hash with the keys
             # being the code points, and the values being a list of the
             # aliases for the code point key.
-            my ($aliases_code_points, $aliases_maps, undef, undef) =
-                                                &prop_invmap('Name_Alias');
+            my ($aliases_code_points, $aliases_maps, undef, undef)
+                  = &prop_invmap("_Perl_Name_Alias", '_perl_core_internal_ok');
             my %aliases;
             for (my $i = 0; $i < @$aliases_code_points; $i++) {
                 my $code_point = $aliases_code_points->[$i];
@@ -3545,7 +3550,19 @@ RETRY:
 
     if ($swash->{'LIST'} =~ /^V/) {
         @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
-        shift @invlist;
+
+        shift @invlist;     # Get rid of 'V';
+
+        # Could need to be inverted: add or subtract a 0 at the beginning of
+        # the list.
+        if ($swash->{'INVERT_IT'}) {
+            if (@invlist && $invlist[0] == 0) {
+                shift @invlist;
+            }
+            else {
+                unshift @invlist, 0;
+            }
+        }
         foreach my $i (0 .. @invlist - 1) {
             $invmap[$i] = ($i % 2 == 0) ? 'Y' : 'N'
         }
@@ -3558,6 +3575,10 @@ RETRY:
         }
     }
     else {
+        if ($swash->{'INVERT_IT'}) {
+            croak __PACKAGE__, ":prop_invmap: Don't know how to deal with inverted";
+        }
+
         # The LIST input lines look like:
         # ...
         # 0374\t\tCommon
@@ -3873,7 +3894,7 @@ RETRY:
         map { $_ = [ split " ", $_  ] if $_ =~ / / } @invmap;
         $format = 'sl';
     }
-    elsif ($returned_prop eq 'ToNameAlias') {
+    elsif ($returned_prop =~ / To ( _Perl )? NameAlias/x) {
 
         # This property currently doesn't have any lists, but theoretically
         # could
@@ -3888,7 +3909,14 @@ RETRY:
         # to indicate that need to add code point to it.
         $format = 'ar';
     }
-    elsif ($format ne 'n' && $format ne 'a') {
+    elsif ($format eq 'ax') {
+
+        # Normally 'ax' properties have overrides, and will have been handled
+        # above, but if not, they still need adjustment, and the hex values
+        # have already been converted to decimal
+        $format = 'a';
+    }
+    elsif ($format ne 'n' && $format !~ / ^ a /x) {
 
         # All others are simple scalars
         $format = 's';
@@ -4079,6 +4107,15 @@ for its block using C<charblock>).
 Note that starting in Unicode 6.1, many of the block names have shorter
 synonyms.  These are always given in the new style.
 
+=head2 Use with older Unicode versions
+
+The functions in this module work as well as can be expected when
+used on earlier Unicode versions.  But, obviously, they use the available data
+from that Unicode version.  For example, if the Unicode version predates the
+definition of the script property (Unicode 3.1), then any function that deals
+with scripts is going to return C<undef> for the script portion of the return
+value.
+
 =head1 AUTHOR
 
 Jarkko Hietaniemi.  Now maintained by perl5 porters.
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index a799dd036b..22b2edbc93 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -19,14 +19,22 @@ use Test::More;
 
 use Unicode::UCD qw(charinfo charprop charprops_all);
 
+my $expected_version = '8.0.0';
+my $current_version = Unicode::UCD::UnicodeVersion;
+my $v_unicode_version = pack "C*", split /\./, $current_version;
+my $unknown_script = ($v_unicode_version lt v5.0.0)
+                     ? 'Common'
+                     : 'Unknown';
 my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by
 $/ = $input_record_separator;   # setting this.
 
 my $charinfo;
 
 is(charinfo(0x110000), undef, "Verify charinfo() of non-unicode is undef");
-is(charprop(0x110000, 'age'), "Unassigned", "Verify charprop(age) of non-unicode is Unassigned");
-is(charprop(0x110000, 'in'), "Unassigned", "Verify charprop(in), a bipartite Perl extension, works");
+if ($v_unicode_version ge v3.2.0) {
+    is(lc charprop(0x110000, 'age'), lc "Unassigned", "Verify charprop(age) of non-unicode is Unassigned");
+    is(charprop(0x110000, 'in'), "Unassigned", "Verify charprop(in), a bipartite Perl extension, works");
+}
 is(charprop(0x110000, 'Any'), undef, "Verify charprop of non-bipartite Perl extension returns undef");
 
 my $cp = 0;
@@ -37,9 +45,10 @@ is($charinfo->{code},           "0000",
 is($charinfo->{name},           "<control>");
 is(charprop($cp, "name"),       "");
 
-# This gets a sl-type property returning a flattened list
-is(charprop($cp, "name_alias"), "NULL: control,NUL: abbreviation");
-
+if ($v_unicode_version ge v6.1.0) {
+    # This gets a sl-type property returning a flattened list
+    is(charprop($cp, "name_alias"), "NULL: control,NUL: abbreviation");
+}
 is($charinfo->{category},       "Cc");
 is(charprop($cp, "category"),   "Control");
 is($charinfo->{combining},      "0");
@@ -66,8 +75,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, "tc"),         "\0");
 is($charinfo->{block},          "Basic Latin");
 is(charprop($cp, "block"),      "Basic_Latin");
-is($charinfo->{script},         "Common");
-is(charprop($cp, "script"),     "Common");
+is($charinfo->{script},         "Common") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, "script"),     "Common") if $v_unicode_version gt v3.0.1;
 
 $cp = utf8::unicode_to_native(0x41);
 my $A_code = sprintf("%04X", ord("A"));
@@ -103,8 +112,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "A");
 is($charinfo->{block},          "Basic Latin");
 is(charprop($cp, 'block'),      "Basic_Latin");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 $cp = 0x100;
 $charinfo = charinfo($cp);
@@ -138,8 +147,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{100}");
 is($charinfo->{block},          "Latin Extended-A");
 is(charprop($cp, 'block'),      "Latin_Extended_A");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 $cp = 0x590;               # 0x0590 is in the Hebrew block but unused.
 $charinfo = charinfo($cp);
@@ -152,7 +161,9 @@ is(charprop($cp, 'gc'),         "Unassigned");
 is($charinfo->{combining},      undef);
 is(charprop($cp, 'ccc'),        "Not_Reordered");
 is($charinfo->{bidi},           undef);
-is(charprop($cp, 'bc'),         "Right_To_Left");
+if ($v_unicode_version gt v3.2.0) {
+    is(charprop($cp, 'bc'),         "Right_To_Left");
+}
 is($charinfo->{decomposition},  undef);
 is(charprop($cp, 'dm'),         "\x{590}");
 is($charinfo->{decimal},        undef);
@@ -174,7 +185,8 @@ is(charprop($cp, 'tc'),         "\x{590}");
 is($charinfo->{block},          undef);
 is(charprop($cp, 'block'),      "Hebrew");
 is($charinfo->{script},         undef);
-is(charprop($cp, 'script'),     "Unknown");
+is(charprop($cp, 'script'),     $unknown_script) if $v_unicode_version gt
+v3.0.1;
 
 # 0x05d0 is in the Hebrew block and used.
 
@@ -210,8 +222,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{5d0}");
 is($charinfo->{block},          "Hebrew");
 is(charprop($cp, 'block'),      "Hebrew");
-is($charinfo->{script},         "Hebrew");
-is(charprop($cp, 'script'),     "Hebrew");
+is($charinfo->{script},         "Hebrew") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hebrew") if $v_unicode_version gt v3.0.1;
 
 # An open syllable in Hangul.
 
@@ -247,8 +259,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{AC00}");
 is($charinfo->{block},          "Hangul Syllables");
 is(charprop($cp, 'block'),      "Hangul_Syllables");
-is($charinfo->{script},         "Hangul");
-is(charprop($cp, 'script'),     "Hangul");
+is($charinfo->{script},         "Hangul") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hangul") if $v_unicode_version gt v3.0.1;
 
 # A closed syllable in Hangul.
 
@@ -284,85 +296,89 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{AE00}");
 is($charinfo->{block},          "Hangul Syllables");
 is(charprop($cp, 'block'),      "Hangul_Syllables");
-is($charinfo->{script},         "Hangul");
-is(charprop($cp, 'script'),     "Hangul");
-
-$cp = 0x1D400;
-$charinfo = charinfo($cp);
+is($charinfo->{script},         "Hangul") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hangul") if $v_unicode_version gt v3.0.1;
+
+if ($v_unicode_version gt v3.0.1) {
+    $cp = 0x1D400;
+    $charinfo = charinfo($cp);
+
+    is($charinfo->{code},           "1D400", "MATHEMATICAL BOLD CAPITAL A");
+    is($charinfo->{name},           "MATHEMATICAL BOLD CAPITAL A");
+    is(charprop($cp, 'name'),       "MATHEMATICAL BOLD CAPITAL A");
+    is($charinfo->{category},       "Lu");
+    is(charprop($cp, 'gc'),         "Uppercase_Letter");
+    is($charinfo->{combining},      "0");
+    is(charprop($cp, 'ccc'),        "Not_Reordered");
+    is($charinfo->{bidi},           "L");
+    is(charprop($cp, 'bc'),         "Left_To_Right");
+    is($charinfo->{decomposition},  "<font> $A_code");
+    is(charprop($cp, 'dm'),         "A");
+    is($charinfo->{decimal},        "");
+    is($charinfo->{digit},          "");
+    is($charinfo->{numeric},        "");
+    is(charprop($cp, 'nv'),         "NaN");
+    is($charinfo->{mirrored},       "N");
+    is(charprop($cp, 'bidim'),      "No");
+    is($charinfo->{unicode10},      "");
+    is(charprop($cp, 'na1'),        "");
+    is($charinfo->{comment},        "");
+    is(charprop($cp, 'isc'),        "");
+    is($charinfo->{upper},          "");
+    is(charprop($cp, 'uc'),         "\x{1D400}");
+    is($charinfo->{lower},          "");
+    is(charprop($cp, 'lc'),         "\x{1D400}");
+    is($charinfo->{title},          "");
+    is(charprop($cp, 'tc'),         "\x{1D400}");
+    is($charinfo->{block},          "Mathematical Alphanumeric Symbols");
+    is(charprop($cp, 'block'),      "Mathematical_Alphanumeric_Symbols");
+    is($charinfo->{script},         "Common");
+    is(charprop($cp, 'script'),     "Common");
+}
 
-is($charinfo->{code},           "1D400", "MATHEMATICAL BOLD CAPITAL A");
-is($charinfo->{name},           "MATHEMATICAL BOLD CAPITAL A");
-is(charprop($cp, 'name'),       "MATHEMATICAL BOLD CAPITAL A");
-is($charinfo->{category},       "Lu");
-is(charprop($cp, 'gc'),         "Uppercase_Letter");
-is($charinfo->{combining},      "0");
-is(charprop($cp, 'ccc'),        "Not_Reordered");
-is($charinfo->{bidi},           "L");
-is(charprop($cp, 'bc'),         "Left_To_Right");
-is($charinfo->{decomposition},  "<font> $A_code");
-is(charprop($cp, 'dm'),         "A");
-is($charinfo->{decimal},        "");
-is($charinfo->{digit},          "");
-is($charinfo->{numeric},        "");
-is(charprop($cp, 'nv'),         "NaN");
-is($charinfo->{mirrored},       "N");
-is(charprop($cp, 'bidim'),      "No");
-is($charinfo->{unicode10},      "");
-is(charprop($cp, 'na1'),        "");
-is($charinfo->{comment},        "");
-is(charprop($cp, 'isc'),        "");
-is($charinfo->{upper},          "");
-is(charprop($cp, 'uc'),         "\x{1D400}");
-is($charinfo->{lower},          "");
-is(charprop($cp, 'lc'),         "\x{1D400}");
-is($charinfo->{title},          "");
-is(charprop($cp, 'tc'),         "\x{1D400}");
-is($charinfo->{block},          "Mathematical Alphanumeric Symbols");
-is(charprop($cp, 'block'),      "Mathematical_Alphanumeric_Symbols");
-is($charinfo->{script},         "Common");
-is(charprop($cp, 'script'),     "Common");
-
-$cp = 0x9FBA;	                #Bug 58428
-$charinfo = charinfo(0x9FBA);
-
-is($charinfo->{code},           "9FBA", "U+9FBA");
-is($charinfo->{name},           "CJK UNIFIED IDEOGRAPH-9FBA");
-is(charprop($cp, 'name'),       "CJK UNIFIED IDEOGRAPH-9FBA");
-is($charinfo->{category},       "Lo");
-is(charprop($cp, 'gc'),         "Other_Letter");
-is($charinfo->{combining},      "0");
-is(charprop($cp, 'ccc'),        "Not_Reordered");
-is($charinfo->{bidi},           "L");
-is(charprop($cp, 'bc'),         "Left_To_Right");
-is($charinfo->{decomposition},  "");
-is(charprop($cp, 'dm'),         "\x{9FBA}");
-is($charinfo->{decimal},        "");
-is($charinfo->{digit},          "");
-is($charinfo->{numeric},        "");
-is(charprop($cp, 'nv'),         "NaN");
-is($charinfo->{mirrored},       "N");
-is(charprop($cp, 'bidim'),      "No");
-is($charinfo->{unicode10},      "");
-is(charprop($cp, 'na1'),        "");
-is($charinfo->{comment},        "");
-is(charprop($cp, 'isc'),        "");
-is($charinfo->{upper},          "");
-is(charprop($cp, 'uc'),         "\x{9FBA}");
-is($charinfo->{lower},          "");
-is(charprop($cp, 'lc'),         "\x{9FBA}");
-is($charinfo->{title},          "");
-is(charprop($cp, 'tc'),         "\x{9FBA}");
-is($charinfo->{block},          "CJK Unified Ideographs");
-is(charprop($cp, 'block'),      "CJK_Unified_Ideographs");
-is($charinfo->{script},         "Han");
-is(charprop($cp, 'script'),     "Han");
+if ($v_unicode_version ge v4.1.0) {
+    $cp = 0x9FBA;	                #Bug 58428
+    $charinfo = charinfo(0x9FBA);
+
+    is($charinfo->{code},           "9FBA", "U+9FBA");
+    is($charinfo->{name},           "CJK UNIFIED IDEOGRAPH-9FBA");
+    is(charprop($cp, 'name'),       "CJK UNIFIED IDEOGRAPH-9FBA");
+    is($charinfo->{category},       "Lo");
+    is(charprop($cp, 'gc'),         "Other_Letter");
+    is($charinfo->{combining},      "0");
+    is(charprop($cp, 'ccc'),        "Not_Reordered");
+    is($charinfo->{bidi},           "L");
+    is(charprop($cp, 'bc'),         "Left_To_Right");
+    is($charinfo->{decomposition},  "");
+    is(charprop($cp, 'dm'),         "\x{9FBA}");
+    is($charinfo->{decimal},        "");
+    is($charinfo->{digit},          "");
+    is($charinfo->{numeric},        "");
+    is(charprop($cp, 'nv'),         "NaN");
+    is($charinfo->{mirrored},       "N");
+    is(charprop($cp, 'bidim'),      "No");
+    is($charinfo->{unicode10},      "");
+    is(charprop($cp, 'na1'),        "");
+    is($charinfo->{comment},        "");
+    is(charprop($cp, 'isc'),        "");
+    is($charinfo->{upper},          "");
+    is(charprop($cp, 'uc'),         "\x{9FBA}");
+    is($charinfo->{lower},          "");
+    is(charprop($cp, 'lc'),         "\x{9FBA}");
+    is($charinfo->{title},          "");
+    is(charprop($cp, 'tc'),         "\x{9FBA}");
+    is($charinfo->{block},          "CJK Unified Ideographs");
+    is(charprop($cp, 'block'),      "CJK_Unified_Ideographs");
+    is($charinfo->{script},         "Han");
+    is(charprop($cp, 'script'),     "Han");
+}
 
 use Unicode::UCD qw(charblock charscript);
 
 # 0x0590 is in the Hebrew block but unused.
 
 is(charblock(0x590),          "Hebrew", "0x0590 - Hebrew unused charblock");
-is(charscript(0x590),         "Unknown",    "0x0590 - Hebrew unused charscript");
+is(charscript(0x590),         $unknown_script, "0x0590 - Hebrew unused charscript") if $v_unicode_version gt v3.0.1;
 is(charblock(0x1FFFF),        "No_Block", "0x1FFFF - unused charblock");
 
 my $fraction_3_4_code = sprintf("%04X", utf8::unicode_to_native(0xbe));
@@ -401,8 +417,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         chr hex $cp);
 is($charinfo->{block},          "Latin-1 Supplement");
 is(charprop($cp, 'block'),      "Latin_1_Supplement");
-is($charinfo->{script},         "Common");
-is(charprop($cp, 'script'),     "Common");
+is($charinfo->{script},         "Common") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Common") if $v_unicode_version gt v3.0.1;
 
 # This is to test a case where both simple and full lowercases exist and
 # differ
@@ -435,13 +451,13 @@ is(charprop($cp, 'isc'),        "");
 is($charinfo->{upper},          "");
 is(charprop($cp, 'uc'),         "\x{130}");
 is($charinfo->{lower},          $i_code);
-is(charprop($cp, 'lc'),         "i\x{307}");
+is(charprop($cp, 'lc'),         "i\x{307}") if $v_unicode_version ge v3.2.0;
 is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{130}");
 is($charinfo->{block},          "Latin Extended-A");
 is(charprop($cp, 'block'),      "Latin_Extended_A");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 # This is to test a case where both simple and full uppercases exist and
 # differ
@@ -478,19 +494,23 @@ is($charinfo->{title},          "1F88");
 is(charprop($cp, "tc"),         "\x{1F88}");
 is($charinfo->{block},          "Greek Extended");
 is(charprop($cp, "block"),      "Greek_Extended");
-is($charinfo->{script},         "Greek");
-is(charprop($cp, "script"),     "Greek");
+is($charinfo->{script},         "Greek") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, "script"),     "Greek") if $v_unicode_version gt v3.0.1;
 
 is(charprop(ord("A"), "foo"),    undef,
                         "Verify charprop of unknown property returns <undef>");
 
 # These were created from inspection of the code to exercise the branches
-is(charprop(ord("("), "bpb"),    ")",
+if ($v_unicode_version ge v6.3.0) {
+    is(charprop(ord("("), "bpb"),    ")",
             "Verify charprop figures out that s-type properties can be char");
+}
 is(charprop(ord("9"), "nv"),     9,
                             "Verify charprop can adjust an ar-type property");
-is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
+if ($v_unicode_version ge v5.2.0) {
+    is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
                     "Verify charprop can handle an \"\" in ae-type property");
+}
 
 my $mark_props_ref = charprops_all(0x300);
 is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
@@ -499,9 +519,13 @@ is($mark_props_ref->{'Bidi_Mirrored'}, "No");
 is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
 is($mark_props_ref->{'Case_Folding'}, "\x{300}");
 is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
-is($mark_props_ref->{'Decomposition_Type'}, "None");
+is($mark_props_ref->{'Decomposition_Type'}, ($v_unicode_version le v4.0.0)
+                                             ? "none"
+                                             : "None");
 is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
-is($mark_props_ref->{'ISO_Comment'}, "");
+if ($v_unicode_version gt v5.1.0) {
+    is($mark_props_ref->{'ISO_Comment'}, "");
+}
 is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
 is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
 is($mark_props_ref->{'Numeric_Type'}, "None");
@@ -522,36 +546,40 @@ ok(exists $charblocks->{Thai}, 'Thai charblock exists');
 is($charblocks->{Thai}->[0]->[0], hex('0e00'));
 ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');
 
-my $charscripts = charscripts();
+if ($v_unicode_version gt v3.0.1) {
+    my $charscripts = charscripts();
 
-ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
-is($charscripts->{Armenian}->[0]->[0], hex('0531'));
-ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
+    ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
+    is($charscripts->{Armenian}->[0]->[0], hex('0531'));
+    ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
 
-my $charscript;
+    my $charscript;
 
-$charscript = charscript("12ab");
-is($charscript, 'Ethiopic', 'Ethiopic charscript');
+    $charscript = charscript("12ab");
+    is($charscript, 'Ethiopic', 'Ethiopic charscript');
 
-$charscript = charscript("0x12ab");
-is($charscript, 'Ethiopic');
+    $charscript = charscript("0x12ab");
+    is($charscript, 'Ethiopic');
 
-$charscript = charscript("U+12ab");
-is($charscript, 'Ethiopic');
+    $charscript = charscript("U+12ab");
+    is($charscript, 'Ethiopic');
 
-my $ranges;
+    my $ranges;
 
-$ranges = charscript('Ogham');
-is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
-is($ranges->[0]->[1], hex('169C'));
+    if ($v_unicode_version gt v4.0.0) {
+        $ranges = charscript('Ogham');
+        is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
+        is($ranges->[0]->[1], hex('169C'));
+    }
 
-use Unicode::UCD qw(charinrange);
+    use Unicode::UCD qw(charinrange);
 
-$ranges = charscript('Cherokee');
-ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
-ok( charinrange($ranges, "13a0"));
-ok( charinrange($ranges, "13f4"));
-ok(!charinrange($ranges, "13ff"));
+    $ranges = charscript('Cherokee');
+    ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
+    ok( charinrange($ranges, "13a0"));
+    ok( charinrange($ranges, "13f4"));
+    ok(!charinrange($ranges, "13ff"));
+}
 
 use Unicode::UCD qw(general_categories);
 
@@ -571,7 +599,8 @@ is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic');
 
 # If this fails, then maybe one should look at the Unicode changes to see
 # what else might need to be updated.
-is(Unicode::UCD::UnicodeVersion, '8.0.0', 'UnicodeVersion');
+ok($current_version le $expected_version,
+                    "Verify there isn't a new Unicode version to upgrade to");
 
 use Unicode::UCD qw(compexcl);
 
@@ -593,66 +622,93 @@ is($casefold->{full}, $a_code, 'casefold native(0x41) full');
 is($casefold->{simple}, $a_code, 'casefold native(0x41) simple');
 is($casefold->{turkic}, "", 'casefold native(0x41) turkic');
 
-$casefold = casefold(utf8::unicode_to_native(0xdf));
 my $sharp_s_code = sprintf("%04X", utf8::unicode_to_native(0xdf));
 my $S_code = sprintf("%04X", ord "S");
 my $s_code = sprintf("%04X", ord "s");
 
-is($casefold->{code}, $sharp_s_code, 'casefold native(0xDF) code');
-is($casefold->{status}, 'F', 'casefold native(0xDF) status');
-is($casefold->{mapping}, "$s_code $s_code", 'casefold native(0xDF) mapping');
-is($casefold->{full}, "$s_code $s_code", 'casefold native(0xDF) full');
-is($casefold->{simple}, "", 'casefold native(0xDF) simple');
-is($casefold->{turkic}, "", 'casefold native(0xDF) turkic');
-
-# Do different tests depending on if version < 3.2, or not.
-my $v_unicode_version = pack "C*", split /\./, Unicode::UCD::UnicodeVersion();
-if ($v_unicode_version lt v3.2.0) {
-	$casefold = casefold(0x130);
-
-	is($casefold->{code}, '0130', 'casefold 0x130 code');
-	is($casefold->{status}, 'I' , 'casefold 0x130 status');
-	is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
-	is($casefold->{full}, $i_code, 'casefold 0x130 full');
-	is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
-	is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
-
-	$casefold = casefold(0x131);
-
-	is($casefold->{code}, '0131', 'casefold 0x131 code');
-	is($casefold->{status}, 'I' , 'casefold 0x131 status');
-	is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
-	is($casefold->{full}, $i_code, 'casefold 0x131 full');
-	is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
-	is($casefold->{turkic}, $i_code, 'casefold 0x131 turkic');
-} else {
-	$casefold = casefold(utf8::unicode_to_native(0x49));
-
-	is($casefold->{code}, $I_code, 'casefold native(0x49) code');
-	is($casefold->{status}, 'C' , 'casefold native(0x49) status');
-	is($casefold->{mapping}, $i_code, 'casefold native(0x49) mapping');
-	is($casefold->{full}, $i_code, 'casefold native(0x49) full');
-	is($casefold->{simple}, $i_code, 'casefold native(0x49) simple');
-	is($casefold->{turkic}, "0131", 'casefold native(0x49) turkic');
-
-	$casefold = casefold(0x130);
-
-	is($casefold->{code}, '0130', 'casefold 0x130 code');
-	is($casefold->{status}, 'F' , 'casefold 0x130 status');
-	is($casefold->{mapping}, "$i_code 0307", 'casefold 0x130 mapping');
-	is($casefold->{full}, "$i_code 0307", 'casefold 0x130 full');
-	is($casefold->{simple}, "", 'casefold 0x130 simple');
-	is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
-}
+if ($v_unicode_version gt v3.0.0) { # These special ones don't work on early
+                                    # perls
+    $casefold = casefold(utf8::unicode_to_native(0xdf));
+
+    is($casefold->{code}, $sharp_s_code, 'casefold native(0xDF) code');
+    is($casefold->{status}, 'F', 'casefold native(0xDF) status');
+    is($casefold->{mapping}, "$s_code $s_code", 'casefold native(0xDF) mapping');
+    is($casefold->{full}, "$s_code $s_code", 'casefold native(0xDF) full');
+    is($casefold->{simple}, "", 'casefold native(0xDF) simple');
+    is($casefold->{turkic}, "", 'casefold native(0xDF) turkic');
+
+    # Do different tests depending on if version < 3.2, or not.
+    if ($v_unicode_version eq v3.0.1) {
+            # In this release, there was no special Turkic values.
+            # Both 0x130 and 0x131 folded to 'i'.
+
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'C' , 'casefold 0x130 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x130 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
+            is($casefold->{turkic}, "", 'casefold 0x130 turkic');
+
+            $casefold = casefold(0x131);
+
+            is($casefold->{code}, '0131', 'casefold 0x131 code');
+            is($casefold->{status}, 'C' , 'casefold 0x131 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x131 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
+            is($casefold->{turkic}, "", 'casefold 0x131 turkic');
+    }
+    elsif ($v_unicode_version lt v3.2.0) {
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'I' , 'casefold 0x130 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x130 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
+
+            $casefold = casefold(0x131);
+
+            is($casefold->{code}, '0131', 'casefold 0x131 code');
+            is($casefold->{status}, 'I' , 'casefold 0x131 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x131 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x131 turkic');
+    } else {
+            $casefold = casefold(utf8::unicode_to_native(0x49));
+
+            is($casefold->{code}, $I_code, 'casefold native(0x49) code');
+            is($casefold->{status}, 'C' , 'casefold native(0x49) status');
+            is($casefold->{mapping}, $i_code, 'casefold native(0x49) mapping');
+            is($casefold->{full}, $i_code, 'casefold native(0x49) full');
+            is($casefold->{simple}, $i_code, 'casefold native(0x49) simple');
+            is($casefold->{turkic}, "0131", 'casefold native(0x49) turkic');
+
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'F' , 'casefold 0x130 status');
+            is($casefold->{mapping}, "$i_code 0307", 'casefold 0x130 mapping');
+            is($casefold->{full}, "$i_code 0307", 'casefold 0x130 full');
+            is($casefold->{simple}, "", 'casefold 0x130 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
+    }
 
-$casefold = casefold(0x1F88);
+    if ($v_unicode_version gt v3.0.1) {
+        $casefold = casefold(0x1F88);
 
-is($casefold->{code}, '1F88', 'casefold 0x1F88 code');
-is($casefold->{status}, 'S' , 'casefold 0x1F88 status');
-is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping');
-is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full');
-is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple');
-is($casefold->{turkic}, "", 'casefold 0x1F88 turkic');
+        is($casefold->{code}, '1F88', 'casefold 0x1F88 code');
+        is($casefold->{status}, 'S' , 'casefold 0x1F88 status');
+        is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping');
+        is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full');
+        is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple');
+        is($casefold->{turkic}, "", 'casefold 0x1F88 turkic');
+    }
+}
 
 ok(!casefold(utf8::unicode_to_native(0x20)));
 
@@ -672,12 +728,16 @@ ok($casespec->{code} eq $sharp_s_code &&
 
 $casespec = casespec(0x307);
 
-ok($casespec->{az}->{code} eq '0307' &&
-   !defined $casespec->{az}->{lower} &&
-   $casespec->{az}->{title} eq '0307'  &&
-   $casespec->{az}->{upper} eq '0307' &&
-   $casespec->{az}->{condition} eq 'az After_I',
-   'casespec 0x307');
+if ($v_unicode_version gt v3.1.0) {
+    ok($casespec->{az}->{code} eq '0307'
+    && !defined $casespec->{az}->{lower}
+    && $casespec->{az}->{title} eq '0307'
+    && $casespec->{az}->{upper} eq '0307'
+    && $casespec->{az}->{condition} eq ($v_unicode_version le v3.2)
+                                        ? 'az After_Soft_Dotted'
+                                        : 'az After_I',
+    'casespec 0x307');
+}
 
 # perl #7305 UnicodeCD::compexcl is weird
 
@@ -699,11 +759,15 @@ is(Unicode::UCD::_getcode('x123'),    undef, "_getcode(x123)");
 is(Unicode::UCD::_getcode('0x123x'),  undef, "_getcode(x123)");
 is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
 
+SKIP:
 {
+    skip("Script property not in this release", 3) if $v_unicode_version lt v3.1.0;
     my $r1 = charscript('Latin');
     if (ok(defined $r1, "Found Latin script")) {
+        skip("Latin range count will be wrong when using older Unicode release",
+             2) if $v_unicode_version lt $expected_version;
         my $n1 = @$r1;
-        is($n1, 31, "number of ranges in Latin script (Unicode 7.0.0)") if $::IS_ASCII;
+        is($n1, 31, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
         shift @$r1 while @$r1;
         my $r2 = charscript('Latin');
         is(@$r2, $n1, "modifying results should not mess up internal caches");
@@ -714,38 +778,72 @@ is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
 	is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
 }
 
-use Unicode::UCD qw(namedseq);
-
-is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
-is(namedseq("KATAKANA LETTER AINU Q"), undef);
-is(namedseq(), undef);
-is(namedseq(qw(foo bar)), undef);
-my @ns = namedseq("KATAKANA LETTER AINU P");
-is(scalar @ns, 2);
-is($ns[0], 0x31F7);
-is($ns[1], 0x309A);
-my %ns = namedseq();
-is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
-@ns = namedseq(42);
-is(@ns, 0);
+if ($v_unicode_version ge v4.1.0) {
+    use Unicode::UCD qw(namedseq);
+
+    is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
+    is(namedseq("KATAKANA LETTER AINU Q"), undef);
+    is(namedseq(), undef);
+    is(namedseq(qw(foo bar)), undef);
+    my @ns = namedseq("KATAKANA LETTER AINU P");
+    is(scalar @ns, 2);
+    is($ns[0], 0x31F7);
+    is($ns[1], 0x309A);
+    my %ns = namedseq();
+    is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
+    @ns = namedseq(42);
+    is(@ns, 0);
+}
 
 use Unicode::UCD qw(num);
-use charnames ":full";
+use charnames ();   # Don't use \N{} on things not in original Unicode
+                    # version; else will get a compilation error when this .t
+                    # is run on an older version.
 
 is(num("0"), 0, 'Verify num("0") == 0');
 is(num("98765"), 98765, 'Verify num("98765") == 98765');
-ok(! defined num("98765\N{FULLWIDTH DIGIT FOUR}"), 'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined');
-is(num("\N{NEW TAI LUE DIGIT TWO}"), 2, 'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2');
-is(num("\N{NEW TAI LUE DIGIT ONE}"), 1, 'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1');
-is(num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}"), 21, 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21');
-ok(! defined num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}"), 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined');
-is(num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}"), 3, 'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3');
-ok(! defined num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}"), 'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined');
+ok(! defined num("98765\N{FULLWIDTH DIGIT FOUR}"),
+   'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined');
+my $tai_lue_2;
+if ($v_unicode_version ge v4.1.0) {
+    my $tai_lue_1 = charnames::string_vianame("NEW TAI LUE DIGIT ONE");
+    $tai_lue_2 = charnames::string_vianame("NEW TAI LUE DIGIT TWO");
+    is(num($tai_lue_2), 2, 'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2');
+    is(num($tai_lue_1), 1, 'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1');
+    is(num($tai_lue_2 . $tai_lue_1), 21,
+       'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21');
+}
+if ($v_unicode_version ge v5.2.0) {
+    ok(! defined num($tai_lue_2
+         . charnames::string_vianame("NEW TAI LUE THAM DIGIT ONE")),
+         'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined');
+}
+if ($v_unicode_version ge v5.1.0) {
+    my $cham_0 = charnames::string_vianame("CHAM DIGIT ZERO");
+    is(num($cham_0 . charnames::string_vianame("CHAM DIGIT THREE")), 3,
+       'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3');
+    if ($v_unicode_version ge v5.2.0) {
+        ok(! defined num(  $cham_0
+                         . charnames::string_vianame("JAVANESE DIGIT NINE")),
+        'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined');
+    }
+}
 is(num("\N{SUPERSCRIPT TWO}"), 2, 'Verify num("\N{SUPERSCRIPT TWO} == 2');
-is(num("\N{ETHIOPIC NUMBER TEN THOUSAND}"), 10000, 'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000');
-is(num("\N{NORTH INDIC FRACTION ONE HALF}"), .5, 'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5');
-is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9');
-is(num("\N{U+5146}"), 1000000000000, 'Verify num("\N{U+5146}") == 1000000000000');
+if ($v_unicode_version ge v3.0.0) {
+    is(num(charnames::string_vianame("ETHIOPIC NUMBER TEN THOUSAND")), 10000,
+       'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000');
+}
+if ($v_unicode_version ge v5.2.0) {
+    is(num(charnames::string_vianame("NORTH INDIC FRACTION ONE HALF")),
+       .5,
+       'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5');
+    is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9');
+}
+if ($v_unicode_version gt v3.2.0) { # Is missing from non-Unihan files before
+                                    # this
+    is(num("\N{U+5146}"), 1000000000000,
+                                'Verify num("\N{U+5146}") == 1000000000000');
+}
 
 # Create a user-defined property
 sub InKana {<<'END'}
@@ -979,6 +1077,12 @@ while (<$propvalues>) {
     my @fields = split /\s*;\s*/; # Fields are separated by semi-colons
     my $prop = shift @fields;   # 0th field is the property,
 
+    # 'qc' is short in early versions of the file for any of the quick check
+    # properties.  Choose one of them.
+    if ($prop eq 'qc' && $v_unicode_version le v4.0.0) {
+        $prop = "NFKC_QC";
+    }
+
     # When changing properties, we examine the accumulated values for the old
     # one to see if our function that returns them matches.
     if ($prev_prop ne $prop) {
@@ -986,6 +1090,11 @@ while (<$propvalues>) {
             my @ucd_function_values = prop_values($prev_prop);
             @ucd_function_values = () unless @ucd_function_values;
 
+            # The file didn't include strictly numeric values until after this
+            if ($prev_prop eq 'ccc' && $v_unicode_version le v6.0.0) {
+                @ucd_function_values = grep { /\D/ } @ucd_function_values;
+            }
+
             # This perl extension doesn't appear in the official file
             push @this_prop_values, "Non_Canon" if $prev_prop eq 'dt';
 
@@ -1008,6 +1117,12 @@ while (<$propvalues>) {
     # characters that are ignored under loose matching to test that
     my $mod_prop = "$extra_chars$prop";
 
+    if ($prop eq 'blk' && $v_unicode_version le v5.0.0) {
+        foreach my $element (@fields) {
+            $element =~ s/-/_/g;
+        }
+    }
+
     if ($fields[0] eq 'n/a') {  # See comments in input file, essentially
                                 # means full name and short name are identical
         $fields[0] = $fields[1];
@@ -1190,36 +1305,39 @@ if ($::IS_ASCII) { # On EBCDIC, other things will come first, and can vary
 
     $prop = "lc";
     ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
-    is($format, 'al', "prop_invmap() format of '$prop' is 'al'");
+    my $lc_format = ($v_unicode_version ge v3.2.0) ? 'al' : 'a';
+    is($format, $lc_format, "prop_invmap() format of '$prop' is '$lc_format");
     is($missing, '0', "prop_invmap() missing of '$prop' is '0'");
     is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41");
     is($invmap_ref->[1], 0x61, "prop_invmap('$prop') map[1] is 0x61");
 }
 
 # This property is stable and small, so can test all of it
-$prop = "ASCII_Hex_Digit";
-($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
-is($format, 's', "prop_invmap() format of '$prop' is 's'");
-is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
-if ($::IS_ASCII) {
-    is_deeply($invlist_ref, [ 0x0000, 0x0030, 0x003A,
-                              0x0041, 0x0047,
-                              0x0061, 0x0067, 0x110000
-                            ],
-          "prop_invmap('$prop') code point list is correct");
-}
-elsif ($::IS_EBCDIC) {
-    is_deeply($invlist_ref, [
-            utf8::unicode_to_native(0x0000),
-            utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
-            utf8::unicode_to_native(0x110000)
-          ],
-          "prop_invmap('$prop') code point list is correct");
+if ($v_unicode_version gt v3.1.0) {
+    $prop = "ASCII_Hex_Digit";
+    ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
+    is($format, 's', "prop_invmap() format of '$prop' is 's'");
+    is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
+    if ($::IS_ASCII) {
+        is_deeply($invlist_ref, [ 0x0000, 0x0030, 0x003A,
+                                0x0041, 0x0047,
+                                0x0061, 0x0067, 0x110000
+                                ],
+            "prop_invmap('$prop') code point list is correct");
+    }
+    elsif ($::IS_EBCDIC) {
+        is_deeply($invlist_ref, [
+                utf8::unicode_to_native(0x0000),
+                utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
+                utf8::unicode_to_native(0x110000)
+            ],
+            "prop_invmap('$prop') code point list is correct");
+    }
+    is_deeply($invmap_ref, [ 'N', 'Y', 'N', 'Y', 'N', 'Y', 'N', 'N' ] ,
+            "prop_invmap('$prop') map list is correct");
 }
-is_deeply($invmap_ref, [ 'N', 'Y', 'N', 'Y', 'N', 'Y', 'N', 'N' ] ,
-          "prop_invmap('$prop') map list is correct");
 
 is(prop_invlist("Unknown property"), undef, "prop_invlist(<Unknown property>) returns undef");
 is(prop_invlist(undef), undef, "prop_invlist(undef) returns undef");
@@ -1240,36 +1358,38 @@ is(prop_invlist("InKana"), undef, "prop_invlist(<user-defined property returns u
 # are there in the files.  As a small hedge against that, test some
 # prop_invlist() tables fully with the known correct result.  We choose
 # ASCII_Hex_Digit again, as it is stable.
-if ($::IS_ASCII) {
-    @invlist = prop_invlist("AHex");
-    is_deeply(\@invlist, [ 0x0030, 0x003A, 0x0041,
-                                 0x0047, 0x0061, 0x0067 ],
-          "prop_invlist('AHex') is exactly the expected set of points");
-    @invlist = prop_invlist("AHex=f");
-    is_deeply(\@invlist, [ 0x0000, 0x0030, 0x003A, 0x0041,
-                                 0x0047, 0x0061, 0x0067 ],
-          "prop_invlist('AHex=f') is exactly the expected set of points");
-}
-elsif ($::IS_EBCDIC) { # Relies on the ranges 0-9, a-f, and A-F each being
-                    # contiguous
-    @invlist = prop_invlist("AHex");
-    is_deeply(\@invlist, [
-            utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
-       ],
-       "prop_invlist('AHex') is exactly the expected set of points");
-    @invlist = prop_invlist("AHex=f");
-    is_deeply(\@invlist, [
-            utf8::unicode_to_native(0x0000),
-            utf8::unicode_to_native(0x0061),
-            utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041),
-            utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030),
-            utf8::unicode_to_native(0x0039) + 1,
-       ],
-       "prop_invlist('AHex=f') is exactly the expected set of points");
+if ($v_unicode_version gt v3.1.0) {
+    if ($::IS_ASCII) {
+        @invlist = prop_invlist("AHex");
+        is_deeply(\@invlist, [ 0x0030, 0x003A, 0x0041,
+                                    0x0047, 0x0061, 0x0067 ],
+            "prop_invlist('AHex') is exactly the expected set of points");
+        @invlist = prop_invlist("AHex=f");
+        is_deeply(\@invlist, [ 0x0000, 0x0030, 0x003A, 0x0041,
+                                    0x0047, 0x0061, 0x0067 ],
+            "prop_invlist('AHex=f') is exactly the expected set of points");
+    }
+    elsif ($::IS_EBCDIC) { # Relies on the ranges 0-9, a-f, and A-F each being
+                        # contiguous
+        @invlist = prop_invlist("AHex");
+        is_deeply(\@invlist, [
+                utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
+        ],
+        "prop_invlist('AHex') is exactly the expected set of points");
+        @invlist = prop_invlist("AHex=f");
+        is_deeply(\@invlist, [
+                utf8::unicode_to_native(0x0000),
+                utf8::unicode_to_native(0x0061),
+                utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041),
+                utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030),
+                utf8::unicode_to_native(0x0039) + 1,
+        ],
+        "prop_invlist('AHex=f') is exactly the expected set of points");
+    }
 }
 
 sub fail_with_diff ($$$$) {
@@ -1547,7 +1667,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
                 fail("prop_invmap('$prop')");
                 diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap");
             }
-        next PROPERTY;
+            next PROPERTY;
         }
     }
 
@@ -1555,6 +1675,14 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
     # normalized version.
     $name = &utf8::_loose_name(lc $name);
 
+    # In the case of a combination property, both a map table and a match
+    # table are generated.  For all the tests except prop_invmap(), this is
+    # irrelevant, but for prop_invmap, having an 'is' prefix forces it to
+    # return the match table; otherwise the map.  We thus need to distinguish
+    # between the two forms.  The property name is what has this information.
+    $name = &utf8::_loose_name(lc $prop)
+                         if exists $Unicode::UCD::combination_property{$name};
+
     # Add in the characters that are supposed to be ignored to test loose
     # matching, which the tested function applies to all properties
     $display_prop = "$extra_chars$prop" unless $display_prop;
@@ -2309,7 +2437,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
         # And remove the aliases.  We read in the Name_Alias property, and go
         # through them one by one.
         my ($aliases_code_points, $aliases_maps, undef, undef)
-                                                = &prop_invmap('Name_Alias');
+                = &prop_invmap('_Perl_Name_Alias', '_perl_core_internal_ok');
         for (my $i = 0; $i < @$aliases_code_points; $i++) {
             my $code_point = $aliases_code_points->[$i];
 
@@ -2534,11 +2662,13 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
 # A few tests of search_invlist
 use Unicode::UCD qw(search_invlist);
 
-my ($scripts_ranges_ref, $scripts_map_ref) = prop_invmap("Script");
-my $index = search_invlist($scripts_ranges_ref, 0x390);
-is($scripts_map_ref->[$index], "Greek", "U+0390 is Greek");
-my @alpha_invlist = prop_invlist("Alpha");
-is(search_invlist(\@alpha_invlist, ord("\t")), undef, "search_invlist returns undef for code points before first one on the list");
+if ($v_unicode_version ge v3.1.0) { # No Script property before this
+    my ($scripts_ranges_ref, $scripts_map_ref) = prop_invmap("Script");
+    my $index = search_invlist($scripts_ranges_ref, 0x390);
+    is($scripts_map_ref->[$index], "Greek", "U+0390 is Greek");
+    my @alpha_invlist = prop_invlist("Alpha");
+    is(search_invlist(\@alpha_invlist, ord("\t")), undef, "search_invlist returns undef for code points before first one on the list");
+}
 
 ok($/ eq $input_record_separator,  "The record separator didn't get overridden");
 
diff --git a/lib/locale.t b/lib/locale.t
index 1ebd0ce39c..1b510d25e6 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -848,7 +848,8 @@ sub disp_str ($) {
             }
             else {
                 $result .= "  " unless $prev_was_punct;
-                $result .= charnames::viacode(ord $char);
+                my $name = charnames::viacode(ord $char);
+                $result .= (defined $name) ? $name : ':unknown:';
                 $prev_was_punct = 0;
             }
         }
diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl
index ef5fec6295..f892334d45 100644
--- a/lib/unicore/README.perl
+++ b/lib/unicore/README.perl
@@ -1,15 +1,12 @@
 # The goal is for perl to compile and reasonably run any version of Unicode.
-# But in v5.22, the earliest version that this works for is Unicode 5.1.
 # Working reasonably well doesn't mean that the test suite will run without
-# showing errors.  You may be able to compile an earlier version, and get
-# things to sort-of work. A few of the very-Unicode specific test files have been
+# showing errors.  A few of the very-Unicode specific test files have been
 # modified to account for different versions, but most have not.  For example,
 # some tests use characters that aren't encoded in all Unicode versions; others
 # have hard-coded the General Categories for a code point that were correct at
 # the time the test was written.  Perl itself will not compile under Unicode
 # releases prior to 3.0 without a simple change to Unicode::Normalize.
-# mktables contains instructions for this, as well as other hints for using
-# older Unicode versions.
+# mktables contains instructions for this.
 
 # The *.txt files were copied from
 
@@ -17,14 +14,20 @@
 
 # (which always points to the latest version) with subdirectories 'extracted' and
 # 'auxiliary'.  Older versions are located under Public with an appropriate name.
+# They are also available via http at www.unicode.org/versions/
+#
 
 # The Unihan files were not included due to space considerations.  Also NOT
-# included were any *.html files.  It is possible to add the Unihan files, and
-# edit mktables (see instructions near its beginning) to look at them.
+# included were any *.html files.  It is possible to add the Unihan files and
+# have some properties from them automatically compiled.  By editing mktables
+# (see instructions near its beginning) you can add other Unihan properties.
 
 # The file named 'version' should exist and be a single line with the Unicode
 # version, like:
+#
 # 5.2.0
+#
+# (without the initial '# ')
 
 # To be 8.3 filesystem friendly, the names of some of the input files have been
 # changed from the values that are in the Unicode DB.  Not all of the Test
@@ -58,6 +61,8 @@ mv extracted/DerivedJoiningType.txt extracted/DJoinType.txt
 mv extracted/DerivedLineBreak.txt extracted/DLineBreak.txt
 mv extracted/DerivedNumericType.txt extracted/DNumType.txt
 mv extracted/DerivedNumericValues.txt extracted/DNumValues.txt
+rmdir extracted 2>/dev/null     # Will fail if non-empty, but if it is empty
+                                # was an early release that didn't have it.
 
 mv auxiliary/GraphemeBreakTest.txt auxiliary/GCBTest.txt
 mv auxiliary/LineBreakTest.txt auxiliary/LBTest.txt
@@ -81,14 +86,31 @@ mv Unihan_Variants.txt UnihanVariants.txt
 # filesystems.
 
 # mktables is used to generate the tables used by the rest of Perl.  It will
-# warn you about any *.txt files in the directory substructure that it doesn't
-# know about.  You should remove any so-identified, or edit mktables to add
-# them to its lists to process.  You can run
+# warn you about any *.txt and *.html files in the directory substructure that
+# it doesn't know about.  You should remove any so-identified, or edit mktables
+# to add them to its lists to process.  You can run
 #
 #    mktables -globlist
 #
-#to have it try to process these tables generically.
-#
+# to have it try to process these tables generically.
+
+# COMPILING ON OLDER UNICODE VERSIONS
+#
+# To compile perl for use with an older Unicode release, delete everything in
+# the lib/unicore directory except mktables and Makefile.  Then download the
+# Unicode-supplied files for the desired version to that directory  (A url for
+# these is given earlier in this file).  Then create the 'version' file with a
+# single line, like '6.1.0'.  Do a 'make test' from the project level.  You
+# will get some porting errors for needing to regen.  Regenerate what it tells
+# you are needed, and make test again.  If you compile an old enough version,
+# you will also have to download a few files from later Unicode versions,
+# following the instructions that will be given if warranted.  It should
+# compile in any release without warnings, except for some casing conflicts
+# in Unicode 2.1.8, and some extraneous files will show up in very early
+# releases of the form qr/diff.*\.txt/.  If you add Unihan.txt, one line is in error in
+#
+# Other glitches are noted in mktables under 'UNICODE VERSIONS NOTES'
+
 # FOR PUMPKINS
 #
 # The files are inter-related.  If you take the latest UnicodeData.txt, for
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 572c299038..27cb45ab9d 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -4,17 +4,9 @@
 # Any files created or read by this program should be listed in 'mktables.lst'
 # Use -makelist to regenerate it.
 
-# Needs 'no overloading' to run faster on miniperl.  Code commented out at the
-# subroutine objaddr can be used instead to work as far back (untested) as
-# 5.8: needs pack "U".  But almost all occurrences of objaddr have been
-# removed in favor of using 'no overloading'.  You also would have to go
-# through and replace occurrences like:
-#       my $addr = do { no overloading; pack 'J', $self; }
-# with
-#       my $addr = main::objaddr $self;
-# (or reverse commit 9b01bafde4b022706c3d6f947a0963f821b2e50b
-# that instituted the change to main::objaddr, and subsequent commits that
-# changed 0+$self to pack 'J', $self.)
+# There was an attempt when this was first rewritten to make it 5.8
+# compatible, but that has now been abandoned, and newer constructs are used
+# as convenient.
 
 my $start_time;
 BEGIN { # Get the time the script started running; do it at compilation to
@@ -32,6 +24,7 @@ use File::Path;
 use File::Spec;
 use Text::Tabs;
 use re "/aa";
+use feature 'state';
 
 sub DEBUG () { 0 }  # Set to 0 for production; 1 for development
 my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
@@ -292,8 +285,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # As mentioned earlier, some properties are given in more than one file.  In
 # particular, the files in the extracted directory are supposedly just
 # reformattings of the others.  But they contain information not easily
-# derivable from the other files, including results for Unihan, which this
-# program doesn't ordinarily look at, and for unassigned code points.  They
+# derivable from the other files, including results for Unihan (which isn't
+# usually available to this program) and for unassigned code points.  They
 # also have historically had errors or been incomplete.  In an attempt to
 # create the best possible data, this program thus processes them first to
 # glean information missing from the other files; then processes those other
@@ -411,24 +404,19 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 #
 # A NOTE ON UNIHAN
 #
-# This program can generate tables from the Unihan database.  But it doesn't
-# by default, letting the CPAN module Unicode::Unihan handle them.  Prior to
-# version 5.2, this database was in a single file, Unihan.txt.  In 5.2 the
-# database was split into 8 different files, all beginning with the letters
-# 'Unihan'.  This program will read those file(s) if present, but it needs to
-# know which of the many properties in the file(s) should have tables created
-# for them.  It will create tables for any properties listed in
-# PropertyAliases.txt and PropValueAliases.txt, plus any listed in the
-# @cjk_properties array and the @cjk_property_values array.  Thus, if a
-# property you want is not in those files of the release you are building
-# against, you must add it to those two arrays.  Starting in 4.0, the
-# Unicode_Radical_Stroke was listed in those files, so if the Unihan database
-# is present in the directory, a table will be generated for that property.
-# In 5.2, several more properties were added.  For your convenience, the two
-# arrays are initialized with all the 6.0 listed properties that are also in
-# earlier releases.  But these are commented out.  You can just uncomment the
-# ones you want, or use them as a template for adding entries for other
-# properties.
+# This program can generate tables from the Unihan database.  But that db
+# isn't normally available, so it is marked as optional.  Prior to version
+# 5.2, this database was in a single file, Unihan.txt.  In 5.2 the database
+# was split into 8 different files, all beginning with the letters 'Unihan'.
+# If you plunk those files down into the directory mktables ($0) is in, this
+# program will read them and automatically create tables for the properties
+# from it that are listed in PropertyAliases.txt and PropValueAliases.txt,
+# plus any you add to the @cjk_properties array and the @cjk_property_values
+# array, being sure to add necessary '# @missings' lines to the latter.  For
+# Unicode versions earlier than 5.2, most of the Unihan properties are not
+# listed at all in PropertyAliases nor PropValueAliases.  This program assumes
+# for these early releases that you want the properties that are specified in
+# the 5.2 release.
 #
 # You may need to adjust the entries to suit your purposes.  setup_unihan(),
 # and filter_unihan_line() are the functions where this is done.  This program
@@ -437,8 +425,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 #
 # There is a bug in the 3.2 data file in which some values for the
 # kPrimaryNumeric property have commas and an unexpected comment.  A filter
-# could be added for these; or for a particular installation, the Unihan.txt
-# file could be edited to fix them.
+# could be added to correct these; or for a particular installation, the
+# Unihan.txt file could be edited to fix them.
 #
 # HOW TO ADD A FILE TO BE PROCESSED
 #
@@ -484,13 +472,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # handled by Unicode::Normalize, nor will it compile when presented a version
 # that has them.  However, you can trivially get it to compile by simply
 # ignoring those decompositions, by changing the croak to a carp.  At the time
-# of this writing, the line (in cpan/Unicode-Normalize/mkheader) reads
+# of this writing, the line (in cpan/Unicode-Normalize/Normalize.pm or
+# cpan/Unicode-Normalize/mkheader) reads
 #
 #   croak("Weird Canonical Decomposition of U+$h");
 #
 # Simply comment it out.  It will compile, but will not know about any three
-# character decompositions.  If using the .pm version, there is a similar
-# line.
+# character decompositions.
 
 # The number of code points in \p{alpha=True} halved in 2.1.9.  It turns out
 # that the reason is that the CJK block starting at 4E00 was removed from
@@ -513,10 +501,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # name for the class, it would not have been affected, but if it used the
 # mnemonic, it would have been.
 #
-# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that code
+# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that, code
 # points which eventually came to have this script property value, instead
 # mapped to "Unknown".  But in the next release all these code points were
 # moved to \p{sc=common} instead.
+
+# The tests furnished  by Unicode for testing WordBreak and SentenceBreak
+# generate errors in 5.0 and earlier.
 #
 # The default for missing code points for BidiClass is complicated.  Starting
 # in 3.1.1, the derived file DBidiClass.txt handles this, but this program
@@ -596,8 +587,8 @@ our $to_trace = 0;
                                     || $caller_name eq 'trace');
 
         my $output = "";
+        #print STDERR __LINE__, ": ", join ", ", @input, "\n";
         foreach my $string (@input) {
-            #print STDERR __LINE__, ": ", join ", ", @input, "\n";
             if (ref $string eq 'ARRAY' || ref $string eq 'HASH') {
                 $output .= simple_dumper($string);
             }
@@ -623,10 +614,11 @@ our $to_trace = 0;
 
 # This is for a rarely used development feature that allows you to compare two
 # versions of the Unicode standard without having to deal with changes caused
-# by the code points introduced in the later version.  Change the 0 to a
-# string containing a SINGLE dotted Unicode release number (e.g. "2.1").  Only
-# code points introduced in that release and earlier will be used; later ones
-# are thrown away.  You use the version number of the earliest one you want to
+# by the code points introduced in the later version.  You probably also want
+# to use the -annotate option when using this.  Change the 0 to a string
+# containing a SINGLE dotted Unicode release number (e.g. "2.1").  Only code
+# points introduced in that release and earlier will be used; later ones are
+# thrown away.  You use the version number of the earliest one you want to
 # compare; then run this program on directory structures containing each
 # release, and compare the outputs.  These outputs will therefore include only
 # the code points common to both releases, and you can see the changes caused
@@ -861,33 +853,8 @@ if ($v_version gt v3.2.0) {
                                 'Canonical_Combining_Class=Attached_Below_Left'
 }
 
-# These are listed in the Property aliases file in 6.0, but Unihan is ignored
-# unless explicitly added.
-if ($v_version ge v5.2.0) {
-    my $unihan = 'Unihan; remove from list if using Unihan';
-    foreach my $table (qw (
-                           kAccountingNumeric
-                           kOtherNumeric
-                           kPrimaryNumeric
-                           kCompatibilityVariant
-                           kIICore
-                           kIRG_GSource
-                           kIRG_HSource
-                           kIRG_JSource
-                           kIRG_KPSource
-                           kIRG_MSource
-                           kIRG_KSource
-                           kIRG_TSource
-                           kIRG_USource
-                           kIRG_VSource
-                           kRSUnicode
-                        ))
-    {
-        $why_suppress_if_empty_warn_if_not{$table} = $unihan;
-    }
-}
-
-# Enum values for to_output_map() method in the Map_Table package.
+# Enum values for to_output_map() method in the Map_Table package. (0 is don't
+# output)
 my $EXTERNAL_MAP = 1;
 my $INTERNAL_MAP = 2;
 my $OUTPUT_ADJUSTED = 3;
@@ -913,13 +880,6 @@ my %global_to_output_map = (
     Decomposition_Type => 0,
 );
 
-# Properties that this program ignores.
-my @unimplemented_properties;
-
-# With this release, it is automatically handled if the Unihan db is
-# downloaded
-push @unimplemented_properties, 'Unicode_Radical_Stroke' if $v_version le v5.2.0;
-
 # There are several types of obsolete properties defined by Unicode.  These
 # must be hand-edited for every new Unicode release.
 my %why_deprecated;  # Generates a deprecated warning message if used.
@@ -959,8 +919,6 @@ my %why_obsolete;    # Documentation only
         # existence is not noted in the comment.
         'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or prop_invmap() or charprop() in Unicode::UCD::',
 
-        'Indic_Matra_Category' => "Withdrawn by Unicode while still provisional",
-
         # Don't suppress ISO_Comment, as otherwise special handling is needed
         # to differentiate between it and gc=c, which can be written as 'isc',
         # which is the same characters as ISO_Comment's short name.
@@ -1046,45 +1004,13 @@ if ($v_version ge v6.0.0) {
 my @output_mapped_properties = split "\n", <<END;
 END
 
-# If you are using the Unihan database in a Unicode version before 5.2, you
-# need to add the properties that you want to extract from it to this table.
-# For your convenience, the properties in the 6.0 PropertyAliases.txt file are
-# listed, commented out
+# If you want more Unihan properties than the default, you need to add them to
+# these arrays.  Depending on the property type, @missing lines might have to
+# be added to the second array.  A sample entry would be (including the '#'):
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
 my @cjk_properties = split "\n", <<'END';
-#cjkAccountingNumeric; kAccountingNumeric
-#cjkOtherNumeric; kOtherNumeric
-#cjkPrimaryNumeric; kPrimaryNumeric
-#cjkCompatibilityVariant; kCompatibilityVariant
-#cjkIICore ; kIICore
-#cjkIRG_GSource; kIRG_GSource
-#cjkIRG_HSource; kIRG_HSource
-#cjkIRG_JSource; kIRG_JSource
-#cjkIRG_KPSource; kIRG_KPSource
-#cjkIRG_KSource; kIRG_KSource
-#cjkIRG_TSource; kIRG_TSource
-#cjkIRG_USource; kIRG_USource
-#cjkIRG_VSource; kIRG_VSource
-#cjkRSUnicode; kRSUnicode                ; Unicode_Radical_Stroke; URS
 END
-
-# Similarly for the property values.  For your convenience, the lines in the
-# 6.0 PropertyAliases.txt file are listed.  Just remove the first BUT NOT both
-# '#' marks (for Unicode versions before 5.2)
 my @cjk_property_values = split "\n", <<'END';
-## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
-## @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
-## @missing: 0000..10FFFF; cjkIICore; <none>
-## @missing: 0000..10FFFF; cjkIRG_GSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_HSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_JSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KPSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_TSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_USource; <none>
-## @missing: 0000..10FFFF; cjkIRG_VSource; <none>
-## @missing: 0000..10FFFF; cjkOtherNumeric; NaN
-## @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
-## @missing: 0000..10FFFF; cjkRSUnicode; <none>
 END
 
 # The input files don't list every code point.  Those not listed are to be
@@ -1109,7 +1035,7 @@ my %default_mapping = (
     Decomposition_Type => 'None',
     East_Asian_Width => "Neutral",
     FC_NFKC_Closure => $CODE_POINT,
-    General_Category => 'Cn',
+    General_Category => ($v_version le 6.3.0) ? 'Cn' : 'Unassigned',
     Grapheme_Cluster_Break => 'Other',
     Hangul_Syllable_Type => 'NA',
     ISO_Comment => "",
@@ -1140,39 +1066,6 @@ my %default_mapping = (
     Word_Break => 'Other',
 );
 
-# Below are files that Unicode furnishes, but this program ignores, and why.
-# NormalizationCorrections.txt requires some more explanation.  It documents
-# the cumulative fixes to erroneous normalizations in earlier Unicode
-# versions.  Its main purpose is so that someone running on an earlier version
-# can use this file to override what got published in that earlier release.
-# It would be easy for mktables to read and handle this file.  But all the
-# corrections in it should already be in the other files for the release it
-# is.  To get it to actually mean something useful, someone would have to be
-# using an earlier Unicode release, and copy it to the files for that release
-# and recomplile.  So far there has been no demand to do that, so this hasn't
-# been implemented.
-my %ignored_files = (
-    'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points',
-    'Index.txt' => 'Alphabetical index of Unicode characters',
-    'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl',
-    'NamesList.txt' => 'Annotated list of characters',
-    'NamesList.html' => 'Describes the format and contents of F<NamesList.txt>',
-    'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base',
-    'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)',
-    'ReadMe.txt' => 'Documentation',
-    'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized.  This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
-    'StandardizedVariants.html' => 'Provides a visual display of the standard variant sequences derived from F<StandardizedVariants.txt>.',
-    'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
-    'USourceData.txt' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters',
-    'USourceGlyphs.pdf' => 'Pictures of the characters in F<USourceData.txt>',
-    'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
-);
-
-my %skipped_files;  # List of files that we skip
-
 ### End of externally interesting definitions, except for @input_file_objects
 
 my $HEADER=<<"EOF";
@@ -1199,7 +1092,9 @@ my $DEVELOPMENT_ONLY=<<"EOF";
 
 EOF
 
-my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF";
+my $MAX_UNICODE_CODEPOINT_STRING = ($v_version ge v2.0.0)
+                                   ? "10FFFF"
+                                   : "FFFF";
 my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING;
 my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1;
 
@@ -1229,8 +1124,7 @@ my $code_point_re = qr/\b$run_on_code_point_re/;
 # defaults for code points not listed (i.e., missing) in the file.  The code
 # depends on this ending with a semi-colon, so it can assume it is a valid
 # field when the line is split() by semi-colons
-my $missing_defaults_prefix =
-            qr/^#\s+\@missing:\s+0000\.\.$MAX_UNICODE_CODEPOINT_STRING\s*;/;
+my $missing_defaults_prefix = qr/^#\s+\@missing:\s+0000\.\.10FFFF\s*;/;
 
 # Property types.  Unicode has more types, but these are sufficient for our
 # purposes.
@@ -1307,11 +1201,15 @@ my $OBSOLETE = 'O';
 my $a_bold_obsolete = "an 'B<$OBSOLETE>'";
 my $A_bold_obsolete = "An 'B<$OBSOLETE>'";
 
+# Aliases can also have an extra status:
+my $INTERNAL_ALIAS = 'P';
+
 my %status_past_participles = (
     $DISCOURAGED => 'discouraged',
     $STABILIZED => 'stabilized',
     $OBSOLETE => 'obsolete',
     $DEPRECATED => 'deprecated',
+    $INTERNAL_ALIAS => 'reserved for Perl core internal use only',
 );
 
 # Table fates.  These are somewhat ordered, so that fates < $MAP_PROXIED should be
@@ -1374,18 +1272,21 @@ my %loose_to_file_of;       # loosely maps table names to their respective
                             # files
 my %stricter_to_file_of;    # same; but for stricter mapping.
 my %loose_property_to_file_of; # Maps a loose property name to its map file
+my %strict_property_to_file_of; # Same, but strict
 my @inline_definitions = "V0"; # Each element gives a definition of a unique
                             # inversion list.  When a definition is inlined,
                             # its value in the hash it's in (one of the two
                             # defined just above) will include an index into
                             # this array.  The 0th element is initialized to
-                            # the definition for a zero length invwersion list
+                            # the definition for a zero length inversion list
 my %file_to_swash_name;     # Maps the file name to its corresponding key name
                             # in the hash %utf8::SwashInfo
 my %nv_floating_to_rational; # maps numeric values floating point numbers to
                              # their rational equivalent
 my %loose_property_name_of; # Loosely maps (non_string) property names to
                             # standard form
+my %strict_property_name_of; # Strictly maps (non_string) property names to
+                            # standard form
 my %string_property_loose_to_name; # Same, for string properties.
 my %loose_defaults;         # keys are of form "prop=value", where 'prop' is
                             # the property name in standard loose form, and
@@ -1395,12 +1296,16 @@ my %loose_to_standard_value; # loosely maps table names to the canonical
                             # alias for them
 my %ambiguous_names;        # keys are alias names (in standard form) that
                             # have more than one possible meaning.
+my %combination_property;   # keys are alias names (in standard form) that
+                            # have both a map table, and a binary one that
+                            # yields true for all non-null maps.
 my %prop_aliases;           # Keys are standard property name; values are each
                             # one's aliases
 my %prop_value_aliases;     # Keys of top level are standard property name;
                             # values are keys to another hash,  Each one is
                             # one of the property's values, in standard form.
                             # The values are that prop-val's aliases.
+my %skipped_files;          # List of files that we skip
 my %ucd_pod;    # Holds entries that will go into the UCD section of the pod
 
 # Most properties are immune to caseless matching, otherwise you would get
@@ -1477,6 +1382,8 @@ my @named_sequences;       # NamedSequences.txt contents.
 my %potential_files;       # Generated list of all .txt files in the directory
                            # structure so we can warn if something is being
                            # ignored.
+my @missing_early_files;   # Generated list of absent files that we need to
+                           # proceed in compiling this early Unicode version
 my @files_actually_output; # List of files we generated.
 my @more_Names;            # Some code point names are compound; this is used
                            # to store the extra components of them.
@@ -1493,6 +1400,7 @@ my $block;
 my $perl_charname;
 my $print;
 my $All;
+my $Assigned;   # All assigned characters in this Unicode release
 my $script;
 
 # Are there conflicting names because of beginning with 'In_', or 'Is_'
@@ -1616,7 +1524,7 @@ sub populate_char_info ($) {
         elsif ($gc-> table('Unassigned')->contains($i)) {
             $annotate_char_type[$i] = $UNASSIGNED_TYPE;
             $printable[$i] = 0;
-            if ($v_version lt v2.0.0) { # No blocks in earliest releases
+            if (defined $block) { # No blocks in earliest releases
                 $viacode[$i] = 'Unassigned';
                 $end = $gc-> table('Unassigned')->containing_range($i)->end;
             }
@@ -1633,12 +1541,7 @@ sub populate_char_info ($) {
                                                     containing_range($i)->end);
             }
         }
-        elsif ($v_version lt v2.0.0) {  # No surrogates in earliest releases
-            $viacode[$i] = $gc->value_of($i);
-            $annotate_char_type[$i] = $UNKNOWN_TYPE;
-            $printable[$i] = 0;
-        }
-        elsif ($gc-> table('Surrogate')->contains($i)) {
+        elsif ($perl->table('_Perl_Surrogate')->contains($i)) {
             $viacode[$i] = 'Surrogate';
             $annotate_char_type[$i] = $SURROGATE_TYPE;
             $printable[$i] = 0;
@@ -2110,6 +2013,7 @@ package Input_file;
 # while(next_line()) {...} loop.
 #
 # You can also set up handlers to
+#   0) call during object construction time, after everything else is done
 #   1) call before the first line is read, for pre processing
 #   2) call to adjust each line of the input before the main handler gets
 #      them.  This can be automatically generated, if appropriately simple
@@ -2121,19 +2025,29 @@ package Input_file;
 # each_line_handler()s.  So, if the format of the line is not in the desired
 # format for the main handler, these are used to do that adjusting.  They can
 # be stacked (by enclosing them in an [ anonymous array ] in the constructor,
-# so the $_ output of one is used as the input to the next.  None of the other
-# handlers are stackable, but could easily be changed to be so.
+# so the $_ output of one is used as the input to the next.  The eof handler
+# is also stackable, but none of the others are, but could easily be changed
+# to be so.
+#
+# Some properties are used by the Perl core but aren't defined until later
+# Unicode releases.  The perl interpreter would have problems working when
+# compiled with an earlier Unicode version that doesn't have them, so we need
+# to define them somehow for those releases.  The 'Early' constructor
+# parameter can be used to automatically handle this.  It is essentially
+# ignored if the Unicode version being compiled has a data file for this
+# property.  Either code to execute or a file to read can be specified.
+# Details are at the %early definition.
 #
 # Most of the handlers can call insert_lines() or insert_adjusted_lines()
 # which insert the parameters as lines to be processed before the next input
-# file line is read.  This allows the EOF handler to flush buffers, for
+# file line is read.  This allows the EOF handler(s) to flush buffers, for
 # example.  The difference between the two routines is that the lines inserted
 # by insert_lines() are subjected to the each_line_handler()s.  (So if you
-# called it from such a handler, you would get infinite recursion.)  Lines
-# inserted by insert_adjusted_lines() go directly to the main handler without
-# any adjustments.  If the  post-processing handler calls any of these, there
-# will be no effect.  Some error checking for these conditions could be added,
-# but it hasn't been done.
+# called it from such a handler, you would get infinite recursion without some
+# mechanism to prevent that.)  Lines inserted by insert_adjusted_lines() go
+# directly to the main handler without any adjustments.  If the
+# post-processing handler calls any of these, there will be no effect.  Some
+# error checking for these conditions could be added, but it hasn't been done.
 #
 # carp_bad_line() should be called to warn of bad input lines, which clears $_
 # to prevent further processing of the line.  This routine will output the
@@ -2169,10 +2083,16 @@ sub trace { return main::trace(@_); }
     main::set_access('property', \%property, qw{ c r });
 
     my %optional;
-    # If this is true, the file is optional.  If not present, no warning is
-    # output.  If it is present, the string given by this parameter is
-    # evaluated, and if false the file is not processed.
-    main::set_access('optional', \%optional, 'c', 'r');
+    # This is either an unsigned number, or a list of property names.  In the
+    # former case, if it is non-zero, it means the file is optional, so if the
+    # file is absent, no warning about that is output.  In the latter case, it
+    # is a list of properties that the file (exclusively) defines.  If the
+    # file is present, tables for those properties will be produced; if
+    # absent, none will, even if they are listed elsewhere (namely
+    # PropertyAliases.txt and PropValueAliases.txt) as being in this release,
+    # and no warnings will be raised about them not being available.  (And no
+    # warning about the file itself will be raised.)
+    main::set_access('optional', \%optional, qw{ c readable_array } );
 
     my %non_skip;
     # This is used for debugging, to skip processing of all but a few input
@@ -2181,16 +2101,19 @@ sub trace { return main::trace(@_); }
     main::set_access('non_skip', \%non_skip, 'c');
 
     my %skip;
-    # This is used to skip processing of this input file semi-permanently,
-    # when it evaluates to true.  The value should be the reason the file is
-    # being skipped.  It is used for files that we aren't planning to process
-    # anytime soon, but want to allow to be in the directory and not raise a
-    # message that we are not handling.  Mostly for test files.  This is in
-    # contrast to the non_skip element, which is supposed to be used very
-    # temporarily for debugging.  Sets 'optional' to 1.  Also, files that we
-    # pretty much will never look at can be placed in the global
-    # %ignored_files instead.  Ones used here will be added to %skipped files
-    main::set_access('skip', \%skip, 'c');
+    # This is used to skip processing of this input file (semi-) permanently.
+    # The value should be the reason the file is being skipped.  It is used
+    # for files that we aren't planning to process anytime soon, but want to
+    # allow to be in the directory and be checked for their names not
+    # conflicting with any other files on a DOS 8.3 name filesystem, but to
+    # not otherwise be processed, and to not raise a warning about not being
+    # handled.  In the constructor call, any value that evaluates to a numeric
+    # 0 or undef means don't skip.  Any other value is a string giving the
+    # reason it is being skippped, and this will appear in generated pod.
+    # However, an empty string reason will suppress the pod entry.
+    # Internally, calls that evaluate to numeric 0 are changed into undef to
+    # distinguish them from an empty string call.
+    main::set_access('skip', \%skip, 'c', 'r');
 
     my %each_line_handler;
     # list of subroutines to look at and filter each non-comment line in the
@@ -2221,21 +2144,28 @@ sub trace { return main::trace(@_); }
     main::set_access('has_missings_defaults',
                                         \%has_missings_defaults, qw{ c r });
 
+    my %construction_time_handler;
+    # Subroutine to call at the end of the new method.  If undef, no such
+    # handler is called.
+    main::set_access('construction_time_handler',
+                                        \%construction_time_handler, qw{ c });
+
     my %pre_handler;
     # Subroutine to call before doing anything else in the file.  If undef, no
     # such handler is called.
     main::set_access('pre_handler', \%pre_handler, qw{ c });
 
     my %eof_handler;
-    # Subroutine to call upon getting an EOF on the input file, but before
+    # Subroutines to call upon getting an EOF on the input file, but before
     # that is returned to the main handler.  This is to allow buffers to be
     # flushed.  The handler is expected to call insert_lines() or
     # insert_adjusted() with the buffered material
-    main::set_access('eof_handler', \%eof_handler, qw{ c r });
+    main::set_access('eof_handler', \%eof_handler, qw{ c });
 
     my %post_handler;
     # Subroutine to call after all the lines of the file are read in and
-    # processed.  If undef, no such handler is called.
+    # processed.  If undef, no such handler is called.  Note that this cannot
+    # add lines to be processed; instead use eof_handler
     main::set_access('post_handler', \%post_handler, qw{ c });
 
     my %progress_message;
@@ -2263,6 +2193,69 @@ sub trace { return main::trace(@_); }
     # storage of '@missing' defaults lines
     main::set_access('missings', \%missings);
 
+    my %early;
+    # Used for properties that must be defined (for Perl's purposes) on
+    # versions of Unicode earlier than Unicode itself defines them.  The
+    # parameter is an array (it would be better to be a hash, but not worth
+    # bothering about due to its rare use).
+    #
+    # The first element is either a code reference to call when in a release
+    # earlier than the Unicode file is available in, or it is an alternate
+    # file to use instead of the non-existent one.  This file must have been
+    # plunked down in the same directory as mktables.  Should you be compiling
+    # on a release that needs such a file, mktables will abort the
+    # compilation, and tell you where to get the necessary file(s), and what
+    # name(s) to use to store them as.
+    # In the case of specifying an alternate file, the array must contain two
+    # further elements:
+    #
+    # [1] is the name of the property that will be generated by this file.
+    # The class automatically takes the input file and excludes any code
+    # points in it that were not assigned in the Unicode version being
+    # compiled.  It then uses this result to define the property in the given
+    # version.  Since the property doesn't actually exist in the Unicode
+    # version being compiled, this should be a name accessible only by core
+    # perl.  If it is the same name as the regular property, the constructor
+    # will mark the output table as a $PLACEHOLDER so that it doesn't actually
+    # get output, and so will be unusable by non-core code.  Otherwise it gets
+    # marked as $INTERNAL_ONLY.
+    #
+    # [2] is a property value to assign (only when compiling Unicode 1.1.5) to
+    # the Hangul syllables in that release (which were ripped out in version
+    # 2) for the given property .  (Hence it is ignored except when compiling
+    # version 1.  You only get one value that applies to all of them, which
+    # may not be the actual reality, but probably nobody cares anyway for
+    # these obsolete characters.)
+    #
+    # Not all files can be handled in the above way, and so the code ref
+    # alternative is available.  It can do whatever it needs to.  The other
+    # array elements are optional in this case, and the code is free to use or
+    # ignore them if they are present.
+    #
+    # Internally, the constructor unshifts a 0 or 1 onto this array to
+    # indicate if an early alternative is actually being used or not.  This
+    # makes for easier testing later on.
+    main::set_access('early', \%early, 'c');
+
+    my %required_even_in_debug_skip;
+    # debug_skip is used to speed up compilation during debugging by skipping
+    # processing files that are not needed for the task at hand.  However,
+    # some files pretty much can never be skipped, and this is used to specify
+    # that this is one of them.  In order to skip this file, the call to the
+    # constructor must be edited to comment out this parameter.
+    main::set_access('required_even_in_debug_skip',
+                     \%required_even_in_debug_skip, 'c');
+
+    my %withdrawn;
+    # Some files get removed from the Unicode DB.  This is a version object
+    # giving the first release without this file.
+    main::set_access('withdrawn', \%withdrawn, 'c');
+
+    my %in_this_release;
+    # Calculated value from %first_released and %withdrawn.  Are we compiling
+    # a Unicode release which includes this file?
+    main::set_access('in_this_release', \%in_this_release);
+
     sub _next_line;
     sub _next_line_with_remapped_range;
 
@@ -2275,22 +2268,23 @@ sub trace { return main::trace(@_); }
         # Set defaults
         $handler{$addr} = \&main::process_generic_property_file;
         $non_skip{$addr} = 0;
-        $skip{$addr} = 0;
+        $skip{$addr} = undef;
         $has_missings_defaults{$addr} = $NO_DEFAULTS;
         $handle{$addr} = undef;
         $added_lines{$addr} = [ ];
         $remapped_lines{$addr} = [ ];
         $each_line_handler{$addr} = [ ];
+        $eof_handler{$addr} = [ ];
         $errors{$addr} = { };
         $missings{$addr} = [ ];
+        $early{$addr} = [ ];
+        $optional{$addr} = [ ];
 
         # Two positional parameters.
         return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
         $file{$addr} = main::internal_file_to_platform(shift);
         $first_released{$addr} = shift;
 
-        undef $file{$addr} if $first_released{$addr} gt $v_version;
-
         # The rest of the arguments are key => value pairs
         # %constructor_fields has been set up earlier to list all possible
         # ones.  Either set or push, depending on how the default has been set
@@ -2322,30 +2316,206 @@ sub trace { return main::trace(@_); }
             delete $args{$key};
         };
 
-        # If the file has a property for it, it means that the property is not
-        # listed in the file's entries.  So add a handler to the list of line
-        # handlers to insert the property name into the lines, to provide a
-        # uniform interface to the final processing subroutine.
-        # the final code doesn't have to worry about that.
-        if ($property{$addr}) {
-            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
+        $non_skip{$addr} = 1 if $required_even_in_debug_skip{$addr};
+
+        # Convert 0 (meaning don't skip) to undef
+        undef $skip{$addr} unless $skip{$addr};
+
+        # Handle the case where this file is optional
+        my $pod_message_for_non_existent_optional = "";
+        if ($optional{$addr}->@*) {
+
+            # First element is the pod message
+            $pod_message_for_non_existent_optional
+                                                = shift $optional{$addr}->@*;
+            # Convert a 0 'Optional' argument to an empty list to make later
+            # code more concise.
+            if (   $optional{$addr}->@*
+                && $optional{$addr}->@* == 1
+                && $optional{$addr}[0] ne ""
+                && $optional{$addr}[0] !~ /\D/
+                && $optional{$addr}[0] == 0)
+            {
+                $optional{$addr} = [ ];
+            }
+            else {  # But if the only element doesn't evaluate to 0, make sure
+                    # that this file is indeed considered optional below.
+                unshift $optional{$addr}->@*, 1;
+            }
+        }
+
+        my $progress;
+        my $function_instead_of_file = 0;
+
+        # If we are compiling a Unicode release earlier than the file became
+        # available, the constructor may have supplied a substitute
+        if ($first_released{$addr} gt $v_version && $early{$addr}->@*) {
+
+            # Yes, we have a substitute, that we will use; mark it so
+            unshift $early{$addr}->@*, 1;
+
+            # See the definition of %early for what the array elements mean.
+            # If we have a property this defines, create a table and default
+            # map for it now (at essentially compile time), so that it will be
+            # available for the whole of run time.  (We will want to add this
+            # name as an alias when we are using the official property name;
+            # but this must be deferred until run(), because at construction
+            # time the official names have yet to be defined.)
+            if ($early{$addr}[2]) {
+                my $fate = ($property{$addr}
+                            && $property{$addr} eq $early{$addr}[2])
+                          ? $PLACEHOLDER
+                          : $INTERNAL_ONLY;
+                my $prop_object = Property->new($early{$addr}[2],
+                                                Fate => $fate,
+                                                Perl_Extension => 1,
+                                                );
+
+                # Use the default mapping for the regular property for this
+                # substitute one.
+                if (    defined $property{$addr}
+                    &&  defined $default_mapping{$property{$addr}})
+                {
+                    $prop_object
+                        ->set_default_map($default_mapping{$property{$addr}});
+                }
+            }
+
+            if (ref $early{$addr}[1] eq 'CODE') {
+                $function_instead_of_file = 1;
+
+                # If the first element of the array is a code ref, the others
+                # are optional.
+                $handler{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2]
+                                                if defined $early{$addr}[2];
+                $progress = "substitute $file{$addr}";
+
+                undef $file{$addr};
+            }
+            else {  # Specifying a substitute file
+
+                if (! main::file_exists($early{$addr}[1])) {
+
+                    # If we don't see the substitute file, generate an error
+                    # message giving the needed things, and add it to the list
+                    # of such to output before actual processing happens
+                    # (hence the user finds out all of them in one run).
+                    # Instead of creating a general method for NameAliases,
+                    # hard-code it here, as there is unlikely to ever be a
+                    # second one which needs special handling.
+                    my $string_version = ($file{$addr} eq "NameAliases.txt")
+                                    ? 'at least 6.1 (the later, the better)'
+                                    : sprintf "%vd", $first_released{$addr};
+                    push @missing_early_files, <<END;
+'$file{$addr}' version $string_version should be copied to '$early{$addr}[1]'.
+END
+                    ;
+                    return;
+                }
+                $progress = $early{$addr}[1];
+                $progress .= ", substituting for $file{$addr}" if $file{$addr};
+                $file{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2];
+
+                # Ignore code points not in the version being compiled
+                push $each_line_handler{$addr}->@*, \&_exclude_unassigned;
+
+                if (   $v_version lt v2.0        # Hanguls in this release ...
+                    && defined $early{$addr}[3]) # ... need special treatment
+                {
+                    push $eof_handler{$addr}->@*, \&_fixup_obsolete_hanguls;
+                }
+            }
+
+            # And this substitute is valid for all releases.
+            $first_released{$addr} = v0;
+        }
+        else {  # Normal behavior
+            $progress = $file{$addr};
+            unshift $early{$addr}->@*, 0; # No substitute
         }
 
-        if ($non_skip{$addr} && ! $debug_skip && $verbosity) {
-            print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
+        my $file = $file{$addr};
+        $progress_message{$addr} = "Processing $progress"
+                                            unless $progress_message{$addr};
+
+        # A file should be there if it is within the window of versions for
+        # which Unicode supplies it
+        if ($withdrawn{$addr} && $withdrawn{$addr} le $v_version) {
+            $in_this_release{$addr} = 0;
+            $skip{$addr} = "";
         }
+        else {
+            $in_this_release{$addr} = $first_released{$addr} le $v_version;
 
-        # If skipping, set to optional, and add to list of ignored files,
-        # including its reason
-        if ($skip{$addr}) {
-            $optional{$addr} = 1;
-            $skipped_files{$file{$addr}} = $skip{$addr} if $file{$addr};
+            # Check that the file for this object (possibly using a substitute
+            # for early releases) exists or we have a function alternative
+            if (   ! $function_instead_of_file
+                && ! main::file_exists($file))
+            {
+                # Here there is nothing available for this release.  This is
+                # fine if we aren't expecting anything in this release.
+                if (! $in_this_release{$addr}) {
+                    $skip{$addr} = "";  # Don't remark since we expected
+                                        # nothing and got nothing
+                }
+                elsif ($optional{$addr}->@*) {
+
+                    # Here the file is optional in this release; Use the
+                    # passed in text to document this case in the pod.
+                    $skip{$addr} = $pod_message_for_non_existent_optional;
+                }
+                elsif (   $in_this_release{$addr}
+                       && ! defined $skip{$addr}
+                       && defined $file)
+                { # Doesn't exist but should.
+                    $skip{$addr} = "'$file' not found.  Possibly Big problems";
+                    Carp::my_carp($skip{$addr});
+                }
+            }
+            elsif ($debug_skip && ! defined $skip{$addr} && ! $non_skip{$addr})
+            {
+
+                # The file exists; if not skipped for another reason, and we are
+                # skipping most everything during debugging builds, use that as
+                # the skip reason.
+                $skip{$addr} = '$debug_skip is on'
+            }
+        }
+
+        if (   ! $debug_skip
+            && $non_skip{$addr}
+            && ! $required_even_in_debug_skip{$addr}
+            && $verbosity)
+        {
+            print "Warning: " . __PACKAGE__ . " constructor for $file has useless 'non_skip' in it\n";
+        }
+
+        # Here, we have figured out if we will be skipping this file or not.
+        # If so, we add any single property it defines to any passed in
+        # optional property list.  These will be dealt with at run time.
+        if (defined $skip{$addr}) {
+            if ($property{$addr}) {
+                push $optional{$addr}->@*, $property{$addr};
+            }
+        } # Otherwise, are going to process the file.
+        elsif ($property{$addr}) {
+
+            # If the file has a property defined in the constructor for it, it
+            # means that the property is not listed in the file's entries.  So
+            # add a handler (to the list of line handlers) to insert the
+            # property name into the lines, to provide a uniform interface to
+            # the final processing subroutine.
+            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
         }
         elsif ($properties{$addr}) {
 
-            # Add a handler for each line in the input so that it creates a
-            # separate input line for each property in those input lines, thus
-            # making them suitable for process_generic_property_file().
+            # Similarly, there may be more than one property represented on
+            # each line, with no clue but the constructor input what those
+            # might be.  Add a handler for each line in the input so that it
+            # creates a separate input line for each property in those input
+            # lines, thus making them suitable to handle generically.
 
             push @{$each_line_handler{$addr}},
                  sub {
@@ -2376,7 +2546,7 @@ sub trace { return main::trace(@_); }
                 };
         }
 
-        {   # On non-ascii platforms, we use a special handler
+        {   # On non-ascii platforms, we use a special pre-handler
             no strict;
             no warnings 'once';
             *next_line = (main::NON_ASCII_PLATFORM)
@@ -2384,6 +2554,9 @@ sub trace { return main::trace(@_); }
                          : *_next_line;
         }
 
+        &{$construction_time_handler{$addr}}($self)
+                                        if $construction_time_handler{$addr};
+
         return $self;
     }
 
@@ -2401,13 +2574,13 @@ sub trace { return main::trace(@_); }
         return __PACKAGE__ . " object for " . $self->file;
     }
 
-    # flag to make sure extracted files are processed early
-    my $seen_non_extracted_non_age = 0;
-
     sub run {
         # Process the input object $self.  This opens and closes the file and
         # calls all the handlers for it.  Currently,  this can only be called
-        # once per file, as it destroy's the EOF handler
+        # once per file, as it destroy's the EOF handlers
+
+        # flag to make sure extracted files are processed early
+        state $seen_non_extracted_non_age = 0;
 
         my $self = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -2416,61 +2589,14 @@ sub trace { return main::trace(@_); }
 
         my $file = $file{$addr};
 
-        # Don't process if not expecting this file (because released later
-        # than this Unicode version), and isn't there.  This means if someone
-        # copies it into an earlier version's directory, we will go ahead and
-        # process it.
-        return if $first_released{$addr} gt $v_version
-                  && (! defined $file || ! -e $file);
-
-        # If in debugging mode and this file doesn't have the non-skip
-        # flag set, and isn't one of the critical files, skip it.
-        if ($debug_skip
-            && $first_released{$addr} ne v0
-            && ! $non_skip{$addr})
-        {
-            print "Skipping $file in debugging\n" if $verbosity;
-            return;
-        }
-
-        # File could be optional
-        if ($optional{$addr}) {
-            return unless -e $file;
-            my $result = eval $optional{$addr};
-            if (! defined $result) {
-                Carp::my_carp_bug("Got '$@' when tried to eval $optional{$addr}.  $file Skipped.");
-                return;
-            }
-            if (! $result) {
-                if ($verbosity) {
-                    print STDERR "Skipping processing input file '$file' because '$optional{$addr}' is not true\n";
-                }
-                return;
-            }
-        }
-
-        if (! defined $file || ! -e $file) {
-
-            # If the file doesn't exist, see if have internal data for it
-            # (based on first_released being 0).
-            if ($first_released{$addr} eq v0) {
-                $handle{$addr} = 'pretend_is_open';
-            }
-            else {
-                if (! $optional{$addr}  # File could be optional
-                    && $v_version ge $first_released{$addr})
-                {
-                    print STDERR "Skipping processing input file '$file' because not found\n" if $v_version ge $first_released{$addr};
-                }
-                return;
-            }
+        if (! $file) {
+            $handle{$addr} = 'pretend_is_open';
         }
         else {
-
-            # Here, the file exists.  Some platforms may change the case of
-            # its name
             if ($seen_non_extracted_non_age) {
-                if ($file =~ /$EXTRACTED/i) {
+                if ($file =~ /$EXTRACTED/i) # Some platforms may change the
+                                            # case of the file's name
+                {
                     Carp::my_carp_bug(main::join_lines(<<END
 $file should be processed just after the 'Prop...Alias' files, and before
 anything not in the $EXTRACTED_DIR directory.  Proceeding, but the results may
@@ -2480,7 +2606,10 @@ END
                 }
             }
             elsif ($EXTRACTED_DIR
-                    && $first_released{$addr} ne v0
+
+                    # We only do this check for generic property files
+                    && $handler{$addr} == \&main::process_generic_property_file
+
                     && $file !~ /$EXTRACTED/i
                     && lc($file) ne 'dage.txt')
             {
@@ -2490,71 +2619,137 @@ END
                 $seen_non_extracted_non_age = 1;
             }
 
-            # And mark the file as having being processed, and warn if it
+            # Mark the file as having being processed, and warn if it
             # isn't a file we are expecting.  As we process the files,
             # they are deleted from the hash, so any that remain at the
             # end of the program are files that we didn't process.
             my $fkey = File::Spec->rel2abs($file);
-            my $expecting = delete $potential_files{lc($fkey)};
+            my $exists = delete $potential_files{lc($fkey)};
+
+            Carp::my_carp("Was not expecting '$file'.")
+                                    if $exists && ! $in_this_release{$addr};
+
+            # If there is special handling for compiling Unicode releases
+            # earlier than the first one in which Unicode defines this
+            # property ...
+            if ($early{$addr}->@* > 1) {
+
+                # Mark as processed any substitute file that would be used in
+                # such a release
+                $fkey = File::Spec->rel2abs($early{$addr}[1]);
+                delete $potential_files{lc($fkey)};
+
+                # As commented in the constructor code, when using the
+                # official property, we still have to allow the publicly
+                # inaccessible early name so that the core code which uses it
+                # will work regardless.
+                if (! $early{$addr}[0] && $early{$addr}->@* > 2) {
+                    my $early_property_name = $early{$addr}[2];
+                    if ($property{$addr} ne $early_property_name) {
+                        main::property_ref($property{$addr})
+                                            ->add_alias($early_property_name);
+                    }
+                }
+            }
+
+            # We may be skipping this file ...
+            if (defined $skip{$addr}) {
 
-            Carp::my_carp("Was not expecting '$file'.") if
-                    ! $expecting
-                    && ! defined $handle{$addr};
+                # If the file isn't supposed to be in this release, there is
+                # nothing to do
+                if ($in_this_release{$addr}) {
+
+                    # But otherwise, we may print a message
+                    if ($debug_skip) {
+                        print STDERR "Skipping input file '$file'",
+                                     " because '$skip{$addr}'\n";
+                    }
+
+                    # And add it to the list of skipped files, which is later
+                    # used to make the pod
+                    $skipped_files{$file} = $skip{$addr};
+
+                    # The 'optional' list contains properties that are also to
+                    # be skipped along with the file.  (There may also be
+                    # digits which are just placeholders to make sure it isn't
+                    # an empty list
+                    foreach my $property ($optional{$addr}->@*) {
+                        next unless $property =~ /\D/;
+                        my $prop_object = main::property_ref($property);
+                        next unless defined $prop_object;
+                        $prop_object->set_fate($SUPPRESSED, $skip{$addr});
+                    }
+                }
 
-            # Having deleted from expected files, we can quit if not to do
-            # anything.  Don't print progress unless really want verbosity
-            if ($skip{$addr}) {
-                print "Skipping $file.\n" if $verbosity >= $VERBOSE;
                 return;
             }
 
-            # Open the file, converting the slashes used in this program
-            # into the proper form for the OS
+            # Here, we are going to process the file.  Open it, converting the
+            # slashes used in this program into the proper form for the OS
             my $file_handle;
             if (not open $file_handle, "<", $file) {
                 Carp::my_carp("Can't open $file.  Skipping: $!");
-                return 0;
+                return;
             }
             $handle{$addr} = $file_handle; # Cache the open file handle
 
-            if ($v_version ge v3.2.0 && lc($file) ne 'unicodedata.txt') {
+            # If possible, make sure that the file is the correct version.
+            # (This data isn't available on early Unicode releases or in
+            # UnicodeData.txt.)  We don't do this check if we are using a
+            # substitute file instead of the official one (though the code
+            # could be extended to do so).
+            if ($in_this_release{$addr}
+                && ! $early{$addr}[0]
+                && lc($file) ne 'unicodedata.txt')
+            {
                 if ($file !~ /^Unihan/i) {
-                    $_ = <$file_handle>;
-                    if ($_ !~ / - $string_version \. /x) {
-                        chomp;
-                        $_ =~ s/^#\s*//;
-                        die Carp::my_carp("File '$file' is version '$_'.  It should be version $string_version");
+
+                    # The non-Unihan files started getting version numbers in
+                    # 3.2, but some files in 4.0 are unchanged from 3.2, and
+                    # marked as 3.2.  4.0.1 is the first version where there
+                    # are no files marked as being from less than 4.0, though
+                    # some are marked as 4.0.  In versions after that, the
+                    # numbers are correct.
+                    if ($v_version ge v4.0.1) {
+                        $_ = <$file_handle>;    # The version number is in the
+                                                # very first line
+                        if ($_ !~ / - $string_version \. /x) {
+                            chomp;
+                            $_ =~ s/^#\s*//;
+
+                            # 4.0.1 had some valid files that weren't updated.
+                            if (! ($v_version eq v4.0.1 && $_ =~ /4\.0\.0/)) {
+                                die Carp::my_carp("File '$file' is version "
+                                                . "'$_'.  It should be "
+                                                . "version $string_version");
+                            }
+                        }
                     }
                 }
-                else {
+                elsif ($v_version ge v6.0.0) { # Unihan
+
+                    # Unihan files didn't get accurate version numbers until
+                    # 6.0.  The version is somewhere in the first comment
+                    # block
                     while (<$file_handle>) {
                         if ($_ !~ /^#/) {
-                            Carp::my_carp_bug("Could not find the expected version info in file '$file'");
+                            Carp::my_carp_bug("Could not find the expected "
+                                            . "version info in file '$file'");
                             last;
                         }
                         chomp;
                         $_ =~ s/^#\s*//;
                         next if $_ !~ / version: /x;
                         last if $_ =~ /$string_version/;
-                        die Carp::my_carp("File '$file' is '$_'.  It should be version $string_version");
+                        die Carp::my_carp("File '$file' is version "
+                                        . "'$_'.  It should be "
+                                        . "version $string_version");
                     }
                 }
             }
         }
 
-        if ($verbosity >= $PROGRESS) {
-            if ($progress_message{$addr}) {
-                print "$progress_message{$addr}\n";
-            }
-            else {
-                # If using a virtual file, say so.
-                print "Processing ", (-e $file)
-                                       ? $file
-                                       : "substitute $file",
-                                     "\n";
-            }
-        }
-
+        print "$progress_message{$addr}\n" if $verbosity >= $PROGRESS;
 
         # Call any special handler for before the file.
         &{$pre_handler{$addr}}($self) if $pre_handler{$addr};
@@ -2742,11 +2937,11 @@ END
             return 1;
         } # End of looping through lines.
 
-        # If there is an EOF handler, call it (only once) and if it generates
+        # If there are EOF handlers, call each (only once) and if it generates
         # more lines to process go back in the loop to handle them.
-        if ($eof_handler{$addr}) {
-            &{$eof_handler{$addr}}($self);
-            $eof_handler{$addr} = "";   # Currently only get one shot at it.
+        while ($eof_handler{$addr}->@*) {
+            &{$eof_handler{$addr}[0]}($self);
+            shift $eof_handler{$addr}->@*;   # Currently only get one shot at it.
             goto LINE if $added_lines{$addr};
         }
 
@@ -2943,6 +3138,82 @@ END
         return @return;
     }
 
+    sub _exclude_unassigned {
+
+        # Takes the range in $_ and excludes code points that aren't assigned
+        # in this release
+
+        state $skip_inserted_count = 0;
+
+        # Ignore recursive calls.
+        if ($skip_inserted_count) {
+            $skip_inserted_count--;
+            return;
+        }
+
+        # Find what code points are assigned in this release
+        main::calculate_Assigned() if ! defined $Assigned;
+
+        my $self = shift;
+        my $addr = do { no overloading; pack 'J', $self; };
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my ($range, @remainder)
+            = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+
+        # Examine the range.
+        if ($range =~ /^ ($code_point_re) (?:\.\. ($code_point_re) )? $/x)
+        {
+            my $low = hex $1;
+            my $high = (defined $2) ? hex $2 : $low;
+
+            # Split the range into subranges of just those code points in it
+            # that are assigned.
+            my @ranges = (Range_List->new(Initialize
+                              => Range->new($low, $high)) & $Assigned)->ranges;
+
+            # Do nothing if nothing in the original range is assigned in this
+            # release; handle normally if everything is in this release.
+            if (! @ranges) {
+                $_ = "";
+            }
+            elsif (@ranges != 1) {
+
+                # Here, some code points in the original range aren't in this
+                # release; @ranges gives the ones that are.  Create fake input
+                # lines for each of the ranges, and set things up so that when
+                # this routine is called on that fake input, it will do
+                # nothing.
+                $skip_inserted_count = @ranges;
+                my $remainder = join ";", @remainder;
+                for my $range (@ranges) {
+                    $self->insert_lines(sprintf("%04X..%04X;%s",
+                                    $range->start, $range->end, $remainder));
+                }
+                $_ = "";    # The original range is now defunct.
+            }
+        }
+
+        return;
+    }
+
+    sub _fixup_obsolete_hanguls {
+
+        # This is called only when compiling Unicode version 1.  All Unicode
+        # data for subsequent releases assumes that the code points that were
+        # Hangul syllables in this release only are something else, so if
+        # using such data, we have to override it
+
+        my $self = shift;
+        my $addr = do { no overloading; pack 'J', $self; };
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $object = main::property_ref($property{$addr});
+        $object->add_map(0x3400, 0x4DFF,
+                         $early{$addr}[3],  # Passed-in value for these
+                         Replace => $UNCONDITIONALLY);
+    }
+
     sub _insert_property_into_line {
         # Add a property field to $_, if this file requires it.
 
@@ -3027,6 +3298,8 @@ package Multi_Default;
         #        .
         #        .
         #        'U'));
+        # It is best to leave the final value be the one that matches the
+        # above-Unicode code points.
 
         my $class = shift;
 
@@ -3732,7 +4005,7 @@ sub trace { return main::trace(@_); }
         #       => $MULTIPLE_BEFORE means that if this range duplicates an
         #                         existing one, but has a different value,
         #                         don't replace the existing one, but insert
-        #                         this, one so that the same range can occur
+        #                         this one so that the same range can occur
         #                         multiple times.  They are stored LIFO, so
         #                         that the final one inserted is the first one
         #                         returned in an ordered search of the table.
@@ -3747,6 +4020,7 @@ sub trace { return main::trace(@_); }
         #                         existing range, this one is discarded
         #                         (leaving the existing one in its original,
         #                         higher priority position
+        #       => $CROAK         Die with an error if is already there
         #       => anything else  is the same as => $IF_NOT_EQUIVALENT
         #
         # "same value" means identical for non-type-0 ranges, and it means
@@ -3835,7 +4109,7 @@ sub trace { return main::trace(@_); }
 
                 # Here, the new range starts just after the current highest in
                 # the range list, and they have the same type and value.
-                # Extend the current range to incorporate the new one.
+                # Extend the existing range to incorporate the new one.
                 @{$r}[-1]->set_end($end);
             }
 
@@ -4228,7 +4502,7 @@ sub trace { return main::trace(@_); }
             # In other words,
             #   r[$i-1]->end < $start <= r[$i]->end
             # And:
-            #   r[$i-1]->end < $start <= $end <= r[$j+1]->start
+            #   r[$i-1]->end < $start <= $end < r[$j+1]->start
             #
             # Also:
             #   $clean_insert is a boolean which is set true if and only if
@@ -5093,6 +5367,7 @@ sub trace { return main::trace(@_); }
         my $note = delete $args{'Note'};
         my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
         my $perl_extension = delete $args{'Perl_Extension'};
+        my $suppression_reason = delete $args{'Suppression_Reason'};
 
         # Shouldn't have any left over
         Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5134,11 +5409,12 @@ END
         {
             $fate{$addr} = $SUPPRESSED;
         }
-        elsif ($fate{$addr} == $SUPPRESSED
-               && ! exists $why_suppressed{$property{$addr}->complete_name})
-        {
-            Carp::my_carp_bug("There is no current capability to set the reason for suppressing.");
-            # perhaps Fate => [ $SUPPRESSED, "reason" ]
+        elsif ($fate{$addr} == $SUPPRESSED) {
+            Carp::my_carp_bug("Need reason for suppressing") unless $suppression_reason;
+            # Though currently unused
+        }
+        elsif ($suppression_reason) {
+            Carp::my_carp_bug("A reason was given for suppressing, but not suppressed");
         }
 
         # If hasn't set its status already, see if it is on one of the
@@ -5266,17 +5542,18 @@ END
         my %args = @_;
         my $loose_match = delete $args{'Fuzzy'};
 
-        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
-        $make_re_pod_entry = $YES unless defined $make_re_pod_entry;
-
         my $ok_as_filename = delete $args{'OK_as_Filename'};
         $ok_as_filename = 1 unless defined $ok_as_filename;
 
-        my $status = delete $args{'Status'};
-        $status = $NORMAL unless defined $status;
-
         # An internal name does not get documented, unless overridden by the
-        # input.
+        # input; same for making tests for it.
+        my $status = delete $args{'Status'} || (($name =~ /^_/)
+                                                ? $INTERNAL_ALIAS
+                                                : $NORMAL);
+        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'}
+                                            // (($status ne $INTERNAL_ALIAS)
+                                               ? (($name =~ /^_/) ? $NO : $YES)
+                                               : $NO);
         my $ucd = delete $args{'UCD'} // (($name =~ /^_/) ? 0 : 1);
 
         Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5346,7 +5623,7 @@ END
                 $insert_position,
                 0,
                 Alias->new($name, $loose_match, $make_re_pod_entry,
-                                                $ok_as_filename, $status, $ucd);
+                           $ok_as_filename, $status, $ucd);
 
         # This name may be shorter than any existing ones, so clear the cache
         # of the shortest, so will have to be recalculated.
@@ -6267,7 +6544,8 @@ END
         }
 
         # Save the reason for suppression for output
-        if ($fate == $SUPPRESSED && defined $reason) {
+        if ($fate >= $SUPPRESSED) {
+            $reason = "" unless defined $reason;
             $why_suppressed{$complete_name{$addr}} = $reason;
         }
 
@@ -6763,7 +7041,7 @@ END
         # The ranges that map to the default aren't output, so subtract that
         # to get those actually output.  A property with matching tables
         # already has the information calculated.
-        if ($property->type != $STRING) {
+        if ($property->type != $STRING && $property->type != $FORCED_BINARY) {
             $count -= $property->table($default_map)->count;
         }
         elsif (defined $default_map) {
@@ -6839,9 +7117,11 @@ END
         $comment .= "This file returns the $mapping:\n";
 
         my $ucd_accessible_name = "";
+        my $has_underscore_name = 0;
         my $full_name = $self->property->full_name;
         for my $i (0 .. @property_aliases - 1) {
             my $name = $property_aliases[$i]->name;
+            $has_underscore_name = 1 if $name =~ /^_/;
             $comment .= sprintf("%-8s%s\n", " ", $name . '(cp)');
             if ($property_aliases[$i]->ucd) {
                 if ($name eq $full_name) {
@@ -6854,7 +7134,12 @@ END
         }
         $comment .= "\nwhere 'cp' is $cp.";
         if ($ucd_accessible_name) {
-            $comment .= "  Note that $these_mappings $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+            $comment .= "  Note that $these_mappings";
+            if ($has_underscore_name) {
+                $comment .= " (except for the one(s) that begin with an underscore)";
+            }
+            $comment .= " $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+
         }
 
         # And append any commentary already set from the actual property.
@@ -6888,7 +7173,7 @@ END
                 # There are tables which end up only having one element per
                 # range, but it is not worth keeping track of for making just
                 # this comment a little better.
-                $comment.= <<END;
+                $comment .= <<END;
 non-comment portions of the main body of lines of this file is:
 START\\tSTOP\\tMAPPING where START is the starting code point of the
 range, in hex; STOP is the ending point, or if omitted, the range has just one
@@ -7848,7 +8133,6 @@ END
                             main::uniques($leader, @{$equivalents{$addr}});
         my $has_unrelated = (@parents >= 2);  # boolean, ? are there unrelated
                                               # tables
-
         for my $parent (@parents) {
 
             my $property = $parent->property;
@@ -7906,7 +8190,7 @@ END
                 # commentary that the other combinations are possible.
                 # Because regular expressions don't recognize things like
                 # \p{jsn=}, only look at non-null right-hand-sides
-                my @property_aliases = $table_property->aliases;
+                my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table_property->aliases;
                 my @table_aliases = grep { $_->name ne "" } $table->aliases;
 
                 # The alias lists above are already ordered in the order we
@@ -7918,8 +8202,7 @@ END
                                     ?  main::max(scalar @table_aliases,
                                                  scalar @property_aliases)
                                     : 0;
-                trace "$listed_combos, tables=", scalar @table_aliases, "; names=", scalar @property_aliases if main::DEBUG;
-
+                trace "$listed_combos, tables=", scalar @table_aliases, "; property names=", scalar @property_aliases if main::DEBUG;
 
                 my $property_had_compound_name = 0;
 
@@ -8070,8 +8353,14 @@ END
             foreach my $flag (sort keys %flags) {
                 $comment .= <<END;
 '$flag' below means that this form is $flags{$flag}.
-Consult $pod_file.pod
 END
+                if ($flag eq $INTERNAL_ALIAS) {
+                    $comment .= "DO NOT USE!!!";
+                }
+                else {
+                    $comment .= "Consult $pod_file.pod";
+                }
+                $comment .= "\n";
             }
             $comment .= "\n";
         }
@@ -8436,18 +8725,27 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
             $perl_extension
                         = $self->perl_extension if ! defined $perl_extension;
 
+            my $fate;
+            my $suppression_reason = "";
+            if ($self->name =~ /^_/) {
+                $fate = $SUPPRESSED;
+                $suppression_reason = "Parent property is internal only";
+            }
+            elsif ($self->fate >= $SUPPRESSED) {
+                $fate = $self->fate;
+                $suppression_reason = $why_suppressed{$self->complete_name};
+
+            }
+            elsif ($name =~ /^_/) {
+                $fate = $INTERNAL_ONLY;
+            }
             $table = Match_Table->new(
                                 Name => $name,
                                 Perl_Extension => $perl_extension,
                                 _Alias_Hash => $table_ref{$addr},
                                 _Property => $self,
-
-                                # gets property's fate and status by default,
-                                # except if the name begind with an
-                                # underscore, default it to internal
-                                Fate => ($name =~ /^_/)
-                                         ? $INTERNAL_ONLY
-                                         : $self->fate,
+                                Fate => $fate,
+                                Suppression_Reason => $suppression_reason,
                                 Status => $self->status,
                                 _Status_Info => $self->status_info,
                                 %args);
@@ -8562,10 +8860,13 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         # Swash names are used only on either
         # 1) legacy-only properties, because the formats for these are
         #    unchangeable, and they have had these lines in them; or
-        # 2) regular map tables; otherwise there should be no access to the
+        # 2) regular or internal-only map tables
+        # 3) otherwise there should be no access to the
         #    property map table from other parts of Perl.
         return if $map{$addr}->fate != $ORDINARY
-                  && $map{$addr}->fate != $LEGACY_ONLY;
+                  && $map{$addr}->fate != $LEGACY_ONLY
+                  && ! ($map{$addr}->name =~ /^_/
+                        && $map{$addr}->fate == $INTERNAL_ONLY);
 
         return $file{$addr} if defined $file{$addr};
         return $map{$addr}->external_name;
@@ -8589,9 +8890,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         # to it.
         return 0 if $type{$addr} == $STRING;
 
-        # Don't generate anything for unimplemented properties.
-        return 0 if grep { $self->complete_name eq $_ }
-                                                    @unimplemented_properties;
         # Otherwise, do.
         return 1;
     }
@@ -8808,7 +9106,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
         my $addr = do { no overloading; pack 'J', $self; };
-        if ($fate == $SUPPRESSED) {
+        if ($fate >= $SUPPRESSED) {
             $why_suppressed{$self->complete_name} = $reason;
         }
 
@@ -8894,15 +9192,15 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
 
 package main;
 
-    sub display_chr {
-        # Converts an ordinal printable character value to a displayable
-        # string, using a dotted circle to hold combining characters.
+sub display_chr {
+    # Converts an ordinal printable character value to a displayable string,
+    # using a dotted circle to hold combining characters.
 
-        my $ord = shift;
-        my $chr = chr $ord;
-        return $chr if $ccc->table(0)->contains($ord);
-        return "\x{25CC}$chr";
-    }
+    my $ord = shift;
+    my $chr = chr $ord;
+    return $chr if $ccc->table(0)->contains($ord);
+    return "\x{25CC}$chr";
+}
 
 sub join_lines($) {
     # Returns lines of the input joined together, so that they can be folded
@@ -9443,7 +9741,6 @@ sub dump_inside_out {
 
     my $object = shift;
     my $fields_ref = shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
     my $addr = do { no overloading; pack 'J', $object; };
 
@@ -9520,6 +9817,17 @@ sub _operator_not_equal {
     return ! _operator_equal($self, $other);
 }
 
+sub substitute_PropertyAliases($) {
+    # Deal with early releases that don't have the crucial PropertyAliases.txt
+    # file.
+
+    my $file_object = shift;
+    $file_object->insert_lines(get_old_property_aliases());
+
+    process_PropertyAliases($file_object);
+}
+
+
 sub process_PropertyAliases($) {
     # This reads in the PropertyAliases.txt file, which contains almost all
     # the character properties in Unicode and their equivalent aliases:
@@ -9532,11 +9840,6 @@ sub process_PropertyAliases($) {
     my $file= shift;
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This whole file was non-existent in early releases, so use our own
-    # internal one.
-    $file->insert_lines(get_old_property_aliases())
-                                                if ! -e 'PropertyAliases.txt';
-
     # Add any cjk properties that may have been defined.
     $file->insert_lines(@cjk_properties);
 
@@ -9546,8 +9849,17 @@ sub process_PropertyAliases($) {
 
         my $full = $data[1];
 
+        # This line is defective in early Perls.  The property in Unihan.txt
+        # is kRSUnicode.
+        if ($full eq 'Unicode_Radical_Stroke' && @data < 3) {
+            push @data, qw(cjkRSUnicode kRSUnicode);
+        }
+
         my $this = Property->new($data[0], Full_Name => $full);
 
+        $this->set_fate($SUPPRESSED, $why_suppressed{$full})
+                                                    if $why_suppressed{$full};
+
         # Start looking for more aliases after these two.
         for my $i (2 .. @data - 1) {
             $this->add_alias($data[$i]);
@@ -9573,18 +9885,6 @@ sub finish_property_setup {
         Property->new('JSN', Full_Name => 'Jamo_Short_Name');
     }
 
-    # These two properties must be defined in all releases so we can generate
-    # the tables from them to make regex \X work, but suppress their output so
-    # aren't application visible prior to releases where they should be
-    if (! defined property_ref('GCB')) {
-        Property->new('GCB', Full_Name => 'Grapheme_Cluster_Break',
-                      Fate => $PLACEHOLDER);
-    }
-    if (! defined property_ref('hst')) {
-        Property->new('hst', Full_Name => 'Hangul_Syllable_Type',
-                      Fate => $PLACEHOLDER);
-    }
-
     # These are used so much, that we set globals for them.
     $gc = property_ref('General_Category');
     $block = property_ref('Block');
@@ -9701,22 +10001,15 @@ sub finish_property_setup {
     # for non-assigned code points; 'AL' for assigned.
     if (file_exists("${EXTRACTED}DLineBreak.txt") || -e 'LineBreak.txt') {
         my $lb = property_ref('Line_Break');
-        if ($v_version gt 3.2.0) {
+        if (file_exists("${EXTRACTED}DLineBreak.txt")) {
             $lb->set_default_map('Unknown');
         }
         else {
-            my $default = Multi_Default->new( 'Unknown' => '$gc->table("Cn")',
-                                              'AL');
+            my $default = Multi_Default->new('AL' => '~ $gc->table("Cn")',
+                                             'Unknown',
+                                            );
             $lb->set_default_map($default);
         }
-
-        # If has the URS property, make sure that the standard aliases are in
-        # it, since not in the input tables in some versions.
-        my $urs = property_ref('Unicode_Radical_Stroke');
-        if (defined $urs) {
-            $urs->add_alias('cjkRSUnicode');
-            $urs->add_alias('kRSUnicode');
-        }
     }
 
     # For backwards compatibility with applications that may read the mapping
@@ -9874,6 +10167,16 @@ END
     return @return;
 }
 
+sub substitute_PropValueAliases($) {
+    # Deal with early releases that don't have the crucial
+    # PropValueAliases.txt file.
+
+    my $file_object = shift;
+    $file_object->insert_lines(get_old_property_value_aliases());
+
+    process_PropValueAliases($file_object);
+}
+
 sub process_PropValueAliases {
     # This file contains values that properties look like:
     # bc ; AL        ; Arabic_Letter
@@ -9899,35 +10202,29 @@ sub process_PropValueAliases {
     my $file= shift;
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This whole file was non-existent in early releases, so use our own
-    # internal one if necessary.
-    if (! -e 'PropValueAliases.txt') {
-        $file->insert_lines(get_old_property_value_aliases());
-    }
-
     if ($v_version lt 4.0.0) {
         $file->insert_lines(split /\n/, <<'END'
-hst; L                                ; Leading_Jamo
-hst; LV                               ; LV_Syllable
-hst; LVT                              ; LVT_Syllable
-hst; NA                               ; Not_Applicable
-hst; T                                ; Trailing_Jamo
-hst; V                                ; Vowel_Jamo
+Hangul_Syllable_Type; L                                ; Leading_Jamo
+Hangul_Syllable_Type; LV                               ; LV_Syllable
+Hangul_Syllable_Type; LVT                              ; LVT_Syllable
+Hangul_Syllable_Type; NA                               ; Not_Applicable
+Hangul_Syllable_Type; T                                ; Trailing_Jamo
+Hangul_Syllable_Type; V                                ; Vowel_Jamo
 END
         );
     }
     if ($v_version lt 4.1.0) {
         $file->insert_lines(split /\n/, <<'END'
-GCB; CN                               ; Control
-GCB; CR                               ; CR
-GCB; EX                               ; Extend
-GCB; L                                ; L
-GCB; LF                               ; LF
-GCB; LV                               ; LV
-GCB; LVT                              ; LVT
-GCB; T                                ; T
-GCB; V                                ; V
-GCB; XX                               ; Other
+_Perl_GCB; CN                               ; Control
+_Perl_GCB; CR                               ; CR
+_Perl_GCB; EX                               ; Extend
+_Perl_GCB; L                                ; L
+_Perl_GCB; LF                               ; LF
+_Perl_GCB; LV                               ; LV
+_Perl_GCB; LVT                              ; LVT
+_Perl_GCB; T                                ; T
+_Perl_GCB; V                                ; V
+_Perl_GCB; XX                               ; Other
 END
         );
     }
@@ -9942,7 +10239,6 @@ END
     # program generates for this block property value
     #$file->insert_lines('blk; n/a; Herited');
 
-
     # Process each line of the file ...
     while ($file->next_line) {
 
@@ -9959,6 +10255,11 @@ END
         # thus shifting the former field 0 to after them.)
         splice (@data, 0, 0, splice(@data, 1, 2)) if $property eq 'ccc';
 
+        if ($v_version le v5.0.0 && $property eq 'blk' && $data[1] =~ /-/) {
+            my $new_style = $data[1] =~ s/-/_/gr;
+            splice @data, 1, 0, $new_style;
+        }
+
         # Field 0 is a short name unless "n/a"; field 1 is the full name.  If
         # there is no short name, use the full one in element 1
         if ($data[0] eq "n/a") {
@@ -10552,7 +10853,8 @@ sub output_perl_charnames_line ($$) {
                                                             $line));
                     }
 
-                    # And process the first range, like any other.
+                    # And set things up so that the below will process this first
+                    # range, like any other.
                     $low = $this_range->start;
                     $high = $this_range->end;
                 }
@@ -10963,11 +11265,12 @@ END
         my $file = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-        # Create a new property specially located that is a combination of the
+        # Create a new property specially located that is a combination of
         # various Name properties: Name, Unicode_1_Name, Named Sequences, and
-        # Name_Alias properties.  (The final duplicates elements of the
-        # first.)  A comment for it will later be constructed based on the
-        # actual properties present and used
+        # _Perl_Name_Alias properties.  (The final one duplicates elements of the
+        # first, and starting in v6.1, is the same as the 'Name_Alias
+        # property.)  A comment for the new property will later be constructed
+        # based on the actual properties present and used
         $perl_charname = Property->new('Perl_Charnames',
                        Default_Map => "",
                        Directory => File::Spec->curdir(),
@@ -12028,10 +12331,8 @@ sub filter_old_style_case_folding {
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
     my @fields = split /\s*;\s*/;
-    if ($fields[0] =~ /^ 013 [01] $/x) { # The two turkish fields
-        $fields[1] = 'I';
-    }
-    elsif ($fields[1] eq 'L') {
+
+    if ($fields[1] eq 'L') {
         $fields[1] = 'C';             # L => C always
     }
     elsif ($fields[1] eq 'E') {
@@ -12339,6 +12640,68 @@ sub filter_numeric_value_line {
 { # Closure
     my %unihan_properties;
 
+    sub construct_unihan {
+
+        my $file_object = shift;
+
+        return unless file_exists($file_object->file);
+
+        if ($v_version lt v4.0.0) {
+            push @cjk_properties, 'URS ; Unicode_Radical_Stroke';
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none>
+END
+        }
+
+        if ($v_version ge v3.0.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkIRG_GSource; kIRG_GSource
+cjkIRG_JSource; kIRG_JSource
+cjkIRG_KSource; kIRG_KSource
+cjkIRG_TSource; kIRG_TSource
+cjkIRG_VSource; kIRG_VSource
+END
+        push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkIRG_GSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_JSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_KSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_TSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_VSource; <none>
+END
+        }
+        if ($v_version ge v3.1.0) {
+            push @cjk_properties, 'cjkIRG_HSource; kIRG_HSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_HSource; <none>';
+        }
+        if ($v_version ge v3.1.1) {
+            push @cjk_properties, 'cjkIRG_KPSource; kIRG_KPSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_KPSource; <none>';
+        }
+        if ($v_version ge v3.2.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkAccountingNumeric; kAccountingNumeric
+cjkCompatibilityVariant; kCompatibilityVariant
+cjkOtherNumeric; kOtherNumeric
+cjkPrimaryNumeric; kPrimaryNumeric
+END
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
+# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
+# @missing: 0000..10FFFF; cjkOtherNumeric; NaN
+# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
+END
+        }
+        if ($v_version gt v4.0.0) {
+            push @cjk_properties, 'cjkIRG_USource; kIRG_USource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_USource; <none>';
+        }
+
+        if ($v_version ge v4.1.0) {
+            push @cjk_properties, 'cjkIICore ; kIICore';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIICore; <none>';
+        }
+    }
+
     sub setup_unihan {
         # Do any special setup for Unihan properties.
 
@@ -12351,16 +12714,16 @@ sub filter_numeric_value_line {
         my $iicore = property_ref('kIICore');
         if (defined $iicore) {
             $iicore->set_type($FORCED_BINARY);
-            $iicore->table("Y")->add_note("Forced to a binary property as per unicode.org UAX #38.");
+            $iicore->table("Y")->add_note("Matches any code point which has a non-null value for this property; see unicode.org UAX #38.");
 
             # Unicode doesn't include the maps for this property, so don't
             # warn that they are missing.
             $iicore->set_pre_declared_maps(0);
             $iicore->add_comment(join_lines( <<END
-This property contains enum values, but Unicode UAX #38 says it should be
-interpreted as binary, so Perl creates tables for both 1) its enum values,
-plus 2) true/false tables in which it is considered true for all code points
-that have a non-null value
+This property contains string values, but any non-empty ones are considered to
+be 'core', so Perl creates tables for both: 1) its string values, plus 2)
+tables so that \\p{kIICore} matches any code point which has a non-empty
+value for this property.
 END
             ));
         }
@@ -12439,7 +12802,7 @@ sub filter_blocks_lines {
 
     # Change hyphens and blanks in the block name field only
     $fields[1] =~ s/[ -]/_/g;
-    $fields[1] =~ s/_ ( [a-z] ) /_\u$1/g;   # Capitalize first letter of word
+    $fields[1] =~ s/_ ( [a-z] ) /_\u$1/xg;   # Capitalize first letter of word
 
     $_ = join("; ", @fields);
     return;
@@ -12657,12 +13020,22 @@ sub generate_hst {
 END
 );
 
-    # The Hangul syllables in version 1 are completely different than what came
-    # after, so just ignore them there.
+    # The Hangul syllables in version 1 are at different code points than
+    # those that came along starting in version 2, and have different names;
+    # they comprise about 60% of the code points of the later version.
+    # From my (khw) research on them (see <558493EB.4000807@att.net>), the
+    # initial set is a subset of the later version, with different English
+    # transliterations.  I did not see an easy mapping between them.  The
+    # later set includes essentially all possibilities, even ones that aren't
+    # in modern use (if they ever were), and over 96% of the new ones are type
+    # LVT.  Mathematically, the early set must also contain a preponderance of
+    # LVT values.  In lieu of doing nothing, we just set them all to LVT, and
+    # expect that this will be right most of the time, which is better than
+    # not being right at all.
     if ($v_version lt v2.0.0) {
         my $property = property_ref($file->property);
+        $file->insert_lines("3400..4DFF; LVT\n");
         push @tables_that_may_be_empty, $property->table('LV')->complete_name;
-        push @tables_that_may_be_empty, $property->table('LVT')->complete_name;
         return;
     }
 
@@ -12712,7 +13085,6 @@ sub generate_GCB {
 
     # Also from http://www.unicode.org/reports/tr29/tr29-3.html.
     foreach my $code_point ( qw{
-                                40000
                                 09BE 09D7 0B3E 0B57 0BBE 0BD7 0CC2 0CD5 0CD6
                                 0D3E 0D57 0DCF 0DDF FF9E FF9F 1D165 1D16E 1D16F
                                 }
@@ -12735,275 +13107,50 @@ sub generate_GCB {
         generate_hst($file);
     }
 
-    return;
+    main::process_generic_property_file($file);
 }
 
-sub setup_early_name_alias {
-    my $file= shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This has the effect of pretending that the Name_Alias property was
-    # available in all Unicode releases.  Strictly speaking, this property
-    # should not be availabe in early releases, but doing this allows
-    # charnames.pm to work on older releases without change.  Prior to v5.16
-    # it had these names hard-coded inside it.  Unicode 6.1 came along and
-    # created these names, and so they were removed from charnames.
+sub fixup_early_perl_name_alias {
 
-    my $aliases = property_ref('Name_Alias');
-    if (! defined $aliases) {
-        $aliases = Property->new('Name_Alias', Default_Map => "");
-    }
+    # Different versions of Unicode have varying support for the name synonyms
+    # below.  Just include everything.  As of 6.1, all these are correct in
+    # the Unicode-supplied file.
 
-    $file->insert_lines(get_old_name_aliases());
+    my $file= shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    return;
-}
 
-sub get_old_name_aliases () {
+    # ALERT did not come along until 6.0, at which point it became preferred
+    # over BELL.  By inserting it last in early releases, BELL is preferred
+    # over it; and vice-vers in 6.0
+    my $type_for_bell = ($v_version lt v6.0.0)
+               ? 'correction'
+               : 'alternate';
+    $file->insert_lines(split /\n/, <<END
+0007;BELL; $type_for_bell
+000A;LINE FEED (LF);alternate
+000C;FORM FEED (FF);alternate
+000D;CARRIAGE RETURN (CR);alternate
+0085;NEXT LINE (NEL);alternate
+END
+
+    );
 
-    # The Unicode_1_Name field, contains most of these names.  One would
-    # expect, given the field's name, that its values would be fixed across
-    # versions, giving the true Unicode version 1 name for the character.
-    # Sadly, this is not the case.  Actually Version 1.1.5 had no names for
-    # any of the controls; Version 2.0 introduced names for the C0 controls,
-    # and 3.0 introduced C1 names.  3.0.1 removed the name INDEX; and 3.2
-    # changed some names: it
+    # One might think that the the 'Unicode_1_Name' field, could work for most
+    # of the above names, but sadly that field varies depending on the
+    # release.  Version 1.1.5 had no names for any of the controls; Version
+    # 2.0 introduced names for the C0 controls, and 3.0 introduced C1 names.
+    # 3.0.1 removed the name INDEX; and 3.2 changed some names:
     #   changed to parenthesized versions like "NEXT LINE" to
     #       "NEXT LINE (NEL)";
     #   changed PARTIAL LINE DOWN to PARTIAL LINE FORWARD
     #   changed PARTIAL LINE UP to PARTIAL LINE BACKWARD;;
     #   changed e.g. FILE SEPARATOR to INFORMATION SEPARATOR FOUR
-    # This list contains all the names that were defined so that
-    # charnames::vianame(), etc. understand them all EVEN if this version of
-    # Unicode didn't specify them (this could be construed as a bug).
-    # mktables elsewhere gives preference to the Unicode_1_Name field over
-    # these names, so that viacode() will return the correct value for that
-    # version of Unicode, except when that version doesn't define a name,
-    # viacode() will return one anyway (this also could be construed as a
-    # bug).  But these potential "bugs" allow for the smooth working of code
-    # on earlier Unicode releases.
-
-    my @return = split /\n/, <<'END';
-0000;NULL;control
-0000;NUL;abbreviation
-0001;START OF HEADING;control
-0001;SOH;abbreviation
-0002;START OF TEXT;control
-0002;STX;abbreviation
-0003;END OF TEXT;control
-0003;ETX;abbreviation
-0004;END OF TRANSMISSION;control
-0004;EOT;abbreviation
-0005;ENQUIRY;control
-0005;ENQ;abbreviation
-0006;ACKNOWLEDGE;control
-0006;ACK;abbreviation
-0007;BELL;control
-0007;BEL;abbreviation
-0008;BACKSPACE;control
-0008;BS;abbreviation
-0009;CHARACTER TABULATION;control
-0009;HORIZONTAL TABULATION;control
-0009;HT;abbreviation
-0009;TAB;abbreviation
-000A;LINE FEED;control
-000A;LINE FEED (LF);control
-000A;NEW LINE;control
-000A;END OF LINE;control
-000A;LF;abbreviation
-000A;NL;abbreviation
-000A;EOL;abbreviation
-000B;LINE TABULATION;control
-000B;VERTICAL TABULATION;control
-000B;VT;abbreviation
-000C;FORM FEED;control
-000C;FORM FEED (FF);control
-000C;FF;abbreviation
-000D;CARRIAGE RETURN;control
-000D;CARRIAGE RETURN (CR);control
-000D;CR;abbreviation
-000E;SHIFT OUT;control
-000E;LOCKING-SHIFT ONE;control
-000E;SO;abbreviation
-000F;SHIFT IN;control
-000F;LOCKING-SHIFT ZERO;control
-000F;SI;abbreviation
-0010;DATA LINK ESCAPE;control
-0010;DLE;abbreviation
-0011;DEVICE CONTROL ONE;control
-0011;DC1;abbreviation
-0012;DEVICE CONTROL TWO;control
-0012;DC2;abbreviation
-0013;DEVICE CONTROL THREE;control
-0013;DC3;abbreviation
-0014;DEVICE CONTROL FOUR;control
-0014;DC4;abbreviation
-0015;NEGATIVE ACKNOWLEDGE;control
-0015;NAK;abbreviation
-0016;SYNCHRONOUS IDLE;control
-0016;SYN;abbreviation
-0017;END OF TRANSMISSION BLOCK;control
-0017;ETB;abbreviation
-0018;CANCEL;control
-0018;CAN;abbreviation
-0019;END OF MEDIUM;control
-0019;EOM;abbreviation
-001A;SUBSTITUTE;control
-001A;SUB;abbreviation
-001B;ESCAPE;control
-001B;ESC;abbreviation
-001C;INFORMATION SEPARATOR FOUR;control
-001C;FILE SEPARATOR;control
-001C;FS;abbreviation
-001D;INFORMATION SEPARATOR THREE;control
-001D;GROUP SEPARATOR;control
-001D;GS;abbreviation
-001E;INFORMATION SEPARATOR TWO;control
-001E;RECORD SEPARATOR;control
-001E;RS;abbreviation
-001F;INFORMATION SEPARATOR ONE;control
-001F;UNIT SEPARATOR;control
-001F;US;abbreviation
-0020;SP;abbreviation
-007F;DELETE;control
-007F;DEL;abbreviation
-0080;PADDING CHARACTER;figment
-0080;PAD;abbreviation
-0081;HIGH OCTET PRESET;figment
-0081;HOP;abbreviation
-0082;BREAK PERMITTED HERE;control
-0082;BPH;abbreviation
-0083;NO BREAK HERE;control
-0083;NBH;abbreviation
-0084;INDEX;control
-0084;IND;abbreviation
-0085;NEXT LINE;control
-0085;NEXT LINE (NEL);control
-0085;NEL;abbreviation
-0086;START OF SELECTED AREA;control
-0086;SSA;abbreviation
-0087;END OF SELECTED AREA;control
-0087;ESA;abbreviation
-0088;CHARACTER TABULATION SET;control
-0088;HORIZONTAL TABULATION SET;control
-0088;HTS;abbreviation
-0089;CHARACTER TABULATION WITH JUSTIFICATION;control
-0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
-0089;HTJ;abbreviation
-008A;LINE TABULATION SET;control
-008A;VERTICAL TABULATION SET;control
-008A;VTS;abbreviation
-008B;PARTIAL LINE FORWARD;control
-008B;PARTIAL LINE DOWN;control
-008B;PLD;abbreviation
-008C;PARTIAL LINE BACKWARD;control
-008C;PARTIAL LINE UP;control
-008C;PLU;abbreviation
-008D;REVERSE LINE FEED;control
-008D;REVERSE INDEX;control
-008D;RI;abbreviation
-008E;SINGLE SHIFT TWO;control
-008E;SINGLE-SHIFT-2;control
-008E;SS2;abbreviation
-008F;SINGLE SHIFT THREE;control
-008F;SINGLE-SHIFT-3;control
-008F;SS3;abbreviation
-0090;DEVICE CONTROL STRING;control
-0090;DCS;abbreviation
-0091;PRIVATE USE ONE;control
-0091;PRIVATE USE-1;control
-0091;PU1;abbreviation
-0092;PRIVATE USE TWO;control
-0092;PRIVATE USE-2;control
-0092;PU2;abbreviation
-0093;SET TRANSMIT STATE;control
-0093;STS;abbreviation
-0094;CANCEL CHARACTER;control
-0094;CCH;abbreviation
-0095;MESSAGE WAITING;control
-0095;MW;abbreviation
-0096;START OF GUARDED AREA;control
-0096;START OF PROTECTED AREA;control
-0096;SPA;abbreviation
-0097;END OF GUARDED AREA;control
-0097;END OF PROTECTED AREA;control
-0097;EPA;abbreviation
-0098;START OF STRING;control
-0098;SOS;abbreviation
-0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
-0099;SGC;abbreviation
-009A;SINGLE CHARACTER INTRODUCER;control
-009A;SCI;abbreviation
-009B;CONTROL SEQUENCE INTRODUCER;control
-009B;CSI;abbreviation
-009C;STRING TERMINATOR;control
-009C;ST;abbreviation
-009D;OPERATING SYSTEM COMMAND;control
-009D;OSC;abbreviation
-009E;PRIVACY MESSAGE;control
-009E;PM;abbreviation
-009F;APPLICATION PROGRAM COMMAND;control
-009F;APC;abbreviation
-00A0;NBSP;abbreviation
-00AD;SHY;abbreviation
-200B;ZWSP;abbreviation
-200C;ZWNJ;abbreviation
-200D;ZWJ;abbreviation
-200E;LRM;abbreviation
-200F;RLM;abbreviation
-202A;LRE;abbreviation
-202B;RLE;abbreviation
-202C;PDF;abbreviation
-202D;LRO;abbreviation
-202E;RLO;abbreviation
-FEFF;BYTE ORDER MARK;alternate
-FEFF;BOM;abbreviation
-FEFF;ZWNBSP;abbreviation
-END
-
-    if ($v_version ge v3.0.0) {
-        push @return, split /\n/, <<'END';
-180B; FVS1; abbreviation
-180C; FVS2; abbreviation
-180D; FVS3; abbreviation
-180E; MVS; abbreviation
-202F; NNBSP; abbreviation
-END
-    }
-
-    if ($v_version ge v3.2.0) {
-        push @return, split /\n/, <<'END';
-034F; CGJ; abbreviation
-205F; MMSP; abbreviation
-2060; WJ; abbreviation
-END
-        # Add in VS1..VS16
-        my $cp = 0xFE00 - 1;
-        for my $i (1..16) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-    if ($v_version ge v4.0.0) { # Add in VS17..VS256
-        my $cp = 0xE0100 - 17;
-        for my $i (17..256) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-
-    # ALERT did not come along until 6.0, at which point it became preferred
-    # over BELL, and was never in the Unicode_1_Name field.  For the same
-    # reasons, that the other names are made known to all releases by this
-    # function, we make ALERT known too.  By inserting it
-    # last in early releases, BELL is preferred over it; and vice-vers in 6.0
-    my $alert = '0007; ALERT; control';
-    if ($v_version lt v6.0.0) {
-        push @return, $alert;
-    }
-    else {
-        unshift @return, $alert;
-    }
+    #
+    # All these are present in the 6.1 NameAliases.txt
 
-    return @return;
+    return;
 }
 
 sub filter_later_version_name_alias_line {
@@ -13030,14 +13177,29 @@ sub filter_later_version_name_alias_line {
 sub filter_early_version_name_alias_line {
 
     # Early versions did not have the trailing alias type field; implicitly it
-    # was 'correction'.   But our synthetic lines we add in this program do
-    # have it, so test for the type field.
-    $_ .= "; correction" if $_ !~ /;.*;/;
+    # was 'correction'.
+    $_ .= "; correction";
 
     filter_later_version_name_alias_line;
     return;
 }
 
+sub filter_all_caps_script_names {
+
+    # Some early Unicode releases had the script names in all CAPS.  This
+    # converts them to just the first letter of each word being capital.
+
+    my ($range, $script, @remainder)
+        = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+    my @words = split "_", $script;
+    for my $word (@words) {
+        $word =
+            ucfirst(lc($word)) if $word ne 'CJK';
+    }
+    $script = join "_", @words;
+    $_ = join ";", $range, $script, @remainder;
+}
+
 sub finish_Unicode() {
     # This routine should be called after all the Unicode files have been read
     # in.  It:
@@ -13188,72 +13350,81 @@ END
         # Add any remaining code points to the mapping, using the default for
         # missing code points.
         my $default_table;
-        if (defined (my $default_map = $property->default_map)) {
+        my $default_map = $property->default_map;
+        if ($property_type == $FORCED_BINARY) {
 
-            # Make sure there is a match table for the default
-            if (! defined ($default_table = $property->table($default_map))) {
-                $default_table = $property->add_match_table($default_map);
+            # A forced binary property creates a 'Y' table that matches all
+            # non-default values.  The actual string values are also written out
+            # as a map table.  (The default value will almost certainly be the
+            # empty string, so the pod glosses over the distinction, and just
+            # talks about empty vs non-empty.)
+            my $yes = $property->table("Y");
+            foreach my $range ($property->ranges) {
+                next if $range->value eq $default_map;
+                $yes->add_range($range->start, $range->end);
             }
+            $property->table("N")->set_complement($yes);
+        }
+        else {
+            if (defined $default_map) {
 
-            # And, if the property is binary, the default table will just
-            # be the complement of the other table.
-            if ($property_type == $BINARY) {
-                my $non_default_table;
-
-                # Find the non-default table.
-                for my $table ($property->tables) {
-                    next if $table == $default_table;
-                    $non_default_table = $table;
+                # Make sure there is a match table for the default
+                if (! defined ($default_table = $property->table($default_map)))
+                {
+                    $default_table = $property->add_match_table($default_map);
                 }
-                $default_table->set_complement($non_default_table);
-            }
-            else {
 
-                # This fills in any missing values with the default.  It's not
-                # necessary to do this with binary properties, as the default
-                # is defined completely in terms of the Y table.
-                $property->add_map(0, $MAX_WORKING_CODEPOINT,
-                                   $default_map, Replace => $NO);
-            }
-        }
+                # And, if the property is binary, the default table will just
+                # be the complement of the other table.
+                if ($property_type == $BINARY) {
+                    my $non_default_table;
 
-        # Have all we need to populate the match tables.
-        my $maps_should_be_defined = $property->pre_declared_maps;
-        foreach my $range ($property->ranges) {
-            my $map = $range->value;
-            my $table = $property->table($map);
-            if (! defined $table) {
+                    # Find the non-default table.
+                    for my $table ($property->tables) {
+                        if ($table == $default_table) {
+                            if ($v_version le v5.0.0) {
+                                $table->add_alias($_) for qw(N No F False);
+                            }
+                            next;
+                        } elsif ($v_version le v5.0.0) {
+                            $table->add_alias($_) for qw(Y Yes T True);
+                        }
+                        $non_default_table = $table;
+                    }
+                    $default_table->set_complement($non_default_table);
+                }
+                else {
 
-                # Integral and rational property values are not necessarily
-                # defined in PropValueAliases, but whether all the other ones
-                # should be depends on the property.
-                if ($maps_should_be_defined
-                    && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
-                {
-                    Carp::my_carp("Table '$property_name=$map' should have been defined.  Defining it now.")
+                    # This fills in any missing values with the default.  It's
+                    # not necessary to do this with binary properties, as the
+                    # default is defined completely in terms of the Y table.
+                    $property->add_map(0, $MAX_WORKING_CODEPOINT,
+                                    $default_map, Replace => $NO);
                 }
-                $table = $property->add_match_table($map);
             }
 
-            next if $table->complement != 0;    # Don't need to populate these
-            $table->add_range($range->start, $range->end);
-        }
+            # Have all we need to populate the match tables.
+            my $maps_should_be_defined = $property->pre_declared_maps;
+            foreach my $range ($property->ranges) {
+                my $map = $range->value;
+                my $table = $property->table($map);
+                if (! defined $table) {
 
-        # A forced binary property has additional true/false tables which
-        # should have been set up when it was forced into binary.  The false
-        # table matches exactly the same set as the property's default table.
-        # The true table matches the complement of that.  The false table is
-        # not the same as an additional set of aliases on top of the default
-        # table, so use 'set_equivalent_to'.  If it were implemented as
-        # additional aliases, various things would have to be adjusted, but
-        # especially, if the user wants to get a list of names for the table
-        # using Unicode::UCD::prop_value_aliases(), s/he should get a
-        # different set depending on whether they want the default table or
-        # the false table.
-        if ($property_type == $FORCED_BINARY) {
-            $property->table('N')->set_equivalent_to($default_table,
-                                                     Related => 1);
-            $property->table('Y')->set_complement($default_table);
+                    # Integral and rational property values are not
+                    # necessarily defined in PropValueAliases, but whether all
+                    # the other ones should be depends on the property.
+                    if ($maps_should_be_defined
+                        && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
+                    {
+                        Carp::my_carp("Table '$property_name=$map' should "
+                                    . "have been defined.  Defining it now.")
+                    }
+                    $table = $property->add_match_table($map);
+                }
+
+                next if $table->complement != 0; # Don't need to populate these
+                $table->add_range($range->start, $range->end);
+            }
         }
 
         # For Perl 5.6 compatibility, all properties matchable in regexes can
@@ -13321,8 +13492,6 @@ END
     $gc->table('Ll')->set_caseless_equivalent($LC);
     $gc->table('Lu')->set_caseless_equivalent($LC);
 
-    my $Cs = $gc->table('Cs');
-
     # Create digit and case fold tables with the original file names for
     # backwards compatibility with applications that read them directly.
     my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
@@ -13408,6 +13577,26 @@ sub pre_3_dot_1_Nl () {
     return $Nl;
 }
 
+sub calculate_Assigned() {  # Calculate the gc != Cn code points; may be
+                            # called before the Cn's are completely filled.
+                            # Works on Unicodes earlier than ones that
+                            # explicitly specify Cn.
+    return if defined $Assigned;
+
+    if (! defined $gc || $gc->is_empty()) {
+        Carp::my_carp_bug("calculate_Assigned() called before $gc is populated");
+    }
+
+    $Assigned = $perl->add_match_table('Assigned',
+                                Description  => "All assigned code points",
+                                );
+    while (defined (my $range = $gc->each_range())) {
+        my $standard_value = standardize($range->value);
+        next if $standard_value eq 'cn' || $standard_value eq 'unassigned';
+        $Assigned->add_range($range->start, $range->end);
+    }
+}
+
 sub compile_perl() {
     # Create perl-defined tables.  Almost all are part of the pseudo-property
     # named 'perl' internally to this program.  Many of these are recommended
@@ -13446,16 +13635,12 @@ sub compile_perl() {
     }
 
     my $Any = $perl->add_match_table('Any',
-                                     Description  => "All Unicode code points: [\\x{0000}-\\x{10FFFF}]",
+                                     Description  => "All Unicode code points: [\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
                                      );
-    $Any->add_range(0, 0x10FFFF);
+    $Any->add_range(0, $MAX_UNICODE_CODEPOINT);
     $Any->add_alias('Unicode');
 
-    # Assigned is the opposite of gc=unassigned
-    my $Assigned = $perl->add_match_table('Assigned',
-                                Description  => "All assigned code points",
-                                Initialize => ~ $gc->table('Unassigned'),
-                                );
+    calculate_Assigned();
 
     # Our internal-only property should be treated as more than just a
     # synonym; grandfather it in to the pod.
@@ -13501,32 +13686,27 @@ sub compile_perl() {
 
         # There are quite a few code points in Lower, that aren't in gc=lc,
         # and not all are in all releases.
-        foreach my $code_point (    utf8::unicode_to_native(0xAA),
-                                    utf8::unicode_to_native(0xBA),
-                                    0x02B0 .. 0x02B8,
-                                    0x02C0 .. 0x02C1,
-                                    0x02E0 .. 0x02E4,
-                                    0x0345,
-                                    0x037A,
-                                    0x1D2C .. 0x1D6A,
-                                    0x1D78,
-                                    0x1D9B .. 0x1DBF,
-                                    0x2071,
-                                    0x207F,
-                                    0x2090 .. 0x209C,
-                                    0x2170 .. 0x217F,
-                                    0x24D0 .. 0x24E9,
-                                    0x2C7C .. 0x2C7D,
-                                    0xA770,
-                                    0xA7F8 .. 0xA7F9,
-        ) {
-            # Don't include the code point unless it is assigned in this
-            # release
-            my $category = $gc->value_of(hex $code_point);
-            next if ! defined $category || $category eq 'Cn';
-
-            $Lower += $code_point;
-        }
+        my $temp = Range_List->new(Initialize => [
+                                                utf8::unicode_to_native(0xAA),
+                                                utf8::unicode_to_native(0xBA),
+                                                0x02B0 .. 0x02B8,
+                                                0x02C0 .. 0x02C1,
+                                                0x02E0 .. 0x02E4,
+                                                0x0345,
+                                                0x037A,
+                                                0x1D2C .. 0x1D6A,
+                                                0x1D78,
+                                                0x1D9B .. 0x1DBF,
+                                                0x2071,
+                                                0x207F,
+                                                0x2090 .. 0x209C,
+                                                0x2170 .. 0x217F,
+                                                0x24D0 .. 0x24E9,
+                                                0x2C7C .. 0x2C7D,
+                                                0xA770,
+                                                0xA7F8 .. 0xA7F9,
+                                ]);
+        $Lower += $temp & $Assigned;
     }
     my $Posix_Lower = $perl->add_match_table("PosixLower",
                             Description => "[a-z]",
@@ -13832,6 +14012,7 @@ sub compile_perl() {
     );
     $Space->add_alias('XPerlSpace');    # Pre-existing synonyms
     $Space->add_alias('SpacePerl');
+    $Space->add_alias('Space') if $v_version lt v4.1.0;
 
     my $Posix_space = $perl->add_match_table("PosixSpace",
                             Description => "\\t, \\n, \\cK, \\f, \\r, and ' '.  (\\cK is vertical tab)",
@@ -13847,11 +14028,18 @@ sub compile_perl() {
                             Initialize => $Cntrl & $ASCII,
                             );
 
+    my $perl_surrogate = $perl->add_match_table('_Perl_Surrogate');
+    if (defined (my $Cs = $gc->table('Cs'))) {
+        $perl_surrogate += $Cs;
+    }
+    else {
+        push @tables_that_may_be_empty, '_Perl_Surrogate';
+    }
+
     # $controls is a temporary used to construct Graph.
     my $controls = Range_List->new(Initialize => $gc->table('Unassigned')
-                                                + $gc->table('Control'));
-    # Cs not in release 1
-    $controls += $gc->table('Surrogate') if defined $gc->table('Surrogate');
+                                                + $gc->table('Control')
+                                                + $perl_surrogate);
 
     # Graph is  ~space &  ~(Cc|Cs|Cn) = ~(space + $controls)
     my $Graph = $perl->add_match_table('Graph', Full_Name => 'XPosixGraph',
@@ -13957,14 +14145,21 @@ sub compile_perl() {
     # 31f05a37c4e9c37a7263491f2fc0237d836e1a80 for a more complete description
     # of the MU issue.
     foreach my $range ($loc_problem_folds->ranges) {
-        foreach my $code_point($range->start .. $range->end) {
+        foreach my $code_point ($range->start .. $range->end) {
             my $fold_range = $cf->containing_range($code_point);
             next unless defined $fold_range;
 
+            # Skip if folds to itself
+            next if $fold_range->value eq $CODE_POINT;
+
             my @hex_folds = split " ", $fold_range->value;
-            my $start_cp = hex $hex_folds[0];
+            my $start_cp = $hex_folds[0];
+            next if $start_cp eq $CODE_POINT;
+            $start_cp = hex $start_cp;
             foreach my $i (0 .. @hex_folds - 1) {
-                my $cp = hex $hex_folds[$i];
+                my $cp = $hex_folds[$i];
+                next if $cp eq $CODE_POINT;
+                $cp = hex $cp;
                 next unless $cp > 255;    # Already have the < 256 ones
 
                 $loc_problem_folds->add_range($cp, $cp);
@@ -13978,9 +14173,13 @@ sub compile_perl() {
          Description =>
               "Code points whose fold is a string of more than one character",
     );
+    if ($v_version lt v3.0.1) {
+        push @tables_that_may_be_empty, '_Perl_Folds_To_Multi_Char';
+    }
 
     # Look through all the known folds to populate these tables.
     foreach my $range ($cf->ranges) {
+        next if $range->value eq $CODE_POINT;
         my $start = $range->start;
         my $end = $range->end;
         $any_folds->add_range($start, $end);
@@ -14190,7 +14389,7 @@ sub compile_perl() {
                                     + utf8::unicode_to_native(0xA0) # NBSP
                         );
 
-    my @composition = ('Name', 'Unicode_1_Name', 'Name_Alias');
+    my @composition = ('Name', 'Unicode_1_Name', '_Perl_Name_Alias');
 
     if (@named_sequences) {
         push @composition, 'Named_Sequence';
@@ -14201,15 +14400,15 @@ sub compile_perl() {
 
     my $alias_sentence = "";
     my %abbreviations;
-    my $alias = property_ref('Name_Alias');
-    $perl_charname->set_proxy_for('Name_Alias');
-
-    # Add each entry in Name_Alias to Perl_Charnames.  Where these go with
-    # respect to any existing entry depends on the entry type.  Corrections go
-    # before said entry, as they should be returned in preference over the
-    # existing entry.  (A correction to a correction should be later in the
-    # Name_Alias table, so it will correctly precede the erroneous correction
-    # in Perl_Charnames.)
+    my $alias = property_ref('_Perl_Name_Alias');
+    $perl_charname->set_proxy_for('_Perl_Name_Alias');
+
+    # Add each entry in _Perl_Name_Alias to Perl_Charnames.  Where these go
+    # with respect to any existing entry depends on the entry type.
+    # Corrections go before said entry, as they should be returned in
+    # preference over the existing entry.  (A correction to a correction
+    # should be later in the _Perl_Name_Alias table, so it will correctly
+    # precede the erroneous correction in Perl_Charnames.)
     #
     # Abbreviations go after everything else, so they are saved temporarily in
     # a hash for later.
@@ -14244,7 +14443,7 @@ sub compile_perl() {
         $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type);
     }
     $alias_sentence = <<END;
-The Name_Alias property adds duplicate code point entries that are
+The _Perl_Name_Alias property adds duplicate code point entries that are
 alternatives to the original name.  If an addition is a corrected
 name, it will be physically first in the table.  The original (less correct,
 but still valid) name will be next; then any alternatives, in no particular
@@ -14252,8 +14451,9 @@ order; and finally any abbreviations, again in no particular order.
 END
 
     # Now add the Unicode_1 names for the controls.  The Unicode_1 names had
-    # precedence before 6.1, so should be first in the file; the other names
-    # have precedence starting in 6.1,
+    # precedence before 6.1, including the awful ones like "LINE FEED (LF)",
+    # so should be first in the file; the other names have precedence starting
+    # in 6.1,
     my $before_or_after = ($v_version lt v6.1.0)
                           ? $MULTIPLE_BEFORE
                           : $MULTIPLE_AFTER;
@@ -14283,12 +14483,6 @@ END
                                         Replace => $before_or_after);
     }
 
-    # But in this version only, the ALERT has precedence over BELL, the
-    # Unicode_1_Name that would otherwise have precedence.
-    if ($v_version eq v6.0.0) {
-        $perl_charname->add_duplicate(7, 'ALERT', Replace => $MULTIPLE_BEFORE);
-    }
-
     # Now that have everything added, add in abbreviations after
     # everything else.  Sort so results don't change between runs of this
     # program
@@ -14398,6 +14592,25 @@ END
         $unassigned->set_equivalent_to($age_default, Related => 1);
     }
 
+    my $patws = $perl->add_match_table('_Perl_PatWS',
+                                       Perl_Extension => 1,
+                                       Fate => $INTERNAL_ONLY);
+    if (defined (my $off_patws = property_ref('Pattern_White_Space'))) {
+        $patws->initialize($off_patws->table('Y'));
+    }
+    else {
+        $patws->initialize([ ord("\t"),
+                             ord("\n"),
+                             utf8::unicode_to_native(0x0B), # VT
+                             ord("\f"),
+                             ord("\r"),
+                             ord(" "),
+                             utf8::unicode_to_native(0x85), # NEL
+                             0x200E..0x200F,             # Left, Right marks
+                             0x2028..0x2029              # Line, Paragraph seps
+                           ] );
+    }
+
     # See L<perlfunc/quotemeta>
     my $quotemeta = $perl->add_match_table('_Perl_Quotemeta',
                                            Perl_Extension => 1,
@@ -14406,22 +14619,78 @@ END
                                            # Initialize to what's common in
                                            # all Unicode releases.
                                            Initialize =>
-                                                $Space
-                                                + $gc->table('Control')
+                                                  $gc->table('Control')
+                                                + $Space
+                                                + $patws
+                                                + ((~ $Word) & $ASCII)
                            );
 
-    # In early releases without the proper Unicode properties, just set to \W.
-    if (! defined (my $patsyn = property_ref('Pattern_Syntax'))
-        || ! defined (my $patws = property_ref('Pattern_White_Space'))
-        || ! defined (my $di = property_ref('Default_Ignorable_Code_Point')))
-    {
-        $quotemeta += ~ $Word;
+    if (defined (my $patsyn = property_ref('Pattern_Syntax'))) {
+        $quotemeta += $patsyn->table('Y');
     }
     else {
-        $quotemeta += $patsyn->table('Y')
-                   + $patws->table('Y')
-                   + $di->table('Y')
-                   + ((~ $Word) & $ASCII);
+        $quotemeta += ((~ $Word) & Range->new(0, 255))
+                    - utf8::unicode_to_native(0xA8)
+                    - utf8::unicode_to_native(0xAF)
+                    - utf8::unicode_to_native(0xB2)
+                    - utf8::unicode_to_native(0xB3)
+                    - utf8::unicode_to_native(0xB4)
+                    - utf8::unicode_to_native(0xB7)
+                    - utf8::unicode_to_native(0xB8)
+                    - utf8::unicode_to_native(0xB9)
+                    - utf8::unicode_to_native(0xBC)
+                    - utf8::unicode_to_native(0xBD)
+                    - utf8::unicode_to_native(0xBE);
+        $quotemeta += [ # These are above-Latin1 patsyn; hence should be the
+                        # same in all releases
+                        0x2010 .. 0x2027,
+                        0x2030 .. 0x203E,
+                        0x2041 .. 0x2053,
+                        0x2055 .. 0x205E,
+                        0x2190 .. 0x245F,
+                        0x2500 .. 0x2775,
+                        0x2794 .. 0x2BFF,
+                        0x2E00 .. 0x2E7F,
+                        0x3001 .. 0x3003,
+                        0x3008 .. 0x3020,
+                        0x3030 .. 0x3030,
+                        0xFD3E .. 0xFD3F,
+                        0xFE45 .. 0xFE46
+                      ];
+    }
+
+    if (defined (my $di = property_ref('Default_Ignorable_Code_Point'))) {
+        $quotemeta += $di->table('Y')
+    }
+    else {
+        if ($v_version ge v2.0) {
+            $quotemeta += $gc->table('Cf')
+                       +  $gc->table('Cs');
+        }
+        $quotemeta += $gc->table('Cc')
+                    - $Space;
+        my $temp = Range_List->new(Initialize => [ 0x180B .. 0x180D,
+                                                   0x2060 .. 0x206F,
+                                                   0xFE00 .. 0xFE0F,
+                                                   0xFFF0 .. 0xFFFB,
+                                                   0xE0000 .. 0xE0FFF,
+                                                  ]);
+        $quotemeta += $temp & $Assigned;
+    }
+
+    my $nchar = $perl->add_match_table('_Perl_Nchar',
+                                       Perl_Extension => 1,
+                                       Fate => $INTERNAL_ONLY);
+    if (defined (my $off_nchar = property_ref('Nchar'))) {
+        $nchar->initialize($off_nchar->table('Y'));
+    }
+    else {
+        $nchar->initialize([ 0xFFFE .. 0xFFFF ]);
+        if ($v_version ge v2.0) {   # First release with these nchars
+            for (my $i = 0x1FFFE; $i <= 0x10FFFE; $i += 0x10000) {
+                $nchar += [ $i .. $i+1 ];
+            }
+        }
     }
 
     # Finished creating all the perl properties.  All non-internal non-string
@@ -14743,14 +15012,15 @@ sub register_file_for_name($$$) {
     my $file = shift;            # The file name in the final directory.
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    trace "table=$table, file=$file, directory=@$directory_ref" if main::DEBUG && $to_trace;
+    trace "table=$table, file=$file, directory=@$directory_ref, fate=", $table->fate if main::DEBUG && $to_trace;
 
     if ($table->isa('Property')) {
         $table->set_file_path(@$directory_ref, $file);
         push @map_properties, $table;
 
         # No swash means don't do the rest of this.
-        return if $table->fate != $ORDINARY;
+        return if $table->fate != $ORDINARY
+                  && ! ($table->name =~ /^_/ && $table->fate == $INTERNAL_ONLY);
 
         # Get the path to the file
         my @path = $table->file_path;
@@ -14764,7 +15034,12 @@ sub register_file_for_name($$$) {
         # property's map table
         foreach my $alias ($table->aliases) {
             my $name = $alias->name;
-            $loose_property_to_file_of{standardize($name)} = $file;
+            if ($name =~ /^_/) {
+                $strict_property_to_file_of{lc $name} = $file;
+            }
+            else {
+                $loose_property_to_file_of{standardize($name)} = $file;
+            }
         }
 
         # And a way for utf8_heavy to find the proper key in the SwashInfo
@@ -14972,7 +15247,22 @@ sub register_file_for_name($$$) {
         # Remove interior underscores.
         (my $filename = $name) =~ s/ (?<=.) _ (?=.) //xg;
 
-        # Change any non-word character into an underscore, and truncate to 8.
+        # Convert the dot in floating point numbers to an underscore
+        $filename =~ s/\./_/ if $filename =~ / ^ \d+ \. \d+ $ /x;
+
+        my $suffix = "";
+
+        # Extract any suffix, delete any non-word character, and truncate to 3
+        # after the dot
+        if ($filename =~ m/ ( .*? ) ( \. .* ) /x) {
+            $filename = $1;
+            $suffix = $2;
+            $suffix =~ s/\W+//g;
+            substr($suffix, 4) = "" if length($suffix) > 4;
+        }
+
+        # Change any non-word character outside the suffix into an underscore,
+        # and truncate to 8.
         $filename =~ s/\W+/_/g;   # eg., "L&" -> "L_"
         substr($filename, 8) = "" if length($filename) > 8;
 
@@ -14984,7 +15274,7 @@ sub register_file_for_name($$$) {
         #     InGreekE
         #     InGreek2
         my $warned = 0;
-        while (my $num = $base_names{$path}{lc $filename}++) {
+        while (my $num = $base_names{$path}{lc "$filename$suffix"}++) {
             $num++; # so basenames with numbers start with '2', which
                     # just looks more natural.
 
@@ -15491,9 +15781,9 @@ sub make_ucd_table_pod_entries {
             $$info_ref .= $full_name;
         }
 
-        # And the full-name entry includes the short name, if different
+        # And the full-name entry includes the short name, if shorter
         if ($info_ref == \$full_info
-            && $standard_short_name ne $standard_full_name)
+            && length $standard_short_name < length $standard_full_name)
         {
             $full_info =~ s/\.\Z//;
             $full_info .= "  " if $full_info;
@@ -15517,6 +15807,17 @@ sub make_ucd_table_pod_entries {
         $full_info .= ".  " if $full_info;
         $full_info .= $more_info;
     }
+    if ($table->property->type == $FORCED_BINARY) {
+        if ($full_info) {
+            $full_info =~ s/\.\Z//;
+            $full_info .= ".  ";
+        }
+        $full_info .= "This is a combination property which has both:"
+                    . " 1) a map to various string values; and"
+                    . " 2) a map to boolean Y/N, where 'Y' means the"
+                    . " string value is non-empty.  Add the prefix 'is'"
+                    . " to the prop_invmap() call to get the latter";
+    }
 
     # These keep track if have created full and short name pod entries for the
     # property
@@ -15548,6 +15849,9 @@ sub make_ucd_table_pod_entries {
             $info = $other_info;
         }
 
+        $combination_property{$standard} = 1
+                                  if $table->property->type == $FORCED_BINARY;
+
         # Here, we have set up the two columns for this entry.  But if an
         # entry already exists for this name, we have to decide which one
         # we're going to later output.
@@ -15614,9 +15918,9 @@ sub pod_alphanumeric_sort {
     # The first few character columns are filler, plus the '\p{'; and get rid
     # of all the trailing stuff, starting with the trailing '}', so as to sort
     # on just 'Name=Value'
-    (my $a = lc $a) =~ s/^ .*? { //x;
+    (my $a = lc $a) =~ s/^ .*? \{ //x;
     $a =~ s/}.*//;
-    (my $b = lc $b) =~ s/^ .*? { //x;
+    (my $b = lc $b) =~ s/^ .*? \{ //x;
     $b =~ s/}.*//;
 
     # Determine if the two operands are both internal only or both not.
@@ -15761,6 +16065,7 @@ END
     # The sort will cause the alphabetically first properties to be added to
     # each list first, so each list will be sorted.
     foreach my $property (sort keys %why_suppressed) {
+        next unless $why_suppressed{$property};
         push @{$why_list{$why_suppressed{$property}}}, $property;
     }
 
@@ -15823,13 +16128,12 @@ END
     }
 
     # Similiarly, generate a list of files that we don't use, grouped by the
-    # reasons why.  First, create a hash whose keys are the reasons, and whose
-    # values are anonymous arrays of all the files that share that reason.
+    # reasons why (Don't output if the reason is empty).  First, create a hash
+    # whose keys are the reasons, and whose values are anonymous arrays of all
+    # the files that share that reason.
     my %grouped_by_reason;
-    foreach my $file (keys %ignored_files) {
-        push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
-    }
     foreach my $file (keys %skipped_files) {
+        next unless $skipped_files{$file};
         push @{$grouped_by_reason{$skipped_files{$file}}}, $file;
     }
 
@@ -16284,6 +16588,10 @@ sub make_Heavy () {
                            = simple_dumper(\%loose_property_name_of, ' ' x 4);
     chomp $loose_property_name_of;
 
+    my $strict_property_name_of
+                           = simple_dumper(\%strict_property_name_of, ' ' x 4);
+    chomp $strict_property_name_of;
+
     my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
     chomp $stricter_to_file_of;
 
@@ -16322,6 +16630,10 @@ sub make_Heavy () {
                         = simple_dumper(\%loose_property_to_file_of, ' ' x 4);
     chomp $loose_property_to_file_of;
 
+    my $strict_property_to_file_of
+                        = simple_dumper(\%strict_property_to_file_of, ' ' x 4);
+    chomp $strict_property_to_file_of;
+
     my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4);
     chomp $file_to_swash_name;
 
@@ -16337,6 +16649,11 @@ $INTERNAL_ONLY_HEADER
 $loose_property_name_of
 );
 
+# Same, but strict names
+\%utf8::strict_property_name_of = (
+$strict_property_name_of
+);
+
 # Gives the definitions (in the form of inversion lists) for those properties
 # whose definitions aren't kept in files
 \@utf8::inline_definitions = (
@@ -16385,6 +16702,11 @@ $caseless_equivalent_to
 $loose_property_to_file_of
 );
 
+# Property names to mapping files
+\%utf8::strict_property_to_file_of = (
+$strict_property_to_file_of
+);
+
 # Files to the swash names within them.
 \%utf8::file_to_swash_name = (
 $file_to_swash_name
@@ -16756,8 +17078,8 @@ sub make_UCD () {
     # an element for the Hangul syllables in the appropriate place, and
     # otherwise changes the name to include the "-<code point>" suffix.
     my @algorithm_names;
-    my $done_hangul = 0;
-
+    my $done_hangul = $v_version lt v2.0.0;  # Hanguls as we know them came
+                                             # along in this version
     # Copy it linearly.
     for my $i (0 .. @code_points_ending_in_code_point - 1) {
 
@@ -16808,6 +17130,9 @@ sub make_UCD () {
     my $ambiguous_names = simple_dumper(\%ambiguous_names, ' ' x 4);
     chomp $ambiguous_names;
 
+    my $combination_property = simple_dumper(\%combination_property, ' ' x 4);
+    chomp $combination_property;
+
     my $loose_defaults = simple_dumper(\%loose_defaults, ' ' x 4);
     chomp $loose_defaults;
 
@@ -16872,6 +17197,13 @@ $ambiguous_names
 $loose_defaults
 );
 
+# The properties that are combinations, in that they have both a map table and
+# a match table.  This is actually for UCD.t, so it knows how to test for
+# these.
+\%Unicode::UCD::combination_property = (
+$combination_property
+);
+
 # All combinations of names that are suppressed.
 # This is actually for UCD.t, so it knows which properties shouldn't have
 # entries.  If it got any bigger, would probably want to put it in its own
@@ -16972,14 +17304,8 @@ sub write_all_tables() {
             # with it or not.
             my $expected_empty =
 
-                # $perl should be empty, as well as properties that we just
-                # don't do anything with
-                ($is_property
-                    && ($table == $perl
-                        || grep { $complete_name eq $_ }
-                                                    @unimplemented_properties
-                    )
-                )
+                # $perl should be empty
+                ($is_property && ($table == $perl))
 
                 # Match tables in properties we skipped populating should be
                 # empty
@@ -17045,7 +17371,7 @@ sub write_all_tables() {
                 : ($is_property)
                   ? # All these types of map tables will be full because
                     # they will have been populated with defaults
-                    ($type == $ENUM || $type == $FORCED_BINARY)
+                    ($type == $ENUM)
 
                   : # A match table should match everything if its method
                     # shows it should
@@ -17206,12 +17532,14 @@ sub write_all_tables() {
                         }
                     }
                     else {
-                        if (exists ($loose_property_name_of{$alias_standard}))
-                        {
-                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $loose_property_name_of{$alias_standard}.  Old name is retained");
+                        my $hash_ref = ($alias_standard =~ /^_/)
+                                       ? \%strict_property_name_of
+                                       : \%loose_property_name_of;
+                        if (exists $hash_ref->{$alias_standard}) {
+                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $hash_ref->{$alias_standard}.  Old name is retained");
                         }
                         else {
-                            $loose_property_name_of{$alias_standard}
+                            $hash_ref->{$alias_standard}
                                                 = $standard_property_name;
                         }
 
@@ -17462,7 +17790,7 @@ sub generate_tests($$$$$) {
     my @output;
     # Create a complete set of tests, with complements.
     if (defined $valid_code) {
-	push @output, <<"EOC"
+        push @output, <<"EOC"
 Expect(1, $valid_code, '\\p{$name}', $warning);
 Expect(0, $valid_code, '\\p{^$name}', $warning);
 Expect(0, $valid_code, '\\P{$name}', $warning);
@@ -17470,7 +17798,7 @@ Expect(1, $valid_code, '\\P{^$name}', $warning);
 EOC
     }
     if (defined $invalid_code) {
-	push @output, <<"EOC"
+        push @output, <<"EOC"
 Expect(0, $invalid_code, '\\p{$name}', $warning);
 Expect(1, $invalid_code, '\\p{^$name}', $warning);
 Expect(1, $invalid_code, '\\P{$name}', $warning);
@@ -17708,8 +18036,10 @@ sub make_property_test_script() {
             # Test each possible combination of the property's aliases with
             # the table's.  If this gets to be too many, could do what is done
             # in the set_final_comment() for Tables
-            my @table_aliases = $table->aliases;
-            my @property_aliases = $table->property->aliases;
+            my @table_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table->aliases;
+            next unless @table_aliases;
+            my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table->property->aliases;
+            next unless @property_aliases;
 
             # Every property can be optionally be prefixed by 'Is_', so test
             # that those work, by creating such a new alias for each
@@ -17941,84 +18271,97 @@ END
     return;
 }
 
+# Skip reasons, so will be exact same text and hence the files with each
+# reason will get grouped together in perluniprops.
+my $Documentation = "Documentation";
+my $Indic_Skip
+            = "Provisional; for the analysis and processing of Indic scripts";
+my $Validation = "Validation Tests";
+my $Validation_Documentation = "Documentation of validation Tests";
+
 # This is a list of the input files and how to handle them.  The files are
 # processed in their order in this list.  Some reordering is possible if
-# desired, but the v0 files should be first, and the extracted before the
-# others except DAge.txt (as data in an extracted file can be over-ridden by
-# the non-extracted.  Some other files depend on data derived from an earlier
-# file, like UnicodeData requires data from Jamo, and the case changing and
-# folding requires data from Unicode.  Mostly, it is safest to order by first
-# version releases in (except the Jamo).  DAge.txt is read before the
-# extracted ones because of the rarely used feature $compare_versions.  In the
-# unlikely event that there were ever an extracted file that contained the Age
-# property information, it would have to go in front of DAge.
+# desired, but the PropertyAliases and PropValueAliases files should be first,
+# and the extracted before the others except DAge.txt (as data in an extracted
+# file can be over-ridden by the non-extracted.  Some other files depend on
+# data derived from an earlier file, like UnicodeData requires data from Jamo,
+# and the case changing and folding requires data from Unicode.  Mostly, it is
+# safest to order by first version releases in (except the Jamo).  DAge.txt is
+# read before the extracted ones because of the rarely used feature
+# $compare_versions.  In the unlikely event that there were ever an extracted
+# file that contained the Age property information, it would have to go in
+# front of DAge.
 #
 # The version strings allow the program to know whether to expect a file or
 # not, but if a file exists in the directory, it will be processed, even if it
 # is in a version earlier than expected, so you can copy files from a later
 # release into an earlier release's directory.
 my @input_file_objects = (
-    Input_file->new('PropertyAliases.txt', v0,
+    Input_file->new('PropertyAliases.txt', v3.2,
                     Handler => \&process_PropertyAliases,
-                    ),
+                    Early => [ \&substitute_PropertyAliases ],
+                    Required_Even_in_Debug_Skip => 1,
+                   ),
     Input_file->new(undef, v0,  # No file associated with this
                     Progress_Message => 'Finishing property setup',
                     Handler => \&finish_property_setup,
-                    ),
-    Input_file->new('PropValueAliases.txt', v0,
+                   ),
+    Input_file->new('PropValueAliases.txt', v3.2,
                      Handler => \&process_PropValueAliases,
+                     Early => [ \&substitute_PropValueAliases ],
                      Has_Missings_Defaults => $NOT_IGNORED,
-                     ),
+                     Required_Even_in_Debug_Skip => 1,
+                    ),
     Input_file->new('DAge.txt', v3.2.0,
                     Has_Missings_Defaults => $NOT_IGNORED,
                     Property => 'Age'
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DGeneralCategory.txt", v3.1.0,
                     Property => 'General_Category',
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DCombiningClass.txt", v3.1.0,
                     Property => 'Canonical_Combining_Class',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DNumType.txt", v3.1.0,
                     Property => 'Numeric_Type',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DEastAsianWidth.txt", v3.1.0,
                     Property => 'East_Asian_Width',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DLineBreak.txt", v3.1.0,
                     Property => 'Line_Break',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DBidiClass.txt", v3.1.1,
                     Property => 'Bidi_Class',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DDecompositionType.txt", v3.1.0,
                     Property => 'Decomposition_Type',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DBinaryProperties.txt", v3.1.0),
     Input_file->new("${EXTRACTED}DNumValues.txt", v3.1.0,
                     Property => 'Numeric_Value',
                     Each_Line_Handler => \&filter_numeric_value_line,
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new("${EXTRACTED}DJoinGroup.txt", v3.1.0,
                     Property => 'Joining_Group',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
 
     Input_file->new("${EXTRACTED}DJoinType.txt", v3.1.0,
                     Property => 'Joining_Type',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new('Jamo.txt', v2.0.0,
                     Property => 'Jamo_Short_Name',
                     Each_Line_Handler => \&filter_jamo_line,
-                    ),
+                   ),
     Input_file->new('UnicodeData.txt', v1.1.5,
                     Pre_Handler => \&setup_UnicodeData,
 
@@ -18053,7 +18396,12 @@ my @input_file_objects = (
                                             \&filter_UnicodeData_line,
                                          ],
                     EOF_Handler => \&EOF_UnicodeData,
-                    ),
+                   ),
+    Input_file->new('CJKXREF.TXT', v1.1.5,
+                    Withdrawn => v2.0.0,
+                    Skip => 'Gives the mapping of CJK code points '
+                          . 'between Unicode and various other standards',
+                   ),
     Input_file->new('ArabicShaping.txt', v2.0.0,
                     Each_Line_Handler =>
                         ($v_version lt 4.1.0)
@@ -18063,29 +18411,46 @@ my @input_file_objects = (
                     # not used by Perl
                     Properties => [ '<ignored>', 'Joining_Type', 'Joining_Group' ],
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new('Blocks.txt', v2.0.0,
                     Property => 'Block',
                     Has_Missings_Defaults => $NOT_IGNORED,
                     Each_Line_Handler => \&filter_blocks_lines
-                    ),
+                   ),
+    Input_file->new('Index.txt', v2.0.0,
+                    Skip => 'Alphabetical index of Unicode characters',
+                   ),
+    Input_file->new('NamesList.txt', v2.0.0,
+                    Skip => 'Annotated list of characters',
+                   ),
     Input_file->new('PropList.txt', v2.0.0,
                     Each_Line_Handler => (($v_version lt v3.1.0)
                                             ? \&filter_old_style_proplist
                                             : undef),
-                    ),
+                   ),
+    Input_file->new('Props.txt', v2.0.0,
+                    Withdrawn => v3.0.0,
+                    Skip => 'A subset of F<PropList.txt> (which is used instead)',
+                   ),
+    Input_file->new('ReadMe.txt', v2.0.0,
+                    Skip => $Documentation,
+                   ),
     Input_file->new('Unihan.txt', v2.0.0,
+                    Withdrawn => v5.2.0,
+                    Construction_Time_Handler => \&construct_unihan,
                     Pre_Handler => \&setup_unihan,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'Unicode_Radical_Stroke'
+                                ],
                     Each_Line_Handler => \&filter_unihan_line,
-                        ),
+                   ),
     Input_file->new('SpecialCasing.txt', v2.1.8,
                     Each_Line_Handler => ($v_version eq 2.1.8)
                                          ? \&filter_2_1_8_special_casing_line
                                          : \&filter_special_casing_line,
                     Pre_Handler => \&setup_special_casing,
                     Has_Missings_Defaults => $IGNORED,
-                    ),
+                   ),
     Input_file->new(
                     'LineBreak.txt', v3.0.0,
                     Has_Missings_Defaults => $NOT_IGNORED,
@@ -18094,7 +18459,7 @@ my @input_file_objects = (
                     Each_Line_Handler => (($v_version lt v3.1.0)
                                         ? \&filter_early_ea_lb
                                         : undef),
-                    ),
+                   ),
     Input_file->new('EastAsianWidth.txt', v3.0.0,
                     Property => 'East_Asian_Width',
                     Has_Missings_Defaults => $NOT_IGNORED,
@@ -18102,10 +18467,14 @@ my @input_file_objects = (
                     Each_Line_Handler => (($v_version lt v3.1.0)
                                         ? \&filter_early_ea_lb
                                         : undef),
-                    ),
+                   ),
     Input_file->new('CompositionExclusions.txt', v3.0.0,
                     Property => 'Composition_Exclusion',
-                    ),
+                   ),
+    Input_file->new('UnicodeData.html', v3.0.0,
+                    Withdrawn => v4.0.1,
+                    Skip => $Documentation,
+                   ),
     Input_file->new('BidiMirroring.txt', v3.0.1,
                     Property => 'Bidi_Mirroring_Glyph',
                     Has_Missings_Defaults => ($v_version lt v6.2.0)
@@ -18114,12 +18483,15 @@ my @input_file_objects = (
                                               # anything to us, we will use the
                                               # null string
                                               : $IGNORED,
-
-                    ),
-    Input_file->new("NormTest.txt", v3.0.0,
-                     Handler => \&process_NormalizationsTest,
-                     Skip => ($make_norm_test_script) ? 0 : 'Validation Tests',
-                    ),
+                   ),
+    Input_file->new('NamesList.html', v3.0.0,
+                    Skip => 'Describes the format and contents of '
+                          . 'F<NamesList.txt>',
+                   ),
+    Input_file->new('UnicodeCharacterDatabase.html', v3.0.0,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
     Input_file->new('CaseFolding.txt', v3.0.1,
                     Pre_Handler => \&setup_case_folding,
                     Each_Line_Handler =>
@@ -18129,105 +18501,222 @@ my @input_file_objects = (
                            \&filter_case_folding_line
                         ],
                     Has_Missings_Defaults => $IGNORED,
-                    ),
+                   ),
+    Input_file->new("NormTest.txt", v3.0.1,
+                     Handler => \&process_NormalizationsTest,
+                     Skip => ($make_norm_test_script) ? 0 : $Validation,
+                   ),
     Input_file->new('DCoreProperties.txt', v3.1.0,
                     # 5.2 changed this file
                     Has_Missings_Defaults => (($v_version ge v5.2.0)
                                             ? $NOT_IGNORED
                                             : $NO_DEFAULTS),
-                    ),
+                   ),
+    Input_file->new('DProperties.html', v3.1.0,
+                    Withdrawn => v3.2.0,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('PropList.html', v3.1.0,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
     Input_file->new('Scripts.txt', v3.1.0,
                     Property => 'Script',
+                    Each_Line_Handler => (($v_version le v4.0.0)
+                                          ? \&filter_all_caps_script_names
+                                          : undef),
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new('DNormalizationProps.txt', v3.1.0,
                     Has_Missings_Defaults => $NOT_IGNORED,
                     Each_Line_Handler => (($v_version lt v4.0.1)
                                       ? \&filter_old_style_normalization_lines
                                       : undef),
-                    ),
-    Input_file->new('HangulSyllableType.txt', v0,
+                   ),
+    Input_file->new('DerivedProperties.html', v3.1.1,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('HangulSyllableType.txt', v4.0,
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    Property => 'Hangul_Syllable_Type',
-                    Pre_Handler => ($v_version lt v4.0.0)
-                                   ? \&generate_hst
-                                   : undef,
-                    ),
+                    Early => [ \&generate_hst, 'Hangul_Syllable_Type' ],
+                    Property => 'Hangul_Syllable_Type'
+                   ),
+    Input_file->new('NormalizationCorrections.txt', v3.2.0,
+                     # This documents the cumulative fixes to erroneous
+                     # normalizations in earlier Unicode versions.  Its main
+                     # purpose is so that someone running on an earlier
+                     # version can use this file to override what got
+                     # published in that earlier release.  It would be easy
+                     # for mktables to handle this file.  But all the
+                     # corrections in it should already be in the other files
+                     # for the release it is.  To get it to actually mean
+                     # something useful, someone would have to be using an
+                     # earlier Unicode release, and copy it into the directory
+                     # for that release and recomplile.  So far there has been
+                     # no demand to do that, so this hasn't been implemented.
+                    Skip => 'Documentation of corrections already '
+                          . 'incorporated into the Unicode data base',
+                   ),
+    Input_file->new('StandardizedVariants.html', v3.2.0,
+                    Skip => 'Provides a visual display of the standard '
+                          . 'variant sequences derived from '
+                          . 'F<StandardizedVariants.txt>.',
+                        # I don't know why the html came earlier than the
+                        # .txt, but both are skipped anyway, so it doesn't
+                        # matter.
+                   ),
+    Input_file->new('StandardizedVariants.txt', v4.0.0,
+                    Skip => 'Certain glyph variations for character display '
+                          . 'are standardized.  This lists the non-Unihan '
+                          . 'ones; the Unihan ones are also not used by '
+                          . 'Perl, and are in a separate Unicode data base '
+                          . 'L<http://www.unicode.org/ivd>',
+                   ),
+    Input_file->new('UCD.html', v4.0.0,
+                    Withdrawn => v5.2,
+                    Skip => $Documentation,
+                   ),
     Input_file->new("$AUXILIARY/WordBreakProperty.txt", v4.1.0,
+                    Early => [ "WBsubst.txt", '_Perl_WB', 'ALetter' ],
                     Property => 'Word_Break',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
-    Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v0,
+                   ),
+    Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v4.1,
+                    Early => [ \&generate_GCB, '_Perl_GCB' ],
                     Property => 'Grapheme_Cluster_Break',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    Pre_Handler => ($v_version lt v4.1.0)
-                                   ? \&generate_GCB
-                                   : undef,
-                    ),
+                   ),
     Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
                     Handler => \&process_GCB_test,
-                    ),
-    Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
-                    Skip => 'Validation Tests',
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/GraphemeBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
     Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
                     Handler => \&process_SB_test,
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/SentenceBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
     Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
                     Handler => \&process_WB_test,
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/WordBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
     Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
                     Property => 'Sentence_Break',
+                    Early => [ "SBsubst.txt", '_Perl_SB', 'OLetter' ],
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
     Input_file->new('NamedSequences.txt', v4.1.0,
                     Handler => \&process_NamedSequences
-                    ),
-    Input_file->new('NameAliases.txt', v0,
+                   ),
+    Input_file->new('Unihan.html', v4.1.0,
+                    Withdrawn => v5.2,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('NameAliases.txt', v5.0,
                     Property => 'Name_Alias',
-                    Pre_Handler => ($v_version le v6.0.0)
-                                   ? \&setup_early_name_alias
-                                   : undef,
                     Each_Line_Handler => ($v_version le v6.0.0)
                                    ? \&filter_early_version_name_alias_line
                                    : \&filter_later_version_name_alias_line,
-                    ),
+                   ),
+        # NameAliases.txt came along in v5.0.  The above constructor handles
+        # this.  But until 6.1, it was lacking some information needed by core
+        # perl.  The constructor below handles that.  It is either a kludge or
+        # clever, depending on your point of view.  The 'Withdrawn' parameter
+        # indicates not to use it at all starting in 6.1 (so the above
+        # constructor applies), and the 'v6.1' parameter indicates to use the
+        # Early parameter before 6.1.  Therefore 'Early" is always used,
+        # yielding the internal-only property '_Perl_Name_Alias', which it
+        # gets from a NameAliases.txt from 6.1 or later stored in
+        # N_Asubst.txt.  In combination with the above constructor,
+        # 'Name_Alias' is publicly accessible starting with v5.0, and the
+        # better 6.1 version is accessible to perl core in all releases.
+    Input_file->new("NameAliases.txt", v6.1,
+                    Withdrawn => v6.1,
+                    Early => [ "N_Asubst.txt", '_Perl_Name_Alias', "" ],
+                    Property => 'Name_Alias',
+                    EOF_Handler => \&fixup_early_perl_name_alias,
+                    Each_Line_Handler =>
+                                       \&filter_later_version_name_alias_line,
+                   ),
+    Input_file->new('NamedSqProv.txt', v5.0.0,
+                    Skip => 'Named sequences proposed for inclusion in a '
+                          . 'later version of the Unicode Standard; if you '
+                          . 'need them now, you can append this file to '
+                          . 'F<NamedSequences.txt> and recompile perl',
+                   ),
+    Input_file->new("$AUXILIARY/LBTest.txt", v5.1.0,
+                    Skip => $Validation,
+                   ),
+    Input_file->new("$AUXILIARY/LineBreakTest.html", v5.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
     Input_file->new("BidiTest.txt", v5.2.0,
-                    Skip => 'Validation Tests',
-                    ),
+                    Skip => $Validation,
+                   ),
     Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanDataDictionaryLike.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanIRGSources.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'kCompatibilityVariant',
+                                  'kIICore',
+                                  'kIRG_GSource',
+                                  'kIRG_HSource',
+                                  'kIRG_JSource',
+                                  'kIRG_KPSource',
+                                  'kIRG_MSource',
+                                  'kIRG_KSource',
+                                  'kIRG_TSource',
+                                  'kIRG_USource',
+                                  'kIRG_VSource',
+                               ],
                     Pre_Handler => \&setup_unihan,
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanNumericValues.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'kAccountingNumeric',
+                                  'kOtherNumeric',
+                                  'kPrimaryNumeric',
+                                ],
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanOtherMappings.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanRadicalStrokeCounts.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'Unicode_Radical_Stroke'
+                                ],
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanReadings.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
     Input_file->new('UnihanVariants.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                     Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
+    Input_file->new('CJKRadicals.txt', v5.2.0,
+                    Skip => 'Maps the kRSUnicode property values to '
+                          . 'corresponding code points',
+                   ),
+    Input_file->new('EmojiSources.txt', v6.0.0,
+                    Skip => 'Maps certain Unicode code points to their '
+                          . 'legacy Japanese cell-phone values',
+                   ),
     Input_file->new('ScriptExtensions.txt', v6.0.0,
                     Property => 'Script_Extensions',
                     Pre_Handler => \&setup_script_extensions,
@@ -18235,39 +18724,74 @@ my @input_file_objects = (
                     Has_Missings_Defaults => (($v_version le v6.0.0)
                                             ? $NO_DEFAULTS
                                             : $IGNORED),
-                    ),
-    # The two Indic files are actually available starting in v6.0.0, but their
-    # property values are missing from PropValueAliases.txt in that release,
-    # so that further work would have to be done to get them to work properly
-    # for that release.
-    Input_file->new('IndicMatraCategory.txt', v6.1.0,
+                   ),
+    # These two Indic files are actually not usable as-is until 6.1.0,
+    # because their property values are missing from PropValueAliases.txt
+    # until that release, so that further work would have to be done to get
+    # them to work properly, which isn't worth it because of them being
+    # provisional.
+    Input_file->new('IndicMatraCategory.txt', v6.0.0,
+                    Withdrawn => v8.0.0,
                     Property => 'Indic_Matra_Category',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    Skip => "Withdrawn by Unicode while still provisional",
-                    ),
-    Input_file->new('IndicSyllabicCategory.txt', v6.1.0,
+                    Skip => $Indic_Skip,
+                   ),
+    Input_file->new('IndicSyllabicCategory.txt', v6.0.0,
                     Property => 'Indic_Syllabic_Category',
                     Has_Missings_Defaults => $NOT_IGNORED,
                     Skip => (($v_version lt v8.0.0)
-                              ? "Provisional; for the analysis and processing of Indic scripts"
+                              ? $Indic_Skip
                               : 0),
-                    ),
+                   ),
+    Input_file->new('USourceData.txt', v6.2.0,
+                    Skip => 'Documentation of status and cross reference of '
+                          . 'proposals for encoding by Unicode of Unihan '
+                          . 'characters',
+                   ),
+    Input_file->new('USourceGlyphs.pdf', v6.2.0,
+                    Skip => 'Pictures of the characters in F<USourceData.txt>',
+                   ),
     Input_file->new('BidiBrackets.txt', v6.3.0,
-                    Properties => [ 'Bidi_Paired_Bracket', 'Bidi_Paired_Bracket_Type' ],
+                    Properties => [ 'Bidi_Paired_Bracket',
+                                    'Bidi_Paired_Bracket_Type'
+                                  ],
                     Has_Missings_Defaults => $NO_DEFAULTS,
-                    ),
+                   ),
     Input_file->new("BidiCharacterTest.txt", v6.3.0,
-                    Skip => 'Validation Tests',
-                    ),
+                    Skip => $Validation,
+                   ),
     Input_file->new('IndicPositionalCategory.txt', v8.0.0,
                     Property => 'Indic_Positional_Category',
                     Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
 );
 
 # End of all the preliminaries.
 # Do it...
 
+if (@missing_early_files) {
+    print simple_fold(join_lines(<<END
+
+The compilation cannot be completed because one or more required input files,
+listed below, are missing.  This is because you are compiling Unicode version
+$string_version, which predates the existence of these file(s).  To fully
+function, perl needs the data that these files would have contained if they
+had been in this release.  To work around this, create copies of later
+versions of the missing files in the directory containing '$0'.  (Perl will
+make the necessary adjustments to the data to compensate for it not being the
+same version as is being compiled.)  The files are available from unicode.org,
+via either ftp or http.  If using http, they will be under
+www.unicode.org/versions/.  Below are listed the source file name of each
+missing file, the Unicode version to copy it from, and the name to store it
+as.  (Note that the listed source file name may not be exactly the one that
+Unicode calls it.  If you don't find it, you can look it up in 'README.perl'
+to get the correct name.)
+END
+    ));
+    print simple_fold(join_lines("\n$_")) for @missing_early_files;
+    exit 2;
+}
+
 if ($compare_versions) {
     Carp::my_carp(<<END
 Warning.  \$compare_versions is set.  Output is not suitable for production
@@ -18276,17 +18800,13 @@ END
 }
 
 # Put into %potential_files a list of all the files in the directory structure
-# that could be inputs to this program, excluding those that we should ignore.
-# Use absolute file names because it makes it easier across machine types.
-my @ignored_files_full_names = map { File::Spec->rel2abs(
-                                     internal_file_to_platform($_))
-                                } keys %ignored_files;
+# that could be inputs to this program
 File::Find::find({
     wanted=>sub {
-        return unless /\.txt$/i;  # Some platforms change the name's case
+        return unless / \. ( txt | htm l? ) $ /xi;  # Some platforms change the
+                                                    # name's case
         my $full = lc(File::Spec->rel2abs($_));
-        $potential_files{$full} = 1
-                    if ! grep { $full eq lc($_) } @ignored_files_full_names;
+        $potential_files{$full} = 1;
         return;
     }
 }, File::Spec->curdir());
@@ -18344,8 +18864,7 @@ else {
             # The paths are stored with relative names, and with '/' as the
             # delimiter; convert to absolute on this machine
             my $full = lc(File::Spec->rel2abs(internal_file_to_platform($input)));
-            $potential_files{lc $full} = 1
-                if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
+            $potential_files{lc $full} = 1;
         }
     }
 
@@ -18422,7 +18941,7 @@ my @input_files = qw(version Makefile);
 foreach my $object (@input_file_objects) {
     my $file = $object->file;
     next if ! defined $file;    # Not all objects have files
-    next if $object->optional && ! -e $file;
+    next if defined $object->skip;;
     push @input_files,  $file;
 }
 
@@ -18444,7 +18963,6 @@ foreach my $in (@input_files) {
         my ($volume, $directories, $file ) = File::Spec->splitpath($in);
         $directories =~ s;/$;;;     # Can have extraneous trailing '/'
         my @directories = File::Spec->splitdir($directories);
-        my $base = $file =~ s/\.txt$//;
         construct_filename($file, 'mutable', \@directories);
     }
 }
@@ -18593,6 +19111,16 @@ use warnings;
 my $Tests = 0;
 my $Fails = 0;
 
+# loc_tools.pl requires this function to be defined
+sub ok($@) {
+    my ($pass, @msg) = @_;
+    print "not " unless $pass;
+    print "ok ";
+    print ++$Tests;
+    print " - ", join "", @msg if @msg;
+    print "\n";
+}
+
 sub Expect($$$$) {
     my $expected = shift;
     my $ord = shift;
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 1ba73b2874..51137b5623 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -404,7 +404,11 @@ sub _loose_name ($) {
                 # If didn't find it, try again with looser matching by editing
                 # out the applicable characters on the rhs and looking up
                 # again.
+                my $strict_property_and_table;
                 if (! defined $file) {
+
+                    # This isn't used unless the name begins with 'to'
+                    $strict_property_and_table = $property_and_table =~  s/^to//r;
                     $table = _loose_name($table);
                     $property_and_table = "$prefix$table";
                     print STDERR __LINE__, ": $property_and_table\n" if DEBUG;
@@ -443,61 +447,51 @@ sub _loose_name ($) {
                 ## is to use Unicode::UCD.
                 ##
                 # Only check if caller wants non-binary
-                my $retried = 0;
-                if ($minbits != 1 && $property_and_table =~ s/^to//) {{
+                if ($minbits != 1) {
+                    if ($property_and_table =~ s/^to//) {
                     # Look input up in list of properties for which we have
-                    # mapping files.
-                    if (defined ($file =
+                    # mapping files.  First do it with the strict approach
+                        if (defined ($file = $utf8::strict_property_to_file_of{
+                                                    $strict_property_and_table}))
+                        {
+                            $type = $utf8::file_to_swash_name{$file};
+                            print STDERR __LINE__, ": type set to $type\n"
+                                                                        if DEBUG;
+                            $file = "$unicore_dir/$file.pl";
+                            last GETFILE;
+                        }
+                        elsif (defined ($file =
                           $utf8::loose_property_to_file_of{$property_and_table}))
-                    {
-                        $type = $utf8::file_to_swash_name{$file};
-                        print STDERR __LINE__, ": type set to $type\n" if DEBUG;
-                        $file = "$unicore_dir/$file.pl";
-                        last GETFILE;
-                    }   # If that fails see if there is a corresponding binary
-                        # property file
-                    elsif (defined ($file =
-                                   $utf8::loose_to_file_of{$property_and_table}))
-                    {
+                        {
+                            $type = $utf8::file_to_swash_name{$file};
+                            print STDERR __LINE__, ": type set to $type\n"
+                                                                        if DEBUG;
+                            $file = "$unicore_dir/$file.pl";
+                            last GETFILE;
+                        }   # If that fails see if there is a corresponding binary
+                            # property file
+                        elsif (defined ($file =
+                                    $utf8::loose_to_file_of{$property_and_table}))
+                        {
 
-                        # Here, there is no map file for the property we are
-                        # trying to get the map of, but this is a binary
-                        # property, and there is a file for it that can easily
-                        # be translated to a mapping.
-
-                        # In the case of properties that are forced to binary,
-                        # they are a combination.  We return the actual
-                        # mapping instead of the binary.  If the input is
-                        # something like 'Tocjkkiicore', it will be found in
-                        # %loose_property_to_file_of above as => 'To/kIICore'.
-                        # But the form like ToIskiicore won't be.  To fix
-                        # this, it was easiest to do it here.  These
-                        # properties are the complements of the default
-                        # property, so there is an entry in %loose_to_file_of
-                        # that is 'iskiicore' => '!kIICore/N', If we find such
-                        # an entry, strip off things and try again, which
-                        # should find the entry in %loose_property_to_file_of.
-                        # Actual binary properties that are of this form, such
-                        # as this entry: 'ishrkt' => '!Perl/Any' will also be
-                        # retried, but won't be in %loose_property_to_file_of,
-                        # and instead the next time through, it will find
-                        # 'hrkt' => '!Perl/Any' and proceed.
-                        redo if ! $retried
-                                && $file =~ /^!/
-                                && $property_and_table =~ s/^is//;
-
-                        # This is a binary property.  Setting this here causes
-                        # it to be stored as such in the cache, so if someone
-                        # comes along later looking for just a binary, they
-                        # get it.
-                        $minbits = 1;
-
-                        # The 0+ makes sure is numeric
-                        $invert_it = 0 + $file =~ s/!//;
-                        $file = "$unicore_dir/lib/$file.pl" unless $file =~ m!^#/!;
-                        last GETFILE;
+                            # Here, there is no map file for the property we
+                            # are trying to get the map of, but this is a
+                            # binary property, and there is a file for it that
+                            # can easily be translated to a mapping, so use
+                            # that, treating this as a binary property.
+                            # Setting 'minbits' here causes it to be stored as
+                            # such in the cache, so if someone comes along
+                            # later looking for just a binary, they get it.
+                            $minbits = 1;
+
+                            # The 0+ makes sure is numeric
+                            $invert_it = 0 + $file =~ s/!//;
+                            $file = "$unicore_dir/lib/$file.pl"
+                                                         unless $file =~ m!^#/!;
+                            last GETFILE;
+                        }
                     }
-                } }
+                }
 
                 ##
                 ## If we reach this line, it's because we couldn't figure
diff --git a/perl.c b/perl.c
index cbb66e0730..3cdae043f1 100644
--- a/perl.c
+++ b/perl.c
@@ -389,9 +389,9 @@ perl_construct(pTHXx)
     PL_XPosix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(VertSpace_invlist);
     PL_XPosix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(XPosixWord_invlist);
     PL_XPosix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(XPosixXDigit_invlist);
-    PL_GCB_invlist = _new_invlist_C_array(Grapheme_Cluster_Break_invlist);
-    PL_SB_invlist = _new_invlist_C_array(Sentence_Break_invlist);
-    PL_WB_invlist = _new_invlist_C_array(Word_Break_invlist);
+    PL_GCB_invlist = _new_invlist_C_array(_Perl_GCB_invlist);
+    PL_SB_invlist = _new_invlist_C_array(_Perl_SB_invlist);
+    PL_WB_invlist = _new_invlist_C_array(_Perl_WB_invlist);
 
     ENTER;
 }
diff --git a/perl.h b/perl.h
index 9d57450cba..bb4dac4184 100644
--- a/perl.h
+++ b/perl.h
@@ -4779,7 +4779,14 @@ EXTCONST  unsigned char PL_mod_latin1_uc[] = {
 	192,	193,	194,	195,	196,	197,	198,	199,
 	200,	201,	202,	203,	204,	205,	206,	207,
 	208,	209,	210,	211,	212,	213,	214,	215,
-	216,	217,	218,	219,	220,	221,	222,	255 /*sharp s*/,	
+	216,	217,	218,	219,	220,	221,	222,
+#if    UNICODE_MAJOR_VERSION > 2                                        \
+   || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1		\
+                                  && UNICODE_DOT_DOT_VERSION >= 8)
+	                                                        255 /*sharp s*/,
+#else   /* uc() is itself in early unicode */
+	                                                        223,
+#endif
 	224-32,	225-32,	226-32,	227-32,	228-32,	229-32,	230-32,	231-32,
 	232-32,	233-32,	234-32,	235-32,	236-32,	237-32,	238-32,	239-32,
 	240-32,	241-32,	242-32,	243-32,	244-32,	245-32,	246-32,	247,
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 06f2ee271f..1580b43471 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -338,7 +338,10 @@ files in F<ext/> and F<lib/> are best summarized in L</Modules and Pragmata>.
 
 =item *
 
-XXX
+Perl can again be compiled with any Unicode version.  This used to
+(mostly) work, but was lost in v5.18 through v5.20.  The property
+C<Name_Alias> did not exist prior to Unicode 5.0.  L<Unicode::UCD>
+incorrectly said it did.  This has been fixed.
 
 =back
 
diff --git a/pp.c b/pp.c
index f750fce304..a963478cec 100644
--- a/pp.c
+++ b/pp.c
@@ -4096,6 +4096,9 @@ PP(pp_uc)
 		     * just above.  
 		     * Use the source to distinguish between the three cases */
 
+#if    UNICODE_MAJOR_VERSION > 2                                        \
+   || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1		\
+                                  && UNICODE_DOT_DOT_VERSION >= 8)
 		    if (*s == LATIN_SMALL_LETTER_SHARP_S) {
 
 			/* uc() of this requires 2 characters, but they are
@@ -4108,6 +4111,7 @@ PP(pp_uc)
 			*d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
 			continue;   /* Back to the tight loop; still in ASCII */
 		    }
+#endif
 
 		    /* The other two special handling characters have their
 		     * upper cases outside the latin1 range, hence need to be
@@ -4401,8 +4405,14 @@ PP(pp_fc)
     const U8 *send;
     U8 *d;
     U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
     const bool full_folding = TRUE; /* This variable is here so we can easily
                                        move to more generality later */
+#else
+    const bool full_folding = FALSE;
+#endif
     const U8 flags = ( full_folding      ? FOLD_FLAGS_FULL   : 0 )
 #ifdef USE_LOCALE_CTYPE
                    | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
diff --git a/regcharclass.h b/regcharclass.h
index c59ea78b3e..f7cd2fde5b 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -168,7 +168,7 @@
 /*
 	NONCHAR: Non character code points
 
-	\p{Nchar}
+	\p{_Perl_Nchar}
 */
 /*** GENERATED CODE ***/
 #define is_NONCHAR_utf8(s)                                                  \
@@ -185,7 +185,7 @@
 /*
 	SURROGATE: Surrogate characters
 
-	\p{Gc=Cs}
+	\p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
 #define is_SURROGATE_utf8(s)                                                \
@@ -591,7 +591,7 @@
 /*
 	PATWS: pattern white space
 
-	\p{PatWS}
+	\p{_Perl_PatWS}
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
@@ -761,7 +761,7 @@
 /*
 	NONCHAR: Non character code points
 
-	\p{Nchar}
+	\p{_Perl_Nchar}
 */
 /*** GENERATED CODE ***/
 #define is_NONCHAR_utf8(s)                                                  \
@@ -782,7 +782,7 @@
 /*
 	SURROGATE: Surrogate characters
 
-	\p{Gc=Cs}
+	\p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
 #define is_SURROGATE_utf8(s)                                                \
@@ -1205,7 +1205,7 @@
 /*
 	PATWS: pattern white space
 
-	\p{PatWS}
+	\p{_Perl_PatWS}
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
@@ -1373,7 +1373,7 @@
 /*
 	NONCHAR: Non character code points
 
-	\p{Nchar}
+	\p{_Perl_Nchar}
 */
 /*** GENERATED CODE ***/
 #define is_NONCHAR_utf8(s)                                                  \
@@ -1394,7 +1394,7 @@
 /*
 	SURROGATE: Surrogate characters
 
-	\p{Gc=Cs}
+	\p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
 #define is_SURROGATE_utf8(s)                                                \
@@ -1825,7 +1825,7 @@
 /*
 	PATWS: pattern white space
 
-	\p{PatWS}
+	\p{_Perl_PatWS}
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
@@ -1993,7 +1993,7 @@
 /*
 	NONCHAR: Non character code points
 
-	\p{Nchar}
+	\p{_Perl_Nchar}
 */
 /*** GENERATED CODE ***/
 #define is_NONCHAR_utf8(s)                                                  \
@@ -2014,7 +2014,7 @@
 /*
 	SURROGATE: Surrogate characters
 
-	\p{Gc=Cs}
+	\p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
 #define is_SURROGATE_utf8(s)                                                \
@@ -2445,7 +2445,7 @@
 /*
 	PATWS: pattern white space
 
-	\p{PatWS}
+	\p{_Perl_PatWS}
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
@@ -2472,7 +2472,7 @@
 #endif /* H_REGCHARCLASS */
 
 /* Generated from:
- * 083180df694deb1fc173361406c1a75619fb8376403db3a76dc585c1e3951eca lib/Unicode/UCD.pm
+ * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
  * ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
  * 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
  * 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
@@ -2514,9 +2514,9 @@
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * c9326eab8d7861c3543963e555d5b927348f4467c93071db23154dece7619654 lib/unicore/mktables
+ * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
- * 5e47f645eac3a918246254e19c06b604c8ea088cf62da5be84dcb953ef2bf16c regen/regcharclass.pl
- * 206b60035ff0cec9f7d1701937ecf9226a943faa42dfc4827c37306be64ff18e regen/regcharclass_multi_char_folds.pl
+ * d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl
+ * 393f8d882713a3ba227351ad0f00ea4839fda74fcf77dcd1cdf31519925adba5 regen/regcharclass_multi_char_folds.pl
  * ex: set ro: */
diff --git a/regcomp.c b/regcomp.c
index fcaf153ef6..901e2c0846 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3652,6 +3652,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              * this function, we need to flag any occurrences of the sharp s.
              * This character forbids trie formation (because of added
              * complexity) */
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
 	    while (s < s_end) {
                 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
                     OP(scan) = EXACTFA_NO_TRIE;
@@ -3659,7 +3662,6 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     break;
                 }
                 s++;
-                continue;
             }
         }
 	else {
@@ -3705,6 +3707,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                 *min_subtract += len - 1;
                 s += len;
 	    }
+#endif
 	}
     }
 
@@ -11505,8 +11508,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
             *character = (U8) code_point;
             len = 1;
         } /* Else is folded non-UTF8 */
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
         else if (LIKELY(code_point != LATIN_SMALL_LETTER_SHARP_S)) {
-
+#else
+        else if (1) {
+#endif
             /* We don't fold any non-UTF8 except possibly the Sharp s  (see
              * comments at join_exact()); */
             *character = (U8) code_point;
@@ -11550,9 +11558,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
     /* A single character node is SIMPLE, except for the special-cased SHARP S
      * under /di. */
     if ((len == 1 || (UTF && len == UNISKIP(code_point)))
-        && (code_point != LATIN_SMALL_LETTER_SHARP_S
-            || ! FOLD || ! DEPENDS_SEMANTICS))
-    {
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
+        && ( code_point != LATIN_SMALL_LETTER_SHARP_S
+            || ! FOLD || ! DEPENDS_SEMANTICS)
+#endif
+    ) {
         *flagp |= SIMPLE;
     }
 
@@ -12650,11 +12662,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 }
                 else /* A regular FOLD code point */
                     if (! ( UTF
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
                         /* See comments for join_exact() as to why we fold this
                          * non-UTF at compile time */
                         || (node_type == EXACTFU
-                            && ender == LATIN_SMALL_LETTER_SHARP_S)))
-                {
+                            && ender == LATIN_SMALL_LETTER_SHARP_S)
+#endif
+                )) {
                     /* Here, are folding and are not UTF-8 encoded; therefore
                      * the character must be in the range 0-255, and is not /l
                      * (Not /l because we already handled these under /l in
@@ -12667,11 +12683,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          * 'ss' */
                         if (maybe_exactfu
                             && (PL_fold[ender] != PL_fold_latin1[ender]
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
                                 || ender == LATIN_SMALL_LETTER_SHARP_S
                                 || (len > 0
                                    && isALPHA_FOLD_EQ(ender, 's')
-                                   && isALPHA_FOLD_EQ(*(s-1), 's'))))
-                        {
+                                   && isALPHA_FOLD_EQ(*(s-1), 's'))
+#endif
+                        )) {
                             maybe_exactfu = FALSE;
                         }
                     }
@@ -14014,9 +14034,30 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
           *invlist = add_cp_to_invlist(*invlist,
                                         LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
             break;
+
+#ifdef LATIN_CAPITAL_LETTER_SHARP_S /* not defined in early Unicode releases */
+
         case LATIN_SMALL_LETTER_SHARP_S:
           *invlist = add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_SHARP_S);
             break;
+
+#endif
+
+#if    UNICODE_MAJOR_VERSION < 3                                        \
+   || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0)
+
+        /* In 3.0 and earlier, U+0130 folded simply to 'i'; and in 3.0.1 so did
+         * U+0131.  */
+        case 'i':
+        case 'I':
+          *invlist =
+             add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+#   if UNICODE_DOT_DOT_VERSION == 1
+          *invlist = add_cp_to_invlist(*invlist, LATIN_SMALL_LETTER_DOTLESS_I);
+#   endif
+            break;
+#endif
+
         default:
             /* Use deprecated warning to increase the chances of this being
              * output */
@@ -14209,6 +14250,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     DEBUG_PARSE("clas");
 
+#if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
+    || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0          \
+                                   && UNICODE_DOT_DOT_VERSION == 0)
+    allow_multi_folds = FALSE;
+#endif
+
     /* Assume we are going to generate an ANYOF node. */
     ret = reganode(pRExC_state,
                    (LOC)
diff --git a/regen/mk_PL_charclass.pl b/regen/mk_PL_charclass.pl
index 4b46bd0542..1cabfc4de9 100644
--- a/regen/mk_PL_charclass.pl
+++ b/regen/mk_PL_charclass.pl
@@ -51,6 +51,7 @@ my @properties = qw(
 
 # Read in the case fold mappings.
 my %folded_closure;
+my %simple_folded_closure;
 my @hex_non_final_folds;
 my @non_latin1_simple_folds;
 my @folds;
@@ -118,8 +119,14 @@ BEGIN { # Have to do this at compile time because using user-defined \p{property
         for my $i (0 .. @folded - 1) {
             my $hex_fold = $folded[$i];
             my $fold = hex $hex_fold;
-            push @{$folded_closure{$fold}}, $from if $fold < 256;
-            push @{$folded_closure{$from}}, $fold if $from < 256;
+            if ($fold < 256) {
+                push @{$folded_closure{$fold}}, $from;
+                push @{$simple_folded_closure{$fold}}, $from if $fold_type ne 'F';
+            }
+            if ($from < 256) {
+                push @{$folded_closure{$from}}, $fold;
+                push @{$simple_folded_closure{$from}}, $fold if $fold_type ne 'F';
+            }
 
             if (($fold_type eq 'C' || $fold_type eq 'S')
                 && ($fold < 256 != $from < 256))
@@ -153,11 +160,16 @@ BEGIN { # Have to do this at compile time because using user-defined \p{property
             push @{$folded_closure{$from}}, @{$folded_closure{$folded}};
         }
     }
+    foreach my $folded (keys %simple_folded_closure) {
+        foreach my $from (grep { $_ < 256 } @{$simple_folded_closure{$folded}}) {
+            push @{$simple_folded_closure{$from}}, @{$simple_folded_closure{$folded}};
+        }
+    }
 
     # We have the single-character folds that cross the 255/256, like KELVIN
     # SIGN => 'k', but we need the closure, so add like 'K' to it
     foreach my $folded (@non_latin1_simple_folds) {
-        foreach my $fold (@{$folded_closure{$folded}}) {
+        foreach my $fold (@{$simple_folded_closure{$folded}}) {
             if ($fold < 256 && ! grep { $fold == $_ } @non_latin1_simple_folds) {
                 push @non_latin1_simple_folds, $fold;
             }
@@ -217,12 +229,18 @@ for my $ord (0..255) {
         } elsif ($name eq 'SPACE') {;
             $re = qr/\p{XPerlSpace}/;
         } elsif ($name eq 'IDFIRST') {
-            $re = qr/[_\p{Alpha}]/;
+            $re = qr/[_\p{XPosixAlpha}]/;
         } elsif ($name eq 'WORDCHAR') {
             $re = qr/\p{XPosixWord}/;
+        } elsif ($name eq 'LOWER') {
+            $re = qr/\p{XPosixLower}/;
+        } elsif ($name eq 'UPPER') {
+            $re = qr/\p{XPosixUpper}/;
         } elsif ($name eq 'ALPHANUMERIC') {
             # Like \w, but no underscore
             $re = qr/\p{Alnum}/;
+        } elsif ($name eq 'ALPHA') {
+            $re = qr/\p{XPosixAlpha}/;
         } elsif ($name eq 'QUOTEMETA') {
             $re = qr/\p{_Perl_Quotemeta}/;
         } elsif ($name eq 'NONLATIN1_FOLD') {
@@ -241,7 +259,7 @@ for my $ord (0..255) {
             use Carp;
             carp $@ if ! defined $re;
         }
-        #print "$ord, $name $property, $re\n";
+        #print STDERR __LINE__, ": $ord, $name $property, $re\n";
         if ($char =~ $re) {  # Add this property if matches
             $bits[$ord] .= '|' if $bits[$ord];
             $bits[$ord] .= "(1U<<_CC_$property)";
@@ -295,10 +313,11 @@ foreach my $charset (get_supported_code_pages()) {
         }
         else {
             use Unicode::UCD qw(prop_invmap);
-            my ($list_ref, $map_ref, $format) = prop_invmap("Name_Alias");
+            my ($list_ref, $map_ref, $format)
+                   = prop_invmap("_Perl_Name_Alias", '_perl_core_internal_ok');
             if ($format !~ /^s/) {
                 use Carp;
-                carp "Unexpected format '$format' for 'Name_Alias";
+                carp "Unexpected format '$format' for '_Perl_Name_Alias";
                 last;
             }
             my $which = Unicode::UCD::search_invlist($list_ref, $ord);
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 42f5b1c925..bc27d0f208 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -176,15 +176,14 @@ sub output_invlist ($$;$) {
     my $charset = shift // "";  # name of character set for comment
 
     die "No inversion list for $name" unless defined $invlist
-                                             && ref $invlist eq 'ARRAY'
-                                             && @$invlist;
+                                             && ref $invlist eq 'ARRAY';
 
     # Output the inversion list $invlist using the name $name for it.
     # It is output in the exact internal form for inversion lists.
 
     # Is the last element of the header 0, or 1 ?
     my $zero_or_one = 0;
-    if ($invlist->[0] != 0) {
+    if (@$invlist && $invlist->[0] != 0) {
         unshift @$invlist, 0;
         $zero_or_one = 1;
     }
@@ -235,8 +234,8 @@ sub output_invmap ($$$$$$$) {
     my $name_prefix;
 
     if ($input_format eq 's') {
-        $prop_name = (prop_aliases($prop_name))[1]; # Get full name
-        my $short_name = (prop_aliases($prop_name))[0];
+        $prop_name = (prop_aliases($prop_name))[1] // $prop_name =~ s/^_Perl_//r; # Get full name
+        my $short_name = (prop_aliases($prop_name))[0] // $prop_name;
             my @enums = prop_values($prop_name);
             if (! @enums) {
                 die "Only enum properties are currently handled; '$prop_name' isn't one";
@@ -267,7 +266,8 @@ sub output_invmap ($$$$$$$) {
                 # Assign a value to each element of the enum.  The default
                 # value always gets 0; the others are arbitrarily assigned.
                 my $enum_val = 0;
-                $default = prop_value_aliases($prop_name, $default);
+                my $canonical_default = prop_value_aliases($prop_name, $default);
+                $default = $canonical_default if defined $canonical_default;
                 $enums{$default} = $enum_val++;
                 for my $enum (@enums) {
                     $enums{$enum} = $enum_val++ unless exists $enums{$enum};
@@ -315,7 +315,9 @@ sub output_invmap ($$$$$$$) {
     # The main body are the scalars passed in to this routine.
     for my $i (0 .. $count - 1) {
         my $element = $invmap->[$i];
-        $element = $name_prefix . prop_value_aliases($prop_name, $element);
+        my $full_element_name = prop_value_aliases($prop_name, $element);
+        $element = $full_element_name if defined $full_element_name;
+        $element = $name_prefix . $element;
         print $out_fh "\t$element";
         print $out_fh "," if $i < $count - 1;
         print $out_fh  "\n";
@@ -353,7 +355,8 @@ sub mk_invlist_from_sorted_cp_list {
 my ($cp_ref, $folds_ref, $format) = prop_invmap("Case_Folding");
 die "Could not find inversion map for Case_Folding" unless defined $format;
 die "Incorrect format '$format' for Case_Folding inversion map"
-                                                    unless $format eq 'al';
+                                                    unless $format eq 'al'
+                                                           || $format eq 'a';
 my @has_multi_char_fold;
 my @is_non_final_fold;
 
@@ -447,9 +450,9 @@ for my $charset (get_supported_code_pages()) {
                              &UpperLatin1
                              _Perl_IDStart
                              _Perl_IDCont
-                             Grapheme_Cluster_Break,EDGE
-                             Word_Break,EDGE,UNKNOWN
-                             Sentence_Break,EDGE
+                             _Perl_GCB,EDGE
+                             _Perl_WB,EDGE,UNKNOWN
+                             _Perl_SB,EDGE
                            )
     ) {
 
@@ -489,20 +492,37 @@ for my $charset (get_supported_code_pages()) {
         else {
             @invlist = prop_invlist($lookup_prop, '_perl_core_internal_ok');
             if (! @invlist) {
-                my ($list_ref, $map_ref, $format, $default);
 
-                ($list_ref, $map_ref, $format, $default)
+                # If couldn't find a non-empty inversion list, see if it is
+                # instead an inversion map
+                my ($list_ref, $map_ref, $format, $default)
                           = prop_invmap($lookup_prop, '_perl_core_internal_ok');
-                die "Could not find inversion list for '$lookup_prop'" unless $list_ref;
-                @invlist = @$list_ref;
-                @invmap = @$map_ref;
-                $map_format = $format;
-                $map_default = $default;
-                $maps_to_code_point = $map_format =~ /x/;
-                $to_adjust = $map_format =~ /a/;
+                if (! $list_ref) {
+                    # An empty return here could mean an unknown property, or
+                    # merely that the original inversion list is empty.  Call
+                    # in scalar context to differentiate
+                    my $count = prop_invlist($lookup_prop,
+                                             '_perl_core_internal_ok');
+                    die "Could not find inversion list for '$lookup_prop'"
+                                                          unless defined $count;
+                }
+                else {
+                    @invlist = @$list_ref;
+                    @invmap = @$map_ref;
+                    $map_format = $format;
+                    $map_default = $default;
+                    $maps_to_code_point = $map_format =~ /x/;
+                    $to_adjust = $map_format =~ /a/;
+                }
             }
         }
-        die "Could not find inversion list for '$lookup_prop'" unless @invlist;
+
+
+        # Short-circuit an empty inversion list.
+        if (! @invlist) {
+            output_invlist($prop_name, \@invlist, $charset);
+            next;
+        }
 
         # Re-order the Unicode code points to native ones for this platform.
         # This is only needed for code points below 256, because native code
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 184437e88f..9115eafeb6 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -610,6 +610,8 @@ sub length_optree {
 
     my $else= ( $opt{else} ||= 0 );
 
+    return $else if $self->{count} == 0;
+
     my $method = $type =~ /generic/ ? 'generic_optree' : 'optree';
     if ($method eq 'optree' && scalar keys %{$self->{size}{$type}} == 1) {
 
@@ -1629,11 +1631,11 @@ REPLACEMENT: Unicode REPLACEMENT CHARACTER
 
 NONCHAR: Non character code points
 => UTF8 :fast
-\p{Nchar}
+\p{_Perl_Nchar}
 
 SURROGATE: Surrogate characters
 => UTF8 :fast
-\p{Gc=Cs}
+\p{_Perl_Surrogate}
 
 # This program was run with this enabled, and the results copied to utf8.h;
 # then this was commented out because it takes so long to figure out these 2
@@ -1695,4 +1697,4 @@ PROBLEMATIC_LOCALE_FOLDEDS_START : The first folded character of folds which are
 
 PATWS: pattern white space
 => generic cp : safe
-\p{PatWS}
+\p{_Perl_PatWS}
diff --git a/regen/regcharclass_multi_char_folds.pl b/regen/regcharclass_multi_char_folds.pl
index dfc8f9f3e5..5ea9d33a25 100644
--- a/regen/regcharclass_multi_char_folds.pl
+++ b/regen/regcharclass_multi_char_folds.pl
@@ -63,6 +63,8 @@ sub multi_char_folds ($) {
                             # multi-char folds; false if just the ones that
                             # are all ascii
 
+    return () if pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) lt v3.0.1;
+
     my ($cp_ref, $folds_ref, $format) = prop_invmap("Case_Folding");
     die "Could not find inversion map for Case_Folding" unless defined $format;
     die "Incorrect format '$format' for Case_Folding inversion map"
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 936c1a8a6c..5004e1d50e 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -3,6 +3,7 @@ use strict;
 use warnings;
 require 'regen/regen_lib.pl';
 require 'regen/charset_translations.pl';
+use Unicode::UCD;
 use charnames qw(:loose);
 
 my $out_fh = open_new('unicode_constants.h', '>',
@@ -14,10 +15,11 @@ print $out_fh <<END;
 #ifndef H_UNICODE_CONSTANTS   /* Guard against nested #includes */
 #define H_UNICODE_CONSTANTS   1
 
-/* This file contains #defines for various Unicode code points.  The values
- * the macros expand to are the native Unicode code point, or all or portions
- * of the UTF-8 encoding for the code point.  In the former case, the macro
- * name has the suffix "_NATIVE"; otherwise, the suffix "_UTF8".
+/* This file contains #defines for the version of Unicode being used and
+ * various Unicode code points.  The values the code point macros expand to
+ * are the native Unicode code point, or all or portions of the UTF-8 encoding
+ * for the code point.  In the former case, the macro name has the suffix
+ * "_NATIVE"; otherwise, the suffix "_UTF8".
  *
  * The macros that have the suffix "_UTF8" may have further suffixes, as
  * follows:
@@ -28,34 +30,18 @@ print $out_fh <<END;
 
 END
 
-# The data are at the end of this file.  A blank line is output as-is.
-# Comments (lines whose first non-blank is a '#') are converted to C-style,
-# though empty comments are converted to blank lines.  Otherwise, each line
-# represents one #define, and begins with either a Unicode character name with
-# the blanks and dashes in it squeezed out or replaced by underscores; or it
-# may be a hexadecimal Unicode code point of the form U+xxxx.  In the latter
-# case, the name will be looked-up to use as the name of the macro.  In either
-# case, the macro name will have suffixes as listed above, and all blanks and
-# dashes will be replaced by underscores.
-#
-# Each line may optionally have one of the following flags on it, separated by
-# white space from the initial token.
-#   string  indicates that the output is to be of the string form
-#           described in the comments above that are placed in the file.
-#   string_skip_ifundef  is the same as 'string', but instead of dying if the
-#           code point doesn't exist, the line is just skipped: no output is
-#           generated for it
-#   first   indicates that the output is to be of the FIRST_BYTE form.
-#   tail    indicates that the output is of the _TAIL form.
-#   native  indicates that the output is the code point, converted to the
-#           platform's native character set if applicable
-#
-# If the code point has no official name, the desired name may be appended
-# after the flag, which will be ignored if there is an official name.
-#
-# This program is used to make it convenient to create compile time constants
-# of UTF-8, and to generate proper EBCDIC as well as ASCII without manually
-# having to figure things out.
+my $version = Unicode::UCD::UnicodeVersion();
+my ($major, $dot, $dotdot) = $version =~ / (.*?) \. (.*?) (?: \. (.*) )? $ /x;
+$dotdot = 0 unless defined $dotdot;
+
+print $out_fh <<END;
+#define UNICODE_MAJOR_VERSION   $major
+#define UNICODE_DOT_VERSION     $dot
+#define UNICODE_DOT_DOT_VERSION $dotdot
+
+END
+
+# The data are at __DATA__  in this file.
 
 my @data = <DATA>;
 
@@ -95,13 +81,13 @@ foreach my $charset (get_supported_code_pages()) {
         my $name;
         my $cp;
         my $U_cp;   # code point in Unicode (not-native) terms
-        my $undef_ok = $desired_name || $flag =~ /skip_if_undef/;
 
         if ($name_or_cp =~ /^U\+(.*)/) {
             $U_cp = hex $1;
             $name = charnames::viacode($name_or_cp);
             if (! defined $name) {
-                die "Unknown code point '$name_or_cp' at line $.: $_\n" unless $undef_ok;
+                next if $flag =~ /skip_if_undef/;
+                die "Unknown code point '$name_or_cp' at line $.: $_\n" unless $desired_name;
                 $name = "";
             }
         }
@@ -176,6 +162,37 @@ print $out_fh "\n#endif /* H_UNICODE_CONSTANTS */\n";
 
 read_only_bottom_close_and_rename($out_fh);
 
+# DATA FORMAT
+#
+# A blank line is output as-is.
+# Comments (lines whose first non-blank is a '#') are converted to C-style,
+# though empty comments are converted to blank lines.  Otherwise, each line
+# represents one #define, and begins with either a Unicode character name with
+# the blanks and dashes in it squeezed out or replaced by underscores; or it
+# may be a hexadecimal Unicode code point of the form U+xxxx.  In the latter
+# case, the name will be looked-up to use as the name of the macro.  In either
+# case, the macro name will have suffixes as listed above, and all blanks and
+# dashes will be replaced by underscores.
+#
+# Each line may optionally have one of the following flags on it, separated by
+# white space from the initial token.
+#   string  indicates that the output is to be of the string form
+#           described in the comments above that are placed in the file.
+#   string_skip_ifundef  is the same as 'string', but instead of dying if the
+#           code point doesn't exist, the line is just skipped: no output is
+#           generated for it
+#   first   indicates that the output is to be of the FIRST_BYTE form.
+#   tail    indicates that the output is of the _TAIL form.
+#   native  indicates that the output is the code point, converted to the
+#           platform's native character set if applicable
+#
+# If the code point has no official name, the desired name may be appended
+# after the flag, which will be ignored if there is an official name.
+#
+# This program is used to make it convenient to create compile time constants
+# of UTF-8, and to generate proper EBCDIC as well as ASCII without manually
+# having to figure things out.
+
 __DATA__
 U+017F string
 
@@ -184,10 +201,12 @@ U+0300 string
 U+0399 string
 U+03BC string
 
-U+1E9E string
+U+1E9E string_skip_if_undef
 
 U+FB05 string
 U+FB06 string
+U+0130 string
+U+0131 string
 
 U+2010 string
 U+D800 first FIRST_SURROGATE
diff --git a/regexec.c b/regexec.c
index ec4ed861ab..f2517e5022 100644
--- a/regexec.c
+++ b/regexec.c
@@ -192,7 +192,7 @@ static const char* const non_utf8_target_but_utf8_required
                                         PL_utf8_swash_ptrs[_CC_WORDCHAR],     \
                                         "",                                   \
                                         PL_XPosix_ptrs[_CC_WORDCHAR],         \
-                                        LATIN_CAPITAL_LETTER_SHARP_S_UTF8);
+                                        LATIN_SMALL_LIGATURE_LONG_S_T_UTF8);
 
 #define PLACEHOLDER	/* Something for the preprocessor to grab onto */
 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
@@ -1753,7 +1753,7 @@ REXEC_FBC_SCAN( /* Loops while (s < strend) */                 \
 #define getGCB_VAL_CP(cp)                                                      \
           _generic_GET_BREAK_VAL_CP(                                           \
                                     PL_GCB_invlist,                            \
-                                    Grapheme_Cluster_Break_invmap,             \
+                                    _Perl_GCB_invmap,                          \
                                     (cp))
 
 /* Returns the GCB value for the first code point in the UTF-8 encoded string
@@ -1766,7 +1766,7 @@ REXEC_FBC_SCAN( /* Loops while (s < strend) */                 \
 #define getSB_VAL_CP(cp)                                                       \
           _generic_GET_BREAK_VAL_CP(                                           \
                                     PL_SB_invlist,                             \
-                                    Sentence_Break_invmap,                     \
+                                    _Perl_SB_invmap,                     \
                                     (cp))
 
 /* Returns the SB value for the first code point in the UTF-8 encoded string
@@ -1778,7 +1778,7 @@ REXEC_FBC_SCAN( /* Loops while (s < strend) */                 \
 #define getWB_VAL_CP(cp)                                                       \
           _generic_GET_BREAK_VAL_CP(                                           \
                                     PL_WB_invlist,                             \
-                                    Word_Break_invmap,                         \
+                                    _Perl_WB_invmap,                         \
                                     (cp))
 
 /* Returns the WB value for the first code point in the UTF-8 encoded string
diff --git a/t/uni/variables.t b/t/uni/variables.t
index 33f057a645..5601b9767f 100644
--- a/t/uni/variables.t
+++ b/t/uni/variables.t
@@ -231,7 +231,7 @@ for ( 0x0 .. 0xff ) {
         if ($chr =~ /[#*]/) {
 
             # Length-1 variables with these two characters used to be used by
-            # Perl, but now it generates a warning that they're gone.
+            # Perl, but now a warning is generated that they're gone.
             # Ignore such warnings.
             for (my $i = @warnings - 1; $i >= 0; $i--) {
                 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
diff --git a/unicode_constants.h b/unicode_constants.h
index 7da4eb3a48..be7a38e92a 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -8,10 +8,11 @@
 #ifndef H_UNICODE_CONSTANTS   /* Guard against nested #includes */
 #define H_UNICODE_CONSTANTS   1
 
-/* This file contains #defines for various Unicode code points.  The values
- * the macros expand to are the native Unicode code point, or all or portions
- * of the UTF-8 encoding for the code point.  In the former case, the macro
- * name has the suffix "_NATIVE"; otherwise, the suffix "_UTF8".
+/* This file contains #defines for the version of Unicode being used and
+ * various Unicode code points.  The values the code point macros expand to
+ * are the native Unicode code point, or all or portions of the UTF-8 encoding
+ * for the code point.  In the former case, the macro name has the suffix
+ * "_NATIVE"; otherwise, the suffix "_UTF8".
  *
  * The macros that have the suffix "_UTF8" may have further suffixes, as
  * follows:
@@ -20,6 +21,10 @@
  *  "_TAIL"       if instead it represents all but the first byte.  This, and
  *                with no additional suffix are both string constants */
 
+#define UNICODE_MAJOR_VERSION   8
+#define UNICODE_DOT_VERSION     0
+#define UNICODE_DOT_DOT_VERSION 0
+
 
 #if 'A' == 65 /* ASCII/Latin1 */
 #   define LATIN_SMALL_LETTER_LONG_S_UTF8  "\xC5\xBF"    /* U+017F */
@@ -33,6 +38,8 @@
 
 #   define LATIN_SMALL_LIGATURE_LONG_S_T_UTF8  "\xEF\xAC\x85"    /* U+FB05 */
 #   define LATIN_SMALL_LIGATURE_ST_UTF8  "\xEF\xAC\x86"    /* U+FB06 */
+#   define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8  "\xC4\xB0"    /* U+0130 */
+#   define LATIN_SMALL_LETTER_DOTLESS_I_UTF8  "\xC4\xB1"    /* U+0131 */
 
 #   define HYPHEN_UTF8  "\xE2\x80\x90"    /* U+2010 */
 #   define FIRST_SURROGATE_UTF8_FIRST_BYTE  0xED    /* U+D800 */
@@ -71,6 +78,8 @@
 
 #   define LATIN_SMALL_LIGATURE_LONG_S_T_UTF8  "\xDD\x72\x67\x46"    /* U+FB05 */
 #   define LATIN_SMALL_LIGATURE_ST_UTF8  "\xDD\x72\x67\x47"    /* U+FB06 */
+#   define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8  "\x8D\x57"    /* U+0130 */
+#   define LATIN_SMALL_LETTER_DOTLESS_I_UTF8  "\x8D\x58"    /* U+0131 */
 
 #   define HYPHEN_UTF8  "\xCA\x41\x57"    /* U+2010 */
 #   define FIRST_SURROGATE_UTF8_FIRST_BYTE  0xDD    /* U+D800 */
@@ -109,6 +118,8 @@
 
 #   define LATIN_SMALL_LIGATURE_LONG_S_T_UTF8  "\xDD\x71\x66\x46"    /* U+FB05 */
 #   define LATIN_SMALL_LIGATURE_ST_UTF8  "\xDD\x71\x66\x47"    /* U+FB06 */
+#   define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8  "\x8C\x57"    /* U+0130 */
+#   define LATIN_SMALL_LETTER_DOTLESS_I_UTF8  "\x8C\x58"    /* U+0131 */
 
 #   define HYPHEN_UTF8  "\xCA\x41\x57"    /* U+2010 */
 #   define FIRST_SURROGATE_UTF8_FIRST_BYTE  0xDD    /* U+D800 */
@@ -147,6 +158,8 @@
 
 #   define LATIN_SMALL_LIGATURE_LONG_S_T_UTF8  "\xDC\x74\x68\x46"    /* U+FB05 */
 #   define LATIN_SMALL_LIGATURE_ST_UTF8  "\xDC\x74\x68\x47"    /* U+FB06 */
+#   define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8  "\x8E\x58"    /* U+0130 */
+#   define LATIN_SMALL_LETTER_DOTLESS_I_UTF8  "\x8E\x59"    /* U+0131 */
 
 #   define HYPHEN_UTF8  "\xCA\x41\x58"    /* U+2010 */
 #   define FIRST_SURROGATE_UTF8_FIRST_BYTE  0xDC    /* U+D800 */
diff --git a/utf8.c b/utf8.c
index cbff7a7888..695daac9db 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1413,11 +1413,15 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_
 	    case MICRO_SIGN:
 		converted = GREEK_CAPITAL_LETTER_MU;
 		break;
+#if    UNICODE_MAJOR_VERSION > 2                                        \
+   || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1           \
+                                  && UNICODE_DOT_DOT_VERSION >= 8)
 	    case LATIN_SMALL_LETTER_SHARP_S:
 		*(p)++ = 'S';
 		*p = S_or_s;
 		*lenp = 2;
 		return 'S';
+#endif
 	    default:
 		Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect '%c' to map to '%c'", c, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS);
 		NOT_REACHED; /* NOTREACHED */
@@ -1540,6 +1544,9 @@ Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const unsigned int f
     if (c == MICRO_SIGN) {
 	converted = GREEK_SMALL_LETTER_MU;
     }
+#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
+   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
+                                      || UNICODE_DOT_DOT_VERSION > 0)
     else if ((flags & FOLD_FLAGS_FULL) && c == LATIN_SMALL_LETTER_SHARP_S) {
 
         /* If can't cross 127/128 boundary, can't return "ss"; instead return
@@ -1558,6 +1565,7 @@ Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const unsigned int f
             return 's';
         }
     }
+#endif
     else { /* In this range the fold of all other characters is their lower
               case */
 	converted = toLOWER_LATIN1(c);
@@ -2200,11 +2208,13 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
 
 	if (flags & FOLD_FLAGS_LOCALE) {
 
-#           define CAP_SHARP_S   LATIN_CAPITAL_LETTER_SHARP_S_UTF8
 #           define LONG_S_T      LATIN_SMALL_LIGATURE_LONG_S_T_UTF8
+            const unsigned int long_s_t_len    = sizeof(LONG_S_T) - 1;
+
+#         ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
+#           define CAP_SHARP_S   LATIN_CAPITAL_LETTER_SHARP_S_UTF8
 
             const unsigned int cap_sharp_s_len = sizeof(CAP_SHARP_S) - 1;
-            const unsigned int long_s_t_len    = sizeof(LONG_S_T) - 1;
 
             /* Special case these two characters, as what normally gets
              * returned under locale doesn't work */
@@ -2217,7 +2227,9 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
                               "resolved to \"\\x{17F}\\x{17F}\".");
                 goto return_long_s;
             }
-            else if (UTF8SKIP(p) == long_s_t_len
+            else
+#endif
+                 if (UTF8SKIP(p) == long_s_t_len
                      && memEQ((char *) p, LONG_S_T, long_s_t_len))
             {
                 /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
@@ -2226,6 +2238,28 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
                               "resolved to \"\\x{FB06}\".");
                 goto return_ligature_st;
             }
+
+#if    UNICODE_MAJOR_VERSION   == 3         \
+    && UNICODE_DOT_VERSION     == 0         \
+    && UNICODE_DOT_DOT_VERSION == 1
+#           define DOTTED_I   LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8
+
+            /* And special case this on this Unicode version only, for the same
+             * reaons the other two are special cased.  They would cross the
+             * 255/256 boundary which is forbidden under /l, and so the code
+             * wouldn't catch that they are equivalent (which they are only in
+             * this release) */
+            else if (UTF8SKIP(p) == sizeof(DOTTED_I) - 1
+                     && memEQ((char *) p, DOTTED_I, sizeof(DOTTED_I) - 1))
+            {
+                /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
+                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+                              "Can't do fc(\"\\x{0130}\") on non-UTF-8 locale; "
+                              "resolved to \"\\x{0131}\".");
+                goto return_dotless_i;
+            }
+#endif
+
 	    return check_locale_boundary_crossing(p, result, ustrp, lenp);
 	}
 	else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
@@ -2249,14 +2283,24 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
 
                     /* But in these instances, there is an alternative we can
                      * return that is valid */
-                    if (original == LATIN_CAPITAL_LETTER_SHARP_S
-                        || original == LATIN_SMALL_LETTER_SHARP_S)
-                    {
+                    if (original == LATIN_SMALL_LETTER_SHARP_S
+#ifdef LATIN_CAPITAL_LETTER_SHARP_S /* not defined in early Unicode releases */
+                        || original == LATIN_CAPITAL_LETTER_SHARP_S
+#endif
+                    ) {
                         goto return_long_s;
                     }
                     else if (original == LATIN_SMALL_LIGATURE_LONG_S_T) {
                         goto return_ligature_st;
                     }
+#if    UNICODE_MAJOR_VERSION   == 3         \
+    && UNICODE_DOT_VERSION     == 0         \
+    && UNICODE_DOT_DOT_VERSION == 1
+
+                    else if (original == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+                        goto return_dotless_i;
+                    }
+#endif
 		    Copy(p, ustrp, *lenp, char);
 		    return original;
 		}
@@ -2300,6 +2344,18 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
     *lenp = sizeof(LATIN_SMALL_LIGATURE_ST_UTF8) - 1;
     Copy(LATIN_SMALL_LIGATURE_ST_UTF8, ustrp, *lenp, U8);
     return LATIN_SMALL_LIGATURE_ST;
+
+#if    UNICODE_MAJOR_VERSION   == 3         \
+    && UNICODE_DOT_VERSION     == 0         \
+    && UNICODE_DOT_DOT_VERSION == 1
+
+  return_dotless_i:
+    *lenp = sizeof(LATIN_SMALL_LETTER_DOTLESS_I_UTF8) - 1;
+    Copy(LATIN_SMALL_LETTER_DOTLESS_I_UTF8, ustrp, *lenp, U8);
+    return LATIN_SMALL_LETTER_DOTLESS_I;
+
+#endif
+
 }
 
 /* Note:
@@ -3252,7 +3308,24 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash)
     * currently handle.  But it also means that FB05 and FB06 are equivalent in
     * a 1-1 mapping which we should handle, and this relationship may not be in
     * the main table.  Therefore this function examines all the multi-char
-    * sequences and adds the 1-1 mappings that come out of that.  */
+    * sequences and adds the 1-1 mappings that come out of that.
+    *
+    * XXX This function was originally intended to be multipurpose, but its
+    * only use is quite likely to remain for constructing the inversion of
+    * the CaseFolding (//i) property.  If it were more general purpose for
+    * regex patterns, it would have to do the FB05/FB06 game for simple folds,
+    * because certain folds are prohibited under /iaa and /il.  As an example,
+    * in Unicode 3.0.1 both U+0130 and U+0131 fold to 'i', and hence are both
+    * equivalent under /i.  But under /iaa and /il, the folds to 'i' are
+    * prohibited, so we would not figure out that they fold to each other.
+    * Code could be written to automatically figure this out, similar to the
+    * code that does this for multi-character folds, but this is the only case
+    * where something like this is ever likely to happen, as all the single
+    * char folds to The 0-255 range are now quite settled.  Instead there is a
+    * little special code that is compiled only for this Unicode version.  This
+    * is smaller and didn't require much coding time to do.  But this makes
+    * this routine strongly tied to being used just for CaseFolding.  If ever
+    * it should be generalized, this would have to be fixed */
 
     U8 *l, *lend;
     STRLEN lcur;
@@ -3395,7 +3468,24 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash)
     } /* End of specials */
 
     /* read $swash->{LIST} */
+
+#if    UNICODE_MAJOR_VERSION   == 3         \
+    && UNICODE_DOT_VERSION     == 0         \
+    && UNICODE_DOT_DOT_VERSION == 1
+
+    /* For this version only U+130 and U+131 are equivalent under qr//i.  Add a
+     * rule so that things work under /iaa and /il */
+
+    SV * mod_listsv = sv_mortalcopy(*listsvp);
+    sv_catpv(mod_listsv, "130\t130\t131\n");
+    l = (U8*)SvPV(mod_listsv, lcur);
+
+#else
+
     l = (U8*)SvPV(*listsvp, lcur);
+
+#endif
+
     lend = l + lcur;
 
     /* Go through each input line */
diff --git a/utf8.h b/utf8.h
index 3e15707ff8..191a6b2a5c 100644
--- a/utf8.h
+++ b/utf8.h
@@ -611,7 +611,11 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 #define GREEK_CAPITAL_LETTER_MU                 0x039C	/* Upper and title case
                                                            of MICRON */
 #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS   0x0178	/* Also is title case */
-#define LATIN_CAPITAL_LETTER_SHARP_S	        0x1E9E
+#ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
+#   define LATIN_CAPITAL_LETTER_SHARP_S	        0x1E9E
+#endif
+#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE   0x130
+#define LATIN_SMALL_LETTER_DOTLESS_I            0x131
 #define LATIN_SMALL_LETTER_LONG_S               0x017F
 #define LATIN_SMALL_LIGATURE_LONG_S_T           0xFB05
 #define LATIN_SMALL_LIGATURE_ST                 0xFB06
author	Karl Williamson <khw@cpan.org>	2015-07-28 22:17:01 -0600
committer	Karl Williamson <khw@cpan.org>	2015-07-28 22:17:01 -0600
commit	a0b05c4bde4f97301b663b8de70677551eacff4c (patch)
tree	e9329726efb7aa27f5fd08f1441943f95b68a594
parent	16d89be8495ff4e0a9b99e837b5444df8a4a6cc6 (diff)
parent	912fd71114648888551a4750534463f20ae16c7c (diff)
download	perl-a0b05c4bde4f97301b663b8de70677551eacff4c.tar.gz