3 files changed, 142 insertions, 26 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 3bdaffa1ca..64e4453e58 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -358,6 +358,65 @@
 ( ( 0xFF21 <= cp && cp <= 0xFF26 ) || ( 0xFF41 <= cp && cp <= 0xFF46 ) ) ) )
 
 /*
+	XPERLSPACE: \p{XPerlSpace}
+
+	\p{XPerlSpace}
+*/
+/*** GENERATED CODE ***/
+#define is_XPERLSPACE(s,is_utf8) /* 0 if no match at s, else bytes matched; s[1], s[2] are read with no length check; s is expanded repeatedly */ \
+( ( ( 0x09 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x20 == ((U8*)s)[0] ) ? 1\
+: ( is_utf8 ) ?                         /* multi-byte forms: UTF-8 only */   \
+    ( ( 0xC2 == ((U8*)s)[0] ) ?         /* U+0085, U+00A0 */                \
+	( ( 0x85 == ((U8*)s)[1] || 0xA0 == ((U8*)s)[1] ) ? 2 : 0 )          \
+    : ( 0xE1 == ((U8*)s)[0] ) ?         /* U+1680, U+180E */                \
+	( ( 0x9A == ((U8*)s)[1] ) ?                                         \
+	    ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 )                             \
+	: ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 )  \
+    : ( 0xE2 == ((U8*)s)[0] ) ?  /* U+2000..U+200A, U+2028, U+2029, U+202F, U+205F */ \
+	( ( 0x80 == ((U8*)s)[1] ) ?                                         \
+	    ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || ( ((U8*)s)[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+	: ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 )  \
+    : ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )\
+: ( 0x85 == ((U8*)s)[0] || 0xA0 == ((U8*)s)[0] ) ) /* not UTF-8: the single byte 0x85 or 0xA0 */
+
+/*** GENERATED CODE ***/
+#define is_XPERLSPACE_utf8(s) /* s is UTF-8: 0 if no match, else bytes matched; s[1], s[2] are read with no length check; s is expanded repeatedly */ \
+( ( ( 0x09 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x20 == ((U8*)s)[0] ) ? 1\
+: ( 0xC2 == ((U8*)s)[0] ) ?             /* U+0085, U+00A0 */                \
+    ( ( 0x85 == ((U8*)s)[1] || 0xA0 == ((U8*)s)[1] ) ? 2 : 0 )              \
+: ( 0xE1 == ((U8*)s)[0] ) ?             /* U+1680, U+180E */                \
+    ( ( 0x9A == ((U8*)s)[1] ) ?                                             \
+	( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 )                                 \
+    : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 )      \
+: ( 0xE2 == ((U8*)s)[0] ) ? /* U+2000..U+200A, U+2028, U+2029, U+202F, U+205F; the 0x80 lower bound rejects a non-continuation (or NUL) third byte, as is_XPERLSPACE does */ \
+    ( ( 0x80 == ((U8*)s)[1] ) ?                                             \
+	( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || ( ((U8*)s)[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+    : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 )      \
+: ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+#define is_XPERLSPACE_high(s) /* s is UTF-8; only the 3-byte forms are tested: 0 if no match, else 3; s[1], s[2] are read with no length check; s is expanded repeatedly */ \
+( ( 0xE1 == ((U8*)s)[0] ) ?             /* U+1680, U+180E */                \
+    ( ( 0x9A == ((U8*)s)[1] ) ?                                             \
+	( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 )                                 \
+    : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 )      \
+: ( 0xE2 == ((U8*)s)[0] ) ? /* U+2000..U+200A, U+2028, U+2029, U+202F, U+205F; the 0x80 lower bound rejects a non-continuation (or NUL) third byte, as is_XPERLSPACE does */ \
+    ( ( 0x80 == ((U8*)s)[1] ) ?                                             \
+	( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || ( ((U8*)s)[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+    : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 )      \
+: ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+#define is_XPERLSPACE_cp_high(cp) /* nonzero iff code point cp is one of those listed below */ \
+( 0x1680 == cp || ( 0x1680 < cp &&     /* candidates are tried in ascending  */ \
+( 0x180E == cp || ( 0x180E < cp &&     /* order; each "<" guard ends the     */ \
+( ( 0x2000 <= cp && cp <= 0x200A ) || ( 0x200A < cp && /* chain early when  */ \
+( 0x2028 == cp || ( 0x2028 < cp &&     /* cp is smaller than the rest.       */ \
+( 0x2029 == cp || ( 0x2029 < cp &&     /* cp is expanded many times and      */ \
+( 0x202F == cp || ( 0x202F < cp &&     /* unparenthesized: pass a plain,     */ \
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) ) ) ) /* side-effect-free value */
+
+/*
 	REPLACEMENT: Unicode REPLACEMENT CHARACTER
 
 	0xFFFD
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 46425e4965..0bab57086a 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1400,6 +1400,10 @@ XDIGIT: Hexadecimal digits
 => UTF8 high cp_high :fast
 \p{XDigit}
 
+XPERLSPACE: \p{XPerlSpace}
+=> generic UTF8 high cp_high :fast
+\p{XPerlSpace}
+
 REPLACEMENT: Unicode REPLACEMENT CHARACTER
 => UTF8 :safe
 0xFFFD
diff --git a/regexec.c b/regexec.c
index 69bda15de3..d0560cec4e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -164,7 +164,6 @@ static const char* const non_utf8_target_but_utf8_required
 
 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
-#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 
 #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
         /* No asserts are done for some of these, in case called on a   */  \
@@ -1713,16 +1712,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	    break;
 	case SPACEU:
-	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target),
+	    REXEC_FBC_CSCAN(
+		is_XPERLSPACE_utf8(s),
                 isSPACE_L1((U8) *s)
 	    );
 	    break;
 	case SPACE:
-	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target),
+	    REXEC_FBC_CSCAN(
+		is_XPERLSPACE_utf8(s),
                 isSPACE((U8) *s)
 	    );
 	    break;
@@ -1738,16 +1735,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	    break;
 	case NSPACEU:
-	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		!( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target)),
+	    REXEC_FBC_CSCAN(
+		! is_XPERLSPACE_utf8(s),
                 ! isSPACE_L1((U8) *s)
 	    );
 	    break;
 	case NSPACE:
-	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target)),
+	    REXEC_FBC_CSCAN(
+		! is_XPERLSPACE_utf8(s),
                 ! isSPACE((U8) *s)
 	    );
 	    break;
@@ -4331,11 +4326,73 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 		  ALNUMA, NALNUMA, isWORDCHAR_A,
 		  alnum, "a");
 
-        CCC_TRY_U(SPACE,  NSPACE,  isSPACE,
-		  SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
-		  SPACEU, NSPACEU, isSPACE_L1,
-		  SPACEA, NSPACEA, isSPACE_A,
-		  space, " ");
+        case SPACEL:
+            PL_reg_flags |= RF_tainted;
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+            if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
+                if (! isSPACE_LC_utf8((U8 *) locinput)) {
+                    sayNO;
+                }
+            }
+            else if (! isSPACE_LC((U8) nextchr)) {
+                    sayNO;
+            }
+            goto increment_locinput;
+
+        case NSPACEL:
+            PL_reg_flags |= RF_tainted;
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+            if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
+                if (isSPACE_LC_utf8((U8 *) locinput)) {
+                    sayNO;
+                }
+            }
+            else if (isSPACE_LC(nextchr)) {
+                    sayNO;
+            }
+            goto increment_locinput;
+
+        case SPACE:
+            if (utf8_target) {
+                goto utf8_space;
+            }
+            /* FALL THROUGH */
+        case SPACEA:
+            if (NEXTCHR_IS_EOS || ! isSPACE_A(nextchr)) {
+                sayNO;
+            }
+            /* Matched a utf8-invariant, so don't have to worry about utf8 */
+            locinput++;
+            break;
+
+        case NSPACE:
+            if (utf8_target) {
+                goto utf8_nspace;
+            }
+            /* FALL THROUGH */
+        case NSPACEA:
+            if (NEXTCHR_IS_EOS || isSPACE_A(nextchr)) {
+                sayNO;
+            }
+            goto increment_locinput;
+
+        case SPACEU:
+          utf8_space:
+            if (NEXTCHR_IS_EOS || ! is_XPERLSPACE(locinput, utf8_target)) {
+                sayNO;
+            }
+            goto increment_locinput;
+
+        case NSPACEU:
+          utf8_nspace:
+            if (NEXTCHR_IS_EOS || is_XPERLSPACE(locinput, utf8_target)) {
+                sayNO;
+            }
+            goto increment_locinput;
 
         CCC_TRY(DIGIT,  NDIGIT,  isDIGIT,
 		DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
@@ -6902,10 +6959,8 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
 
     utf8_space:
 
-	    LOAD_UTF8_CHARCLASS_SPACE();
-	    while (hardcount < max && scan < loceol &&
-		   (*scan == ' ' ||
-                    swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
+	    while (hardcount < max && scan < loceol
+                   && is_XPERLSPACE_utf8((U8*)scan))
             {
 		scan += UTF8SKIP(scan);
 		hardcount++;
@@ -6955,10 +7010,8 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
 
     utf8_Nspace:
 
-	    LOAD_UTF8_CHARCLASS_SPACE();
-	    while (hardcount < max && scan < loceol &&
-		   ! (*scan == ' ' ||
-                      swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
+	    while (hardcount < max && scan < loceol
+                   && ! is_XPERLSPACE_utf8((U8*)scan))
             {
 		scan += UTF8SKIP(scan);
 		hardcount++;