6 files changed, 112 insertions, 16 deletions
diff --git a/embed.fnc b/embed.fnc
index c33833a53a..39060f0bcf 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -778,9 +778,9 @@ ADMpR	|bool	|is_uni_print_lc|UV c
 ADMpR	|bool	|is_uni_punct_lc|UV c
 ADMpPR	|bool	|is_uni_xdigit_lc|UV c
 AndmoR	|bool	|is_utf8_invariant_string|NN const U8* const s		    \
-		|STRLEN const len
+		|STRLEN len
 AnidR	|bool	|is_utf8_invariant_string_loc|NN const U8* const s	    \
-		|STRLEN const len					    \
+		|STRLEN len						    \
 		|NULLOK const U8 ** ep
 AmnpdRP	|bool	|is_ascii_string|NN const U8* const s|const STRLEN len
 AmnpdRP	|bool	|is_invariant_string|NN const U8* const s|const STRLEN len
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm
index cdc7e5c380..64022244d5 100644
--- a/ext/XS-APItest/APItest.pm
+++ b/ext/XS-APItest/APItest.pm
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.93';
+our $VERSION = '0.94';
 
 require XSLoader;
 
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index 8bf1545b63..ea793ba39e 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -6005,6 +6005,19 @@ test_is_utf8_string(char *s, STRLEN len)
         RETVAL
 
 AV *
+test_is_utf8_invariant_string_loc(char *s, STRLEN offset, STRLEN len)
+    PREINIT:
+        AV *av;
+        const U8 * ep;
+    CODE:
+        av = newAV();
+        av_push(av, newSViv(is_utf8_invariant_string_loc((U8 *) s + offset, len, &ep)));
+        av_push(av, newSViv(ep - ((U8 *) s + offset)));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+AV *
 test_is_utf8_string_loc(char *s, STRLEN len)
     PREINIT:
         AV *av;
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index a463d1b6eb..123820869d 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -15,6 +15,30 @@ $|=1;
 
 use XS::APItest;
 
+
+my $s = "A" x 100 ;
+my $ret_ref = test_is_utf8_invariant_string_loc($s, 0, length $s);
+is($ret_ref->[0], 1, "is_utf8_invariant_string_loc returns TRUE for invariant");
+
+my $above_word_length = 9;
+for my $initial (0 .. $above_word_length) {
+    for my $offset (0 .. $above_word_length) {
+        for my $trailing (0 .. $above_word_length) {
+            if ($initial >= $offset) {
+                my $variant_pos = $initial - $offset;
+                $s = "A" x $initial . "\x80" . "A" x $trailing;
+                my $ret_ref = test_is_utf8_invariant_string_loc($s, $offset,
+                                                                length $s);
+                is($ret_ref->[0], 0, "is_utf8_invariant_string_loc returns"
+                                   . " FALSE for variant at $variant_pos,"
+                                   . " first $offset ignored)");
+                is($ret_ref->[1], $variant_pos,
+                                        "   And returns the correct position");
+            }
+        }
+    }
+}
+
 my $pound_sign = chr utf8::unicode_to_native(163);
 
 # This  test file can't use byte_utf8a_to_utf8n() from t/charset_tools.pl
diff --git a/inline.h b/inline.h
index 2f67af8833..ddafde9650 100644
--- a/inline.h
+++ b/inline.h
@@ -370,29 +370,88 @@ UTF-8 invariant, this function does not change the contents of C<*ep>.
 
 =cut
 
-XXX On ASCII machines this could be sped up by doing word-at-a-time operations
-
 */
 
 PERL_STATIC_INLINE bool
-S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
+S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 {
-    const U8* const send = s + (len ? len : strlen((const char *)s));
+    const U8* send;
     const U8* x = s;
 
     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 
-    while (x < send) {
-	if (UTF8_IS_INVARIANT(*x)) {
-            x++;
-            continue;
+    if (len == 0) {
+        len = strlen((const char *)s);
+    }
+
+    send = s + len;
+
+#ifndef EBCDIC
+    /* Try to get the widest word on this platform */
+#  ifdef HAS_LONG_LONG
+#    define PERL_WORDCAST unsigned long long
+#    define PERL_WORDSIZE LONGLONGSIZE
+#  else
+#    define PERL_WORDCAST UV
+#    define PERL_WORDSIZE UVSIZE
+#  endif
+
+#  if PERL_WORDSIZE == 4
+#    define PERL_VARIANTS_WORD_MASK 0x80808080
+#    define PERL_WORD_BOUNDARY_MASK 0x3
+#  elif PERL_WORDSIZE == 8
+#    define PERL_VARIANTS_WORD_MASK 0x8080808080808080
+#    define PERL_WORD_BOUNDARY_MASK 0x7
+#  else
+#    error Unexpected word size
+#  endif
+
+    /* Process per-byte until reach word boundary.  XXX This loop could be
+     * eliminated if we knew that this platform had fast unaligned reads */
+    while (x < send && (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) {
+        if (! UTF8_IS_INVARIANT(*x)) {
+            if (ep) {
+                *ep = x;
+            }
+
+            return FALSE;
         }
+        x++;
+    }
+
+    /* Process per-word as long as we have at least a full word left */
+    while (x + PERL_WORDSIZE <= send) {
+        if ((* (PERL_WORDCAST *) x) & PERL_VARIANTS_WORD_MASK)  {
+
+            /* Found a variant.  Just return if caller doesn't want its exact
+             * position */
+            if (! ep) {
+                return FALSE;
+            }
 
-        if (ep) {
-            *ep = x;
+            /* Otherwise fall into final loop to find which byte it is */
+            break;
         }
+        x += PERL_WORDSIZE;
+    }
 
-        return FALSE;
+#  undef PERL_WORDCAST
+#  undef PERL_WORDSIZE
+#  undef PERL_WORD_BOUNDARY_MASK
+#  undef PERL_VARIANTS_WORD_MASK
+#endif
+
+    /* Process per-byte */
+    while (x < send) {
+	if (! UTF8_IS_INVARIANT(*x)) {
+            if (ep) {
+                *ep = x;
+            }
+
+            return FALSE;
+        }
+
+        x++;
     }
 
     return TRUE;
diff --git a/proto.h b/proto.h
index 94009acac6..598e7c1c04 100644
--- a/proto.h
+++ b/proto.h
@@ -1626,11 +1626,11 @@ PERL_CALLCONV bool	Perl_is_utf8_idfirst(pTHX_ const U8 *p)
 #define PERL_ARGS_ASSERT_IS_UTF8_IDFIRST	\
 	assert(p)
 
-/* PERL_CALLCONV bool	is_utf8_invariant_string(const U8* const s, STRLEN const len)
+/* PERL_CALLCONV bool	is_utf8_invariant_string(const U8* const s, STRLEN len)
 			__attribute__warn_unused_result__; */
 
 #ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE bool	S_is_utf8_invariant_string_loc(const U8* const s, STRLEN const len, const U8 ** ep)
+PERL_STATIC_INLINE bool	S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC	\
 	assert(s)