6 files changed, 86 insertions, 24 deletions
diff --git a/regcomp.c b/regcomp.c
index 547f756a30..69d114e9e9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1930,6 +1930,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 	r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
 	r->reganch |= ROPT_EVAL_SEEN;
+    if (RExC_seen & REG_SEEN_SANY)
+	r->reganch |= ROPT_SANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
     PL_regdata = r->data; /* for regprop() */
@@ -2638,6 +2640,7 @@ tryagain:
 	    break;
 	case 'C':
 	    ret = reg_node(pRExC_state, SANY);
+	    RExC_seen |= REG_SEEN_SANY;
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar(pRExC_state);
 	    break;
diff --git a/regcomp.h b/regcomp.h
index 066e31f01d..ee9be39f71 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -285,10 +285,11 @@ struct regnode_charclass_class {
 
 #define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
 
-#define REG_SEEN_ZERO_LEN	1
-#define REG_SEEN_LOOKBEHIND	2
-#define REG_SEEN_GPOS		4
-#define REG_SEEN_EVAL		8
+#define REG_SEEN_ZERO_LEN	 1
+#define REG_SEEN_LOOKBEHIND	 2
+#define REG_SEEN_GPOS		 4
+#define REG_SEEN_EVAL		 8
+#define REG_SEEN_SANY		16	/* set in RExC_seen when \C is compiled to a SANY node */
 
 START_EXTERN_C
 
diff --git a/regexec.c b/regexec.c
index c70d1b1655..5d9e8ac8fa 100644
--- a/regexec.c
+++ b/regexec.c
@@ -398,7 +398,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 	      goto fail;
 	  }
-	  if (prog->check_offset_min == prog->check_offset_max) {
+	  if (prog->check_offset_min == prog->check_offset_max &&
+	      !(prog->reganch & ROPT_SANY_SEEN)) {
 	    /* Substring at constant offset from beg-of-str... */
 	    I32 slen;
 
@@ -474,6 +475,10 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 	if (data)
 	    *data->scream_olds = s;
     }
+    else if (prog->reganch & ROPT_SANY_SEEN)
+	s = fbm_instr((U8*)(s + start_shift),
+		      (U8*)(strend - end_shift),
+		      check, PL_multiline ? FBMrf_MULTILINE : 0);
     else
 	s = fbm_instr(HOP3(s, start_shift, strend),
 		      HOP3(strend, -end_shift, strbeg),
@@ -1407,7 +1412,8 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
 
     minlen = prog->minlen;
     if (do_utf8) {
-      if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
+      if (!(prog->reganch & ROPT_SANY_SEEN))
+        if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
     }
     else {
       if (strend - startpos < minlen) goto phooey;
@@ -2075,13 +2081,6 @@ S_regmatch(pTHX_ regnode *prog)
 		sayNO;
 	    break;
 	case SANY:
-	    if (do_utf8) {
-		locinput += PL_utf8skip[nextchr];
-		if (locinput > PL_regeol)
-		    sayNO;
-		nextchr = UCHARAT(locinput);
-		break;
-	    }
 	    if (!nextchr && locinput >= PL_regeol)
 		sayNO;
 	    nextchr = UCHARAT(++locinput);
@@ -3563,15 +3562,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
 	}
 	break;
     case SANY:
-	if (do_utf8) {
-	    loceol = PL_regeol;
-	    while (hardcount < max && scan < loceol) {
-		scan += UTF8SKIP(scan);
-		hardcount++;
-	    }
-	} else {
-	    scan = loceol;
-	}
+	scan = loceol;
 	break;
     case EXACT:		/* length of string is 1 */
 	c = (U8)*STRING(p);
diff --git a/regexp.h b/regexp.h
index 3c71060a40..33ace4066e 100644
--- a/regexp.h
+++ b/regexp.h
@@ -54,6 +54,7 @@ typedef struct regexp {
 #define ROPT_CHECK_ALL		0x00100
 #define ROPT_LOOKBEHIND_SEEN	0x00200
 #define ROPT_EVAL_SEEN		0x00400
+#define ROPT_SANY_SEEN		0x00800	/* NOTE(review): 0x00800 lies inside the 0xf800 range the comment below gives to PMf_COMPILETIME -- confirm no clash */
 
 /* 0xf800 of reganch is used by PMf_COMPILETIME */
 
diff --git a/t/op/pat.t b/t/op/pat.t
index 17df867fd9..d7eb9f8fd1 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,7 +4,7 @@
 # the format supported by op/regexp.t.  If you want to add a test
 # that does fit that format, add it to op/re_tests, not here.
 
-print "1..231\n";
+print "1..240\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1129,3 +1129,56 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/;
 print "ok $test\n";
 $test++;
 
+$_ = "a\x{100}b";  # \C tests: \x{100} is expected to be seen as the two bytes C4 80
+if (/(.)(\C)(\C)(.)/) {  # . takes "a" and "b", each \C takes one byte of \x{100}
+  print "ok 232\n";  # test numbers are hard-coded here rather than taken from $test
+  if ($1 eq "a") {
+    print "ok 233\n";
+  } else {
+    print "not ok 233\n";
+  }
+  if ($2 eq "\xC4") {  # first byte captured by \C
+    print "ok 234\n";
+  } else {
+    print "not ok 234\n";
+  }
+  if ($3 eq "\x80") {  # second byte captured by \C
+    print "ok 235\n";
+  } else {
+    print "not ok 235\n";
+  }
+  if ($4 eq "b") {
+    print "ok 236\n";
+  } else {
+    print "not ok 236\n";
+  }
+} else {
+  for (232..236) {  # overall match failed: report every dependent test as failed
+    print "not ok $_\n";
+  }
+}
+$_ = "\x{100}";  # same character again, this time matched with two successive //g matches
+if (/(\C)/g) {  # first //g match: expects \xC4
+  print "ok 237\n";
+  if ($1 eq "\xC4") {
+    print "ok 238\n";
+  } else {
+    print "not ok 238\n";
+  }
+} else {
+  for (237..238) {
+    print "not ok $_\n";
+  }
+}
+if (/(\C)/g) {  # second //g match on the same $_: expects \x80
+  print "ok 239\n";
+  if ($1 eq "\x80") {
+    print "ok 240\n";
+  } else {
+    print "not ok 240\n";
+  }
+} else {
+  for (239..240) {
+    print "not ok $_\n";
+  }
+}
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index 577e6b4e2b..60e6c6e102 100755
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..107\n";
+print "1..109\n";
 
 my $test = 1;
 
@@ -577,3 +577,16 @@ sub nok_bytes {
     $test++;					# 107
 }
 
+{
+    # bug id 20001230.002: \C\C is expected to step over exactly the leading "É"
+
+    use utf8;
+
+    print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';  # (.) must land on the second character
+    print "ok $test\n";
+    $test++;					# 108
+
+    print "not " unless "École" =~ /^\C\C(c)/;  # same offsets, with a literal after the two \C
+    print "ok $test\n";
+    $test++;					# 109
+}