summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- pod/perldebguts.pod 7
-rw-r--r-- regcomp.c 190
-rw-r--r-- regcomp.sym 6
-rw-r--r-- regnodes.h 243
-rw-r--r-- t/porting/known_pod_issues.dat 2
5 files changed, 308 insertions, 140 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index a826d9d237..b076bcf2d3 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -680,6 +680,13 @@ will be lost.
EXACTFU_ONLY8 str Like EXACTFU, but only UTF-8 encoded
targets can match
+ EXACTFS_B_U str EXACTFU but begins with [Ss]; (string not
+ UTF-8; compile-time only).
+ EXACTFS_E_U str EXACTFU but ends with [Ss]; (string not UTF-
+ 8; compile-time only).
+ EXACTFS_BE_U str EXACTFU but begins and ends with [Ss];
+ (string not UTF-8; compile-time only).
+
# Do nothing types
NOTHING no Match empty string.
diff --git a/regcomp.c b/regcomp.c
index a501bf1475..0fc793626f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4001,6 +4001,108 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
else if ((OP(scan) == EXACTFU_ONLY8) && (OP(n) == EXACTFU)) {
; /* join is compatible, no need to change OP */
}
+ else if (OP(scan) == EXACTFU) {
+ if (OP(n) != EXACTFU) {
+
+ /* Here the first node is EXACTFU and the second isn't.
+ * Normally EXACTFU nodes are compatible for joining only
+ * with EXACTFU_ONLY8 nodes (already handled), and other
+ * EXACTFU nodes. But under /di, certain temporary
+ * EXACTFS_foo_U nodes are generated, which are compatible.
+ * We check for this case here. These need to be resolved
+ * to either EXACTFU or EXACTF at joining time. They have
+ * nothing in them that would forbid them from being the
+ * more desirable EXACTFU nodes except that they begin
+ * and/or end with a single [Ss]. The reason this is
+ * problematic is because they could be joined in this loop
+ * with an adjacent node that ends and/or begins with [Ss]
+ * which would then form the sequence 'ss', which matches
+ * differently under /di than /ui, in which case EXACTFU
+ * can't be used. If the 'ss' sequence doesn't get formed,
+ * the nodes get absorbed into any adjacent EXACTFU node.
+ * And if the only adjacent node is EXACTF, they get
+ * absorbed into that, under the theory that a longer node
+ * is better than two shorter ones, even if one is EXACTFU.
+ * Note that EXACTFU_ONLY8 is generated only for UTF-8
+ * patterns, and the EXACTFS_foo_U ones only for non-UTF-8.
+ * */
+
+ if (OP(n) == EXACTFS_E_U || OP(n) == EXACTFS_BE_U) {
+
+ /* Here the joined node would end with 's'. If the
+ * node following the combination is an EXACTF one,
+ * it's better to join this EXACTFS_fooE_U with that
+ * one, leaving the current one in 'scan' to be the more
+ * desirable EXACTFU */
+ if (OP(nnext) == EXACTF) {
+ break;
+ }
+ OP(scan) = EXACTFS_E_U;
+ }
+ else if (OP(n) != EXACTFS_B_U) {
+ break; /* This would be an incompatible join; stop */
+ }
+ }
+ }
+ else if (OP(scan) == EXACTF) {
+ if (OP(n) != EXACTF) {
+
+ /* Here the first node is EXACTF and the second isn't.
+ * EXACTF nodes are compatible for joining only with other
+ * EXACTF nodes, and the EXACTFS_foo_U nodes. But the
+ * latter nodes can be also joined with EXACTFU ones, and
+ * that is a better outcome, so if the node following 'n'
+ * is EXACTFU, quit now so that those two can be joined
+ * later */
+ if ( OP(n) != EXACTFS_B_U
+ && OP(n) != EXACTFS_E_U
+ && OP(n) != EXACTFS_BE_U)
+ {
+ break;
+ }
+ else if (OP(nnext) == EXACTFU) {
+ break;
+ }
+ else {
+ /* Here the next node can be joined with the EXACTF
+ * node, and become part of it. That they begin or end
+ * with 's' now doesn't matter. */
+ }
+ }
+ }
+ else if (OP(scan) == EXACTFS_B_U) {
+
+ /* Here, the first node begins with, but does not end with, 's'.
+ * That means it doesn't form 'ss' with the following node, so
+ * can become EXACTFU, and either stand on its own or be joined
+ * with a following EXACTFU. If the following is instead an
+ * EXACTF, the two can also be joined together as EXACTF */
+ if (OP(n) == EXACTF) {
+ OP(scan) = EXACTF;
+ }
+ else {
+ OP(scan) = EXACTFU;
+ if (OP(n) != EXACTFU) {
+ break;
+ }
+ }
+ }
+ else if (OP(scan) == EXACTFS_E_U || OP(scan) == EXACTFS_BE_U) {
+
+ /* Here, the first node ends with 's', and could become an
+ * EXACTFU (or be joined with a following EXACTFU) if that next
+ * node doesn't begin with 's'; otherwise it must become an
+ * EXACTF node. */
+ if (OP(n) == EXACTFS_B_U || OP(n) == EXACTFS_BE_U) {
+ OP(scan) = EXACTF;
+ }
+ else {
+ OP(scan) = EXACTFU;
+ if (OP(n) != EXACTFU) {
+ break;
+ }
+ }
+ }
else if (OP(scan) != OP(n)) {
/* The only other compatible joinings are the same node type */
@@ -4036,6 +4138,15 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
#endif
}
+ /* These temporary nodes can now be turned into EXACTFU, and must, as
+ * regexec.c doesn't handle them */
+ if ( OP(scan) == EXACTFS_B_U
+ || OP(scan) == EXACTFS_E_U
+ || OP(scan) == EXACTFS_BE_U)
+ {
+ OP(scan) = EXACTFU;
+ }
+
*min_subtract = 0;
*unfolded_multi_char = FALSE;
@@ -5174,6 +5285,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
min++;
/* FALLTHROUGH */
case STAR:
+ next = NEXTOPER(scan);
+
+ /* These temporary nodes can now be turned into EXACTFU, and
+ * must, as regexec.c doesn't handle them */
+ if ( OP(next) == EXACTFS_B_U
+ || OP(next) == EXACTFS_E_U
+ || OP(next) == EXACTFS_BE_U)
+ {
+ OP(next) = EXACTFU;
+ }
+
if (flags & SCF_DO_STCLASS) {
mincount = 0;
maxcount = REG_INFTY;
@@ -13786,6 +13908,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* as the latter's folds aren't known until runtime. */
bool maybe_exactfu = FOLD;
+ /* An EXACTF node that otherwise could be turned into EXACTFU
+ * can't be if it starts and/or ends with [Ss], because during
+ * optimization it could be joined with another node that ends
+ * and/or starts with [Ss], creating the sequence 'ss', which needs
+ * to remain in an EXACTF node. This flag is used to signal this
+ * situation */
+ bool maybe_exactfs = FALSE;
+
/* Single-character EXACTish nodes are almost always SIMPLE. This
* allows us to override this as encountered */
U8 maybe_SIMPLE = SIMPLE;
@@ -14282,9 +14412,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* On non-ancient Unicode versions, this includes the
* multi-char fold SHARP S to 'ss' */
+ if (len == 0 && isALPHA_FOLD_EQ(ender, 's')) {
+ maybe_exactfs = TRUE; /* Node begins with 's' */
+ }
else if ( UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
- || ( isALPHA_FOLD_EQ(ender, 's')
- && (len == 0 || isALPHA_FOLD_EQ(*(s-1), 's'))))
+ || ( isALPHA_FOLD_EQ(ender, 's')
+ && isALPHA_FOLD_EQ(*(s-1), 's')))
{
/* Here, we have one of the following:
* a) a SHARP S. This folds to 'ss' only under
@@ -14301,24 +14434,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* so that we won't generate an unwanted
* match, unless, at runtime, the target
* string is in UTF-8.
- * c) an initial s in the node. By itself, this
- * isn't a problem, but if we later join this
- * and the node preceding it together, where
- * that one ends with an 's', the juncture
- * would contain 'ss', and again we could have
- * an inappropriate match, so keep this node
- * EXACTF. After we've accumulated the node
- * we also make sure that a final s keeps it
- * from becoming EXACTFU.
- *
- * XXX An enhancement would be to create a new
- * node-type, say EXACTFS, which would be EXACTFU
- * except for beginning or ending with 's'. This
- * could trivially be turned into EXACTFU after
- * joining, if appropriate, and would then be
- * trieable */
+ * */
- maybe_exactfu = FALSE;
+ maybe_exactfs = FALSE; /* Can't generate an
+ EXACTFS node */
+ maybe_exactfu = FALSE; /* Nor EXACTFU (unless we
+ already are in one) */
if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
maybe_SIMPLE = 0;
if (node_type == EXACTFU) {
@@ -14532,12 +14653,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
if (FOLD) {
- /* If the node ends in an 's' we make sure it stays EXACTF,
- * as if it turns into an EXACTFU, it could later get
- * joined with another 's' that would then wrongly match
- * the sharp s */
- if (maybe_exactfu && isALPHA_FOLD_EQ(ender, 's'))
- {
+ /* If the node ends in an 's' it can't now be changed into
+ * an EXACTFU, as the node could later get joined with another
+ * one that begins with 's', and that combination would
+ * then wrongly match the sharp s under /di. (Note that if
+ * it's already EXACTFU, this is irrelevant) If this is
+ * the only reason keeping it from being an EXACTFU, we
+ * create a special node type so that at joining time, we
+ * can turn it into an EXACTFU if no 'ss' is formed */
+ if (isALPHA_FOLD_EQ(ender, 's')) {
+ if (maybe_exactfu && node_type == EXACTF) {
+ node_type = (maybe_exactfs)
+ ? EXACTFS_BE_U
+ : EXACTFS_E_U;
+ }
maybe_exactfu = FALSE;
}
@@ -14554,6 +14683,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
else if (node_type == EXACTF) {
RExC_seen_d_op = TRUE;
+
+ /* If the only thing keeping this from being EXACTFU is
+ * that it begins with 's', change it to a special node
+ * type so that during the later join, we can easily
+ * check for it, and do the change there if appropriate */
+ if (maybe_exactfs) {
+ node_type = EXACTFS_B_U;
+ }
}
/* The micro sign is the only below 256 character that
@@ -19334,6 +19471,9 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
case EXACT_ONLY8:
case EXACTL:
case EXACTF:
+ case EXACTFS_B_U:
+ case EXACTFS_E_U:
+ case EXACTFS_BE_U:
case EXACTFAA_NO_TRIE:
case EXACTFAA:
case EXACTFU:
diff --git a/regcomp.sym b/regcomp.sym
index dffc03b1a0..ddf5ba886f 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -117,6 +117,12 @@ EXACTFU_ONLY8 EXACT, str ; Like EXACTFU, but only UTF-8 encoded targets
# One could add EXACTFAA8 and something that has the same effect for /l,
# but these would be extremely uncommon
+# If we ran out of node types, these could be replaced by some other method,
+# such as instead examining the first and final characters of nodes.
+EXACTFS_B_U EXACT, str ; EXACTFU but begins with [Ss]; (string not UTF-8; compile-time only).
+EXACTFS_E_U EXACT, str ; EXACTFU but ends with [Ss]; (string not UTF-8; compile-time only).
+EXACTFS_BE_U EXACT, str ; EXACTFU but begins and ends with [Ss]; (string not UTF-8; compile-time only).
+
#*Do nothing types
NOTHING NOTHING, no ; Match empty string.
diff --git a/regnodes.h b/regnodes.h
index 7c12d1f4d9..94b444379c 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 101
-#define REGMATCH_STATE_MAX 141
+#define REGNODE_MAX 104
+#define REGMATCH_STATE_MAX 144
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -58,61 +58,64 @@
#define EXACTFAA_NO_TRIE 44 /* 0x2c Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
#define EXACT_ONLY8 45 /* 0x2d Like EXACT, but only UTF-8 encoded targets can match */
#define EXACTFU_ONLY8 46 /* 0x2e Like EXACTFU, but only UTF-8 encoded targets can match */
-#define NOTHING 47 /* 0x2f Match empty string. */
-#define TAIL 48 /* 0x30 Match empty string. Can jump here from outside. */
-#define STAR 49 /* 0x31 Match this (simple) thing 0 or more times. */
-#define PLUS 50 /* 0x32 Match this (simple) thing 1 or more times. */
-#define CURLY 51 /* 0x33 Match this simple thing {n,m} times. */
-#define CURLYN 52 /* 0x34 Capture next-after-this simple thing */
-#define CURLYM 53 /* 0x35 Capture this medium-complex thing {n,m} times. */
-#define CURLYX 54 /* 0x36 Match this complex thing {n,m} times. */
-#define WHILEM 55 /* 0x37 Do curly processing and see if rest matches. */
-#define OPEN 56 /* 0x38 Mark this point in input as start of #n. */
-#define CLOSE 57 /* 0x39 Close corresponding OPEN of #n. */
-#define SROPEN 58 /* 0x3a Same as OPEN, but for script run */
-#define SRCLOSE 59 /* 0x3b Close preceding SROPEN */
-#define REF 60 /* 0x3c Match some already matched string */
-#define REFF 61 /* 0x3d Match already matched string, folded using native charset rules for non-utf8 */
-#define REFFL 62 /* 0x3e Match already matched string, folded in loc. */
-#define REFFU 63 /* 0x3f Match already matched string, folded using unicode rules for non-utf8 */
-#define REFFA 64 /* 0x40 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define NREF 65 /* 0x41 Match some already matched string */
-#define NREFF 66 /* 0x42 Match already matched string, folded using native charset rules for non-utf8 */
-#define NREFFL 67 /* 0x43 Match already matched string, folded in loc. */
-#define NREFFU 68 /* 0x44 Match already matched string, folded using unicode rules for non-utf8 */
-#define NREFFA 69 /* 0x45 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define LONGJMP 70 /* 0x46 Jump far away. */
-#define BRANCHJ 71 /* 0x47 BRANCH with long offset. */
-#define IFMATCH 72 /* 0x48 Succeeds if the following matches. */
-#define UNLESSM 73 /* 0x49 Fails if the following matches. */
-#define SUSPEND 74 /* 0x4a "Independent" sub-RE. */
-#define IFTHEN 75 /* 0x4b Switch, should be preceded by switcher. */
-#define GROUPP 76 /* 0x4c Whether the group matched. */
-#define EVAL 77 /* 0x4d Execute some Perl code. */
-#define MINMOD 78 /* 0x4e Next operator is not greedy. */
-#define LOGICAL 79 /* 0x4f Next opcode should set the flag only. */
-#define RENUM 80 /* 0x50 Group with independently numbered parens. */
-#define TRIE 81 /* 0x51 Match many EXACT(F[ALU]?)? at once. flags==type */
-#define TRIEC 82 /* 0x52 Same as TRIE, but with embedded charclass data */
-#define AHOCORASICK 83 /* 0x53 Aho Corasick stclass. flags==type */
-#define AHOCORASICKC 84 /* 0x54 Same as AHOCORASICK, but with embedded charclass data */
-#define GOSUB 85 /* 0x55 recurse to paren arg1 at (signed) ofs arg2 */
-#define NGROUPP 86 /* 0x56 Whether the group matched. */
-#define INSUBP 87 /* 0x57 Whether we are in a specific recurse. */
-#define DEFINEP 88 /* 0x58 Never execute directly. */
-#define ENDLIKE 89 /* 0x59 Used only for the type field of verbs */
-#define OPFAIL 90 /* 0x5a Same as (?!), but with verb arg */
-#define ACCEPT 91 /* 0x5b Accepts the current matched string, with verbar */
-#define VERB 92 /* 0x5c Used only for the type field of verbs */
-#define PRUNE 93 /* 0x5d Pattern fails at this startpoint if no-backtracking through this */
-#define MARKPOINT 94 /* 0x5e Push the current location for rollback by cut. */
-#define SKIP 95 /* 0x5f On failure skip forward (to the mark) before retrying */
-#define COMMIT 96 /* 0x60 Pattern fails outright if backtracking through this */
-#define CUTGROUP 97 /* 0x61 On failure go to the next alternation in the group */
-#define KEEPS 98 /* 0x62 $& begins here. */
-#define LNBREAK 99 /* 0x63 generic newline pattern */
-#define OPTIMIZED 100 /* 0x64 Placeholder for dump. */
-#define PSEUDO 101 /* 0x65 Pseudo opcode for internal use. */
+#define EXACTFS_B_U 47 /* 0x2f EXACTFU but begins with [Ss]; (string not UTF-8; compile-time only). */
+#define EXACTFS_E_U 48 /* 0x30 EXACTFU but ends with [Ss]; (string not UTF-8; compile-time only). */
+#define EXACTFS_BE_U 49 /* 0x31 EXACTFU but begins and ends with [Ss]; (string not UTF-8; compile-time only). */
+#define NOTHING 50 /* 0x32 Match empty string. */
+#define TAIL 51 /* 0x33 Match empty string. Can jump here from outside. */
+#define STAR 52 /* 0x34 Match this (simple) thing 0 or more times. */
+#define PLUS 53 /* 0x35 Match this (simple) thing 1 or more times. */
+#define CURLY 54 /* 0x36 Match this simple thing {n,m} times. */
+#define CURLYN 55 /* 0x37 Capture next-after-this simple thing */
+#define CURLYM 56 /* 0x38 Capture this medium-complex thing {n,m} times. */
+#define CURLYX 57 /* 0x39 Match this complex thing {n,m} times. */
+#define WHILEM 58 /* 0x3a Do curly processing and see if rest matches. */
+#define OPEN 59 /* 0x3b Mark this point in input as start of #n. */
+#define CLOSE 60 /* 0x3c Close corresponding OPEN of #n. */
+#define SROPEN 61 /* 0x3d Same as OPEN, but for script run */
+#define SRCLOSE 62 /* 0x3e Close preceding SROPEN */
+#define REF 63 /* 0x3f Match some already matched string */
+#define REFF 64 /* 0x40 Match already matched string, folded using native charset rules for non-utf8 */
+#define REFFL 65 /* 0x41 Match already matched string, folded in loc. */
+#define REFFU 66 /* 0x42 Match already matched string, folded using unicode rules for non-utf8 */
+#define REFFA 67 /* 0x43 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define NREF 68 /* 0x44 Match some already matched string */
+#define NREFF 69 /* 0x45 Match already matched string, folded using native charset rules for non-utf8 */
+#define NREFFL 70 /* 0x46 Match already matched string, folded in loc. */
+#define NREFFU 71 /* 0x47 Match already matched string, folded using unicode rules for non-utf8 */
+#define NREFFA 72 /* 0x48 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define LONGJMP 73 /* 0x49 Jump far away. */
+#define BRANCHJ 74 /* 0x4a BRANCH with long offset. */
+#define IFMATCH 75 /* 0x4b Succeeds if the following matches. */
+#define UNLESSM 76 /* 0x4c Fails if the following matches. */
+#define SUSPEND 77 /* 0x4d "Independent" sub-RE. */
+#define IFTHEN 78 /* 0x4e Switch, should be preceded by switcher. */
+#define GROUPP 79 /* 0x4f Whether the group matched. */
+#define EVAL 80 /* 0x50 Execute some Perl code. */
+#define MINMOD 81 /* 0x51 Next operator is not greedy. */
+#define LOGICAL 82 /* 0x52 Next opcode should set the flag only. */
+#define RENUM 83 /* 0x53 Group with independently numbered parens. */
+#define TRIE 84 /* 0x54 Match many EXACT(F[ALU]?)? at once. flags==type */
+#define TRIEC 85 /* 0x55 Same as TRIE, but with embedded charclass data */
+#define AHOCORASICK 86 /* 0x56 Aho Corasick stclass. flags==type */
+#define AHOCORASICKC 87 /* 0x57 Same as AHOCORASICK, but with embedded charclass data */
+#define GOSUB 88 /* 0x58 recurse to paren arg1 at (signed) ofs arg2 */
+#define NGROUPP 89 /* 0x59 Whether the group matched. */
+#define INSUBP 90 /* 0x5a Whether we are in a specific recurse. */
+#define DEFINEP 91 /* 0x5b Never execute directly. */
+#define ENDLIKE 92 /* 0x5c Used only for the type field of verbs */
+#define OPFAIL 93 /* 0x5d Same as (?!), but with verb arg */
+#define ACCEPT 94 /* 0x5e Accepts the current matched string, with verbar */
+#define VERB 95 /* 0x5f Used only for the type field of verbs */
+#define PRUNE 96 /* 0x60 Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 97 /* 0x61 Push the current location for rollback by cut. */
+#define SKIP 98 /* 0x62 On failure skip forward (to the mark) before retrying */
+#define COMMIT 99 /* 0x63 Pattern fails outright if backtracking through this */
+#define CUTGROUP 100 /* 0x64 On failure go to the next alternation in the group */
+#define KEEPS 101 /* 0x65 $& begins here. */
+#define LNBREAK 102 /* 0x66 generic newline pattern */
+#define OPTIMIZED 103 /* 0x67 Placeholder for dump. */
+#define PSEUDO 104 /* 0x68 Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -208,6 +211,9 @@ EXTCONST U8 PL_regkind[] = {
EXACT, /* EXACTFAA_NO_TRIE */
EXACT, /* EXACT_ONLY8 */
EXACT, /* EXACTFU_ONLY8 */
+ EXACT, /* EXACTFS_B_U */
+ EXACT, /* EXACTFS_E_U */
+ EXACT, /* EXACTFS_BE_U */
NOTHING, /* NOTHING */
NOTHING, /* TAIL */
STAR, /* STAR */
@@ -359,6 +365,9 @@ static const U8 regarglen[] = {
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
+ 0, /* EXACTFS_B_U */
+ 0, /* EXACTFS_E_U */
+ 0, /* EXACTFS_BE_U */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
@@ -466,6 +475,9 @@ static const char reg_off_by_arg[] = {
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
+ 0, /* EXACTFS_B_U */
+ 0, /* EXACTFS_E_U */
+ 0, /* EXACTFS_BE_U */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
@@ -579,61 +591,64 @@ EXTCONST char * const PL_reg_name[] = {
"EXACTFAA_NO_TRIE", /* 0x2c */
"EXACT_ONLY8", /* 0x2d */
"EXACTFU_ONLY8", /* 0x2e */
- "NOTHING", /* 0x2f */
- "TAIL", /* 0x30 */
- "STAR", /* 0x31 */
- "PLUS", /* 0x32 */
- "CURLY", /* 0x33 */
- "CURLYN", /* 0x34 */
- "CURLYM", /* 0x35 */
- "CURLYX", /* 0x36 */
- "WHILEM", /* 0x37 */
- "OPEN", /* 0x38 */
- "CLOSE", /* 0x39 */
- "SROPEN", /* 0x3a */
- "SRCLOSE", /* 0x3b */
- "REF", /* 0x3c */
- "REFF", /* 0x3d */
- "REFFL", /* 0x3e */
- "REFFU", /* 0x3f */
- "REFFA", /* 0x40 */
- "NREF", /* 0x41 */
- "NREFF", /* 0x42 */
- "NREFFL", /* 0x43 */
- "NREFFU", /* 0x44 */
- "NREFFA", /* 0x45 */
- "LONGJMP", /* 0x46 */
- "BRANCHJ", /* 0x47 */
- "IFMATCH", /* 0x48 */
- "UNLESSM", /* 0x49 */
- "SUSPEND", /* 0x4a */
- "IFTHEN", /* 0x4b */
- "GROUPP", /* 0x4c */
- "EVAL", /* 0x4d */
- "MINMOD", /* 0x4e */
- "LOGICAL", /* 0x4f */
- "RENUM", /* 0x50 */
- "TRIE", /* 0x51 */
- "TRIEC", /* 0x52 */
- "AHOCORASICK", /* 0x53 */
- "AHOCORASICKC", /* 0x54 */
- "GOSUB", /* 0x55 */
- "NGROUPP", /* 0x56 */
- "INSUBP", /* 0x57 */
- "DEFINEP", /* 0x58 */
- "ENDLIKE", /* 0x59 */
- "OPFAIL", /* 0x5a */
- "ACCEPT", /* 0x5b */
- "VERB", /* 0x5c */
- "PRUNE", /* 0x5d */
- "MARKPOINT", /* 0x5e */
- "SKIP", /* 0x5f */
- "COMMIT", /* 0x60 */
- "CUTGROUP", /* 0x61 */
- "KEEPS", /* 0x62 */
- "LNBREAK", /* 0x63 */
- "OPTIMIZED", /* 0x64 */
- "PSEUDO", /* 0x65 */
+ "EXACTFS_B_U", /* 0x2f */
+ "EXACTFS_E_U", /* 0x30 */
+ "EXACTFS_BE_U", /* 0x31 */
+ "NOTHING", /* 0x32 */
+ "TAIL", /* 0x33 */
+ "STAR", /* 0x34 */
+ "PLUS", /* 0x35 */
+ "CURLY", /* 0x36 */
+ "CURLYN", /* 0x37 */
+ "CURLYM", /* 0x38 */
+ "CURLYX", /* 0x39 */
+ "WHILEM", /* 0x3a */
+ "OPEN", /* 0x3b */
+ "CLOSE", /* 0x3c */
+ "SROPEN", /* 0x3d */
+ "SRCLOSE", /* 0x3e */
+ "REF", /* 0x3f */
+ "REFF", /* 0x40 */
+ "REFFL", /* 0x41 */
+ "REFFU", /* 0x42 */
+ "REFFA", /* 0x43 */
+ "NREF", /* 0x44 */
+ "NREFF", /* 0x45 */
+ "NREFFL", /* 0x46 */
+ "NREFFU", /* 0x47 */
+ "NREFFA", /* 0x48 */
+ "LONGJMP", /* 0x49 */
+ "BRANCHJ", /* 0x4a */
+ "IFMATCH", /* 0x4b */
+ "UNLESSM", /* 0x4c */
+ "SUSPEND", /* 0x4d */
+ "IFTHEN", /* 0x4e */
+ "GROUPP", /* 0x4f */
+ "EVAL", /* 0x50 */
+ "MINMOD", /* 0x51 */
+ "LOGICAL", /* 0x52 */
+ "RENUM", /* 0x53 */
+ "TRIE", /* 0x54 */
+ "TRIEC", /* 0x55 */
+ "AHOCORASICK", /* 0x56 */
+ "AHOCORASICKC", /* 0x57 */
+ "GOSUB", /* 0x58 */
+ "NGROUPP", /* 0x59 */
+ "INSUBP", /* 0x5a */
+ "DEFINEP", /* 0x5b */
+ "ENDLIKE", /* 0x5c */
+ "OPFAIL", /* 0x5d */
+ "ACCEPT", /* 0x5e */
+ "VERB", /* 0x5f */
+ "PRUNE", /* 0x60 */
+ "MARKPOINT", /* 0x61 */
+ "SKIP", /* 0x62 */
+ "COMMIT", /* 0x63 */
+ "CUTGROUP", /* 0x64 */
+ "KEEPS", /* 0x65 */
+ "LNBREAK", /* 0x66 */
+ "OPTIMIZED", /* 0x67 */
+ "PSEUDO", /* 0x68 */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -768,7 +783,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
EXTCONST U8 PL_varies_bitmask[];
#else
EXTCONST U8 PL_varies_bitmask[] = {
- 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xFE, 0xF0, 0xBF, 0x0C, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xF0, 0x87, 0xFF, 0x65, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
@@ -791,7 +806,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
EXTCONST U8 PL_simple_bitmask[];
#else
EXTCONST U8 PL_simple_bitmask[] = {
- 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 671f6c72be..ee15556d66 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -355,7 +355,7 @@ pod/perl.pod Verbatim line length including indents exceeds 79 by 8
pod/perlandroid.pod Verbatim line length including indents exceeds 79 by 3
pod/perlbook.pod Verbatim line length including indents exceeds 79 by 1
pod/perlce.pod Verbatim line length including indents exceeds 79 by 3
-pod/perldebguts.pod Verbatim line length including indents exceeds 79 by 27
+pod/perldebguts.pod Verbatim line length including indents exceeds 79 by 28
pod/perldebtut.pod Verbatim line length including indents exceeds 79 by 3
pod/perldtrace.pod Verbatim line length including indents exceeds 79 by 7
pod/perlgit.pod ? Should you be using F<...> or maybe L<...> instead of 1