summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2018-12-04 09:58:13 -0700
committer Karl Williamson <khw@cpan.org> 2018-12-07 21:12:16 -0700
commit 8a100c918ec81926c0536594df8ee1fcccb171da (patch)
tree 61553beb7d50f69a1c65e5295f7cf7fc079097cd
parent 127a194773690138ef2e74691af748a925a2f47a (diff)
download perl-8a100c918ec81926c0536594df8ee1fcccb171da.tar.gz
regcomp.c: Allow more EXACTFish nodes to be trieable
The previous two commits fixed bugs where it would be possible during optimization to join two EXACTFish nodes together, and the result would not work properly with LATIN SMALL LETTER SHARP S. But by doing so, the commits kept all non-UTF-8 EXACTFU nodes that begin or end with [Ss] from being trieable. This commit changes things so that the only ones that are non-trieable are the ones that, when joined, have the sequence [Ss][Ss] in them. To do so, I created three new node types that indicate whether the node begins with [Ss], ends with [Ss], or both. These preclude having to examine the node contents at joining to determine this. And since there are plenty of node types available, it seemed the best choice. But other options would be available should we run out of nodes. Examining the first and final characters of a node is not expensive, for example.
-rw-r--r-- pod/perldebguts.pod 7
-rw-r--r-- regcomp.c 190
-rw-r--r-- regcomp.sym 6
-rw-r--r-- regnodes.h 243
-rw-r--r-- t/porting/known_pod_issues.dat 2
5 files changed, 308 insertions, 140 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index a826d9d237..b076bcf2d3 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -680,6 +680,13 @@ will be lost.
EXACTFU_ONLY8 str Like EXACTFU, but only UTF-8 encoded
targets can match
+ EXACTFS_B_U str EXACTFU but begins with [Ss]; (string not
+ UTF-8; compile-time only).
+ EXACTFS_E_U str EXACTFU but ends with [Ss]; (string not UTF-
+ 8; compile-time only).
+ EXACTFS_BE_U str EXACTFU but begins and ends with [Ss];
+ (string not UTF-8; compile-time only).
+
# Do nothing types
NOTHING no Match empty string.
diff --git a/regcomp.c b/regcomp.c
index a501bf1475..0fc793626f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4001,6 +4001,108 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
else if ((OP(scan) == EXACTFU_ONLY8) && (OP(n) == EXACTFU)) {
; /* join is compatible, no need to change OP */
}
+ else if (OP(scan) == EXACTFU) {
+ if (OP(n) != EXACTFU) {
+
+ /* Here the first node is EXACTFU and the second isn't.
+ * Normally EXACTFU nodes are compatible for joining only
+ * with EXACTFU_ONLY8 nodes (already handled), and other
+ * EXACTFU nodes. But under /di, certain temporary
+ * EXACTFS_foo_U nodes are generated, which are compatible.
+ * We check for this case here. These need to be resolved
+ * to either EXACTFU or EXACTF at joining time. They have
+ * nothing in them that would forbid them from being the
+ * more desirable EXACTFU nodes except that they begin
+ * and/or end with a single [Ss]. The reason this is
+ * problematic is because they could be joined in this loop
+ * with an adjacent node that ends and/or begins with [Ss]
+ * which would then form the sequence 'ss', which matches
+ * differently under /di than /ui, in which case EXACTFU
+ * can't be used. If the 'ss' sequence doesn't get formed,
+ * the nodes get absorbed into any adjacent EXACTFU node.
+ * And if the only adjacent node is EXACTF, they get
+ * absorbed into that, under the theory that a longer node
+ * is better than two shorter ones, even if one is EXACTFU.
+ * Note that EXACTFU_ONLY8 is generated only for UTF-8
+ * patterns, and the EXACTFS_foo_U ones only for non-UTF-8.
+ * */
+
+ if (OP(n) == EXACTFS_E_U || OP(n) == EXACTFS_BE_U) {
+
+ /* Here the joined node would end with 's'. If the
+ * node following the combination is an EXACTF one,
+ * it's better to join this EXACTFS_fooE_U with that
+ * one, leaving the current one in 'scan' be the more
+ * desirable EXACTFU */
+ if (OP(nnext) == EXACTF) {
+ break;
+ }
+ OP(scan) = EXACTFS_E_U;
+ }
+ else if (OP(n) != EXACTFS_B_U) {
+ break; /* This would be an incompatible join; stop */
+ }
+ }
+ }
+ else if (OP(scan) == EXACTF) {
+ if (OP(n) != EXACTF) {
+
+ /* Here the first node is EXACTF and the second isn't.
+ * EXACTF nodes are compatible for joining only with other
+ * EXACTF nodes, and the EXACTFS_foo_U nodes. But the
+ * latter nodes can be also joined with EXACTFU ones, and
+ * that is a better outcome, so if the node following 'n'
+ * is EXACTFU, quit now so that those two can be joined
+ * later */
+ if ( OP(n) != EXACTFS_B_U
+ && OP(n) != EXACTFS_E_U
+ && OP(n) != EXACTFS_BE_U)
+ {
+ break;
+ }
+ else if (OP(nnext) == EXACTFU) {
+ break;
+ }
+ else {
+ /* Here the next node can be joined with the EXACTF
+ * node, and become part of it. That they begin or end
+ * with 's' now doesn't matter. */
+ }
+ }
+ }
+ else if (OP(scan) == EXACTFS_B_U) {
+
+ /* Here, the first node begins, but does not end with 's'.
+ * That means it doesn't form 'ss' with the following node, so
+ * can become EXACTFU, and either stand on its own or be joined
+ * with a following EXACTFU. If the following is instead an
+ * EXACTF, the two can also be joined together as EXACTF */
+ if (OP(n) == EXACTF) {
+ OP(scan) = EXACTF;
+ }
+ else {
+ OP(scan) = EXACTFU;
+ if (OP(n) != EXACTFU) {
+ break;
+ }
+ }
+ }
+ else if (OP(scan) == EXACTFS_E_U || OP(scan) == EXACTFS_BE_U) {
+
+ /* Here, the first node ends with 's', and could become an
+ * EXACTFU (or be joined with a following EXACTFU) if that next
+ * node doesn't begin with 's'; otherwise it must become an
+ * EXACTF node. */
+ if (OP(n) == EXACTFS_B_U || OP(n) == EXACTFS_BE_U) {
+ OP(scan) = EXACTF;
+ }
+ else {
+ OP(scan) = EXACTFU;
+ if (OP(n) != EXACTFU) {
+ break;
+ }
+ }
+ }
else if (OP(scan) != OP(n)) {
/* The only other compatible joinings are the same node type */
@@ -4036,6 +4138,15 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
#endif
}
+ /* These temporary nodes can now be turned into EXACTFU, and must, as
+ * regexec.c doesn't handle them */
+ if ( OP(scan) == EXACTFS_B_U
+ || OP(scan) == EXACTFS_E_U
+ || OP(scan) == EXACTFS_BE_U)
+ {
+ OP(scan) = EXACTFU;
+ }
+
*min_subtract = 0;
*unfolded_multi_char = FALSE;
@@ -5174,6 +5285,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
min++;
/* FALLTHROUGH */
case STAR:
+ next = NEXTOPER(scan);
+
+ /* These temporary nodes can now be turned into EXACTFU, and
+ * must, as regexec.c doesn't handle them */
+ if ( OP(next) == EXACTFS_B_U
+ || OP(next) == EXACTFS_E_U
+ || OP(next) == EXACTFS_BE_U)
+ {
+ OP(next) = EXACTFU;
+ }
+
if (flags & SCF_DO_STCLASS) {
mincount = 0;
maxcount = REG_INFTY;
@@ -13786,6 +13908,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* as the latter's folds aren't known until runtime. */
bool maybe_exactfu = FOLD;
+ /* An EXACTF node that otherwise could be turned into EXACTFU
+ * can't be if it starts and/or ends with [Ss], because during
+ * optimization it could be joined with another node that ends
+ * and/or starts with [Ss], creating the sequence 'ss', which needs
+ * to remain in an EXACTF node. This flag is used to signal this
+ * situation */
+ bool maybe_exactfs = FALSE;
+
/* Single-character EXACTish nodes are almost always SIMPLE. This
* allows us to override this as encountered */
U8 maybe_SIMPLE = SIMPLE;
@@ -14282,9 +14412,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* On non-ancient Unicode versions, this includes the
* multi-char fold SHARP S to 'ss' */
+ if (len == 0 && isALPHA_FOLD_EQ(ender, 's')) {
+ maybe_exactfs = TRUE; /* Node begins with 's' */
+ }
else if ( UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
- || ( isALPHA_FOLD_EQ(ender, 's')
- && (len == 0 || isALPHA_FOLD_EQ(*(s-1), 's'))))
+ || ( isALPHA_FOLD_EQ(ender, 's')
+ && isALPHA_FOLD_EQ(*(s-1), 's')))
{
/* Here, we have one of the following:
* a) a SHARP S. This folds to 'ss' only under
@@ -14301,24 +14434,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* so that we won't generate an unwanted
* match, unless, at runtime, the target
* string is in UTF-8.
- * c) an initial s in the node. By itself, this
- * isn't a problem, but if we later join this
- * and the node preceding it together, where
- * that one ends with an 's', the juncture
- * would contain 'ss', and again we could have
- * an inappropriate match, so keep this node
- * EXACTF. After we've accumulated the node
- * we also make sure that a final s keeps it
- * from becoming EXACTFU.
- *
- * XXX An enhancement would be to create a new
- * node-type, say EXACTFS, which would be EXACTFU
- * except for beginning or ending with 's'. This
- * could trivially be turned into EXACTFU after
- * joining, if appropriate, and would then be
- * trieable */
+ * */
- maybe_exactfu = FALSE;
+ maybe_exactfs = FALSE; /* Can't generate an
+ EXACTFS node */
+ maybe_exactfu = FALSE; /* Nor EXACTFU (unless we
+ already are in one) */
if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
maybe_SIMPLE = 0;
if (node_type == EXACTFU) {
@@ -14532,12 +14653,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
if (FOLD) {
- /* If the node ends in an 's' we make sure it stays EXACTF,
- * as if it turns into an EXACTFU, it could later get
- * joined with another 's' that would then wrongly match
- * the sharp s */
- if (maybe_exactfu && isALPHA_FOLD_EQ(ender, 's'))
- {
+ /* If the node ends in an 's' it can't now be changed into
+ * an EXACTFU, as the node could later get joined with another
+ * one that begins with 's', and that combination would
+ * then wrongly match the sharp s under /di. (Note that if
+ * it's already EXACTFU, this is irrelevant) If this is
+ * the only reason keeping it from being an EXACTFU, we
+ * create a special node type so that at joining time, we
+ * can turn it into an EXACTFU if no 'ss' is formed */
+ if (isALPHA_FOLD_EQ(ender, 's')) {
+ if (maybe_exactfu && node_type == EXACTF) {
+ node_type = (maybe_exactfs)
+ ? EXACTFS_BE_U
+ : EXACTFS_E_U;
+ }
maybe_exactfu = FALSE;
}
@@ -14554,6 +14683,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
else if (node_type == EXACTF) {
RExC_seen_d_op = TRUE;
+
+ /* If the only thing keeping this from being EXACTFU is
+ * that it begins with 's', change it to a special node
+ * type so that during the later join, we can easily
+ * check for, and do the change there if appropriate */
+ if (maybe_exactfs) {
+ node_type = EXACTFS_B_U;
+ }
}
/* The micro sign is the only below 256 character that
@@ -19334,6 +19471,9 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
case EXACT_ONLY8:
case EXACTL:
case EXACTF:
+ case EXACTFS_B_U:
+ case EXACTFS_E_U:
+ case EXACTFS_BE_U:
case EXACTFAA_NO_TRIE:
case EXACTFAA:
case EXACTFU:
diff --git a/regcomp.sym b/regcomp.sym
index dffc03b1a0..ddf5ba886f 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -117,6 +117,12 @@ EXACTFU_ONLY8 EXACT, str ; Like EXACTFU, but only UTF-8 encoded targets
# One could add EXACTFAA8 and and something that has the same effect for /l,
# but these would be extremely uncommon
+# If we ran out of node types, these could be replaced by some other method,
+# such as instead examining the first and final characters of nodes.
+EXACTFS_B_U EXACT, str ; EXACTFU but begins with [Ss]; (string not UTF-8; compile-time only).
+EXACTFS_E_U EXACT, str ; EXACTFU but ends with [Ss]; (string not UTF-8; compile-time only).
+EXACTFS_BE_U EXACT, str ; EXACTFU but begins and ends with [Ss]; (string not UTF-8; compile-time only).
+
#*Do nothing types
NOTHING NOTHING, no ; Match empty string.
diff --git a/regnodes.h b/regnodes.h
index 7c12d1f4d9..94b444379c 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 101
-#define REGMATCH_STATE_MAX 141
+#define REGNODE_MAX 104
+#define REGMATCH_STATE_MAX 144
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -58,61 +58,64 @@
#define EXACTFAA_NO_TRIE 44 /* 0x2c Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
#define EXACT_ONLY8 45 /* 0x2d Like EXACT, but only UTF-8 encoded targets can match */
#define EXACTFU_ONLY8 46 /* 0x2e Like EXACTFU, but only UTF-8 encoded targets can match */
-#define NOTHING 47 /* 0x2f Match empty string. */
-#define TAIL 48 /* 0x30 Match empty string. Can jump here from outside. */
-#define STAR 49 /* 0x31 Match this (simple) thing 0 or more times. */
-#define PLUS 50 /* 0x32 Match this (simple) thing 1 or more times. */
-#define CURLY 51 /* 0x33 Match this simple thing {n,m} times. */
-#define CURLYN 52 /* 0x34 Capture next-after-this simple thing */
-#define CURLYM 53 /* 0x35 Capture this medium-complex thing {n,m} times. */
-#define CURLYX 54 /* 0x36 Match this complex thing {n,m} times. */
-#define WHILEM 55 /* 0x37 Do curly processing and see if rest matches. */
-#define OPEN 56 /* 0x38 Mark this point in input as start of #n. */
-#define CLOSE 57 /* 0x39 Close corresponding OPEN of #n. */
-#define SROPEN 58 /* 0x3a Same as OPEN, but for script run */
-#define SRCLOSE 59 /* 0x3b Close preceding SROPEN */
-#define REF 60 /* 0x3c Match some already matched string */
-#define REFF 61 /* 0x3d Match already matched string, folded using native charset rules for non-utf8 */
-#define REFFL 62 /* 0x3e Match already matched string, folded in loc. */
-#define REFFU 63 /* 0x3f Match already matched string, folded using unicode rules for non-utf8 */
-#define REFFA 64 /* 0x40 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define NREF 65 /* 0x41 Match some already matched string */
-#define NREFF 66 /* 0x42 Match already matched string, folded using native charset rules for non-utf8 */
-#define NREFFL 67 /* 0x43 Match already matched string, folded in loc. */
-#define NREFFU 68 /* 0x44 Match already matched string, folded using unicode rules for non-utf8 */
-#define NREFFA 69 /* 0x45 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define LONGJMP 70 /* 0x46 Jump far away. */
-#define BRANCHJ 71 /* 0x47 BRANCH with long offset. */
-#define IFMATCH 72 /* 0x48 Succeeds if the following matches. */
-#define UNLESSM 73 /* 0x49 Fails if the following matches. */
-#define SUSPEND 74 /* 0x4a "Independent" sub-RE. */
-#define IFTHEN 75 /* 0x4b Switch, should be preceded by switcher. */
-#define GROUPP 76 /* 0x4c Whether the group matched. */
-#define EVAL 77 /* 0x4d Execute some Perl code. */
-#define MINMOD 78 /* 0x4e Next operator is not greedy. */
-#define LOGICAL 79 /* 0x4f Next opcode should set the flag only. */
-#define RENUM 80 /* 0x50 Group with independently numbered parens. */
-#define TRIE 81 /* 0x51 Match many EXACT(F[ALU]?)? at once. flags==type */
-#define TRIEC 82 /* 0x52 Same as TRIE, but with embedded charclass data */
-#define AHOCORASICK 83 /* 0x53 Aho Corasick stclass. flags==type */
-#define AHOCORASICKC 84 /* 0x54 Same as AHOCORASICK, but with embedded charclass data */
-#define GOSUB 85 /* 0x55 recurse to paren arg1 at (signed) ofs arg2 */
-#define NGROUPP 86 /* 0x56 Whether the group matched. */
-#define INSUBP 87 /* 0x57 Whether we are in a specific recurse. */
-#define DEFINEP 88 /* 0x58 Never execute directly. */
-#define ENDLIKE 89 /* 0x59 Used only for the type field of verbs */
-#define OPFAIL 90 /* 0x5a Same as (?!), but with verb arg */
-#define ACCEPT 91 /* 0x5b Accepts the current matched string, with verbar */
-#define VERB 92 /* 0x5c Used only for the type field of verbs */
-#define PRUNE 93 /* 0x5d Pattern fails at this startpoint if no-backtracking through this */
-#define MARKPOINT 94 /* 0x5e Push the current location for rollback by cut. */
-#define SKIP 95 /* 0x5f On failure skip forward (to the mark) before retrying */
-#define COMMIT 96 /* 0x60 Pattern fails outright if backtracking through this */
-#define CUTGROUP 97 /* 0x61 On failure go to the next alternation in the group */
-#define KEEPS 98 /* 0x62 $& begins here. */
-#define LNBREAK 99 /* 0x63 generic newline pattern */
-#define OPTIMIZED 100 /* 0x64 Placeholder for dump. */
-#define PSEUDO 101 /* 0x65 Pseudo opcode for internal use. */
+#define EXACTFS_B_U 47 /* 0x2f EXACTFU but begins with [Ss]; (string not UTF-8; compile-time only). */
+#define EXACTFS_E_U 48 /* 0x30 EXACTFU but ends with [Ss]; (string not UTF-8; compile-time only). */
+#define EXACTFS_BE_U 49 /* 0x31 EXACTFU but begins and ends with [Ss]; (string not UTF-8; compile-time only). */
+#define NOTHING 50 /* 0x32 Match empty string. */
+#define TAIL 51 /* 0x33 Match empty string. Can jump here from outside. */
+#define STAR 52 /* 0x34 Match this (simple) thing 0 or more times. */
+#define PLUS 53 /* 0x35 Match this (simple) thing 1 or more times. */
+#define CURLY 54 /* 0x36 Match this simple thing {n,m} times. */
+#define CURLYN 55 /* 0x37 Capture next-after-this simple thing */
+#define CURLYM 56 /* 0x38 Capture this medium-complex thing {n,m} times. */
+#define CURLYX 57 /* 0x39 Match this complex thing {n,m} times. */
+#define WHILEM 58 /* 0x3a Do curly processing and see if rest matches. */
+#define OPEN 59 /* 0x3b Mark this point in input as start of #n. */
+#define CLOSE 60 /* 0x3c Close corresponding OPEN of #n. */
+#define SROPEN 61 /* 0x3d Same as OPEN, but for script run */
+#define SRCLOSE 62 /* 0x3e Close preceding SROPEN */
+#define REF 63 /* 0x3f Match some already matched string */
+#define REFF 64 /* 0x40 Match already matched string, folded using native charset rules for non-utf8 */
+#define REFFL 65 /* 0x41 Match already matched string, folded in loc. */
+#define REFFU 66 /* 0x42 Match already matched string, folded using unicode rules for non-utf8 */
+#define REFFA 67 /* 0x43 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define NREF 68 /* 0x44 Match some already matched string */
+#define NREFF 69 /* 0x45 Match already matched string, folded using native charset rules for non-utf8 */
+#define NREFFL 70 /* 0x46 Match already matched string, folded in loc. */
+#define NREFFU 71 /* 0x47 Match already matched string, folded using unicode rules for non-utf8 */
+#define NREFFA 72 /* 0x48 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define LONGJMP 73 /* 0x49 Jump far away. */
+#define BRANCHJ 74 /* 0x4a BRANCH with long offset. */
+#define IFMATCH 75 /* 0x4b Succeeds if the following matches. */
+#define UNLESSM 76 /* 0x4c Fails if the following matches. */
+#define SUSPEND 77 /* 0x4d "Independent" sub-RE. */
+#define IFTHEN 78 /* 0x4e Switch, should be preceded by switcher. */
+#define GROUPP 79 /* 0x4f Whether the group matched. */
+#define EVAL 80 /* 0x50 Execute some Perl code. */
+#define MINMOD 81 /* 0x51 Next operator is not greedy. */
+#define LOGICAL 82 /* 0x52 Next opcode should set the flag only. */
+#define RENUM 83 /* 0x53 Group with independently numbered parens. */
+#define TRIE 84 /* 0x54 Match many EXACT(F[ALU]?)? at once. flags==type */
+#define TRIEC 85 /* 0x55 Same as TRIE, but with embedded charclass data */
+#define AHOCORASICK 86 /* 0x56 Aho Corasick stclass. flags==type */
+#define AHOCORASICKC 87 /* 0x57 Same as AHOCORASICK, but with embedded charclass data */
+#define GOSUB 88 /* 0x58 recurse to paren arg1 at (signed) ofs arg2 */
+#define NGROUPP 89 /* 0x59 Whether the group matched. */
+#define INSUBP 90 /* 0x5a Whether we are in a specific recurse. */
+#define DEFINEP 91 /* 0x5b Never execute directly. */
+#define ENDLIKE 92 /* 0x5c Used only for the type field of verbs */
+#define OPFAIL 93 /* 0x5d Same as (?!), but with verb arg */
+#define ACCEPT 94 /* 0x5e Accepts the current matched string, with verbar */
+#define VERB 95 /* 0x5f Used only for the type field of verbs */
+#define PRUNE 96 /* 0x60 Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 97 /* 0x61 Push the current location for rollback by cut. */
+#define SKIP 98 /* 0x62 On failure skip forward (to the mark) before retrying */
+#define COMMIT 99 /* 0x63 Pattern fails outright if backtracking through this */
+#define CUTGROUP 100 /* 0x64 On failure go to the next alternation in the group */
+#define KEEPS 101 /* 0x65 $& begins here. */
+#define LNBREAK 102 /* 0x66 generic newline pattern */
+#define OPTIMIZED 103 /* 0x67 Placeholder for dump. */
+#define PSEUDO 104 /* 0x68 Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -208,6 +211,9 @@ EXTCONST U8 PL_regkind[] = {
EXACT, /* EXACTFAA_NO_TRIE */
EXACT, /* EXACT_ONLY8 */
EXACT, /* EXACTFU_ONLY8 */
+ EXACT, /* EXACTFS_B_U */
+ EXACT, /* EXACTFS_E_U */
+ EXACT, /* EXACTFS_BE_U */
NOTHING, /* NOTHING */
NOTHING, /* TAIL */
STAR, /* STAR */
@@ -359,6 +365,9 @@ static const U8 regarglen[] = {
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
+ 0, /* EXACTFS_B_U */
+ 0, /* EXACTFS_E_U */
+ 0, /* EXACTFS_BE_U */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
@@ -466,6 +475,9 @@ static const char reg_off_by_arg[] = {
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
+ 0, /* EXACTFS_B_U */
+ 0, /* EXACTFS_E_U */
+ 0, /* EXACTFS_BE_U */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
@@ -579,61 +591,64 @@ EXTCONST char * const PL_reg_name[] = {
"EXACTFAA_NO_TRIE", /* 0x2c */
"EXACT_ONLY8", /* 0x2d */
"EXACTFU_ONLY8", /* 0x2e */
- "NOTHING", /* 0x2f */
- "TAIL", /* 0x30 */
- "STAR", /* 0x31 */
- "PLUS", /* 0x32 */
- "CURLY", /* 0x33 */
- "CURLYN", /* 0x34 */
- "CURLYM", /* 0x35 */
- "CURLYX", /* 0x36 */
- "WHILEM", /* 0x37 */
- "OPEN", /* 0x38 */
- "CLOSE", /* 0x39 */
- "SROPEN", /* 0x3a */
- "SRCLOSE", /* 0x3b */
- "REF", /* 0x3c */
- "REFF", /* 0x3d */
- "REFFL", /* 0x3e */
- "REFFU", /* 0x3f */
- "REFFA", /* 0x40 */
- "NREF", /* 0x41 */
- "NREFF", /* 0x42 */
- "NREFFL", /* 0x43 */
- "NREFFU", /* 0x44 */
- "NREFFA", /* 0x45 */
- "LONGJMP", /* 0x46 */
- "BRANCHJ", /* 0x47 */
- "IFMATCH", /* 0x48 */
- "UNLESSM", /* 0x49 */
- "SUSPEND", /* 0x4a */
- "IFTHEN", /* 0x4b */
- "GROUPP", /* 0x4c */
- "EVAL", /* 0x4d */
- "MINMOD", /* 0x4e */
- "LOGICAL", /* 0x4f */
- "RENUM", /* 0x50 */
- "TRIE", /* 0x51 */
- "TRIEC", /* 0x52 */
- "AHOCORASICK", /* 0x53 */
- "AHOCORASICKC", /* 0x54 */
- "GOSUB", /* 0x55 */
- "NGROUPP", /* 0x56 */
- "INSUBP", /* 0x57 */
- "DEFINEP", /* 0x58 */
- "ENDLIKE", /* 0x59 */
- "OPFAIL", /* 0x5a */
- "ACCEPT", /* 0x5b */
- "VERB", /* 0x5c */
- "PRUNE", /* 0x5d */
- "MARKPOINT", /* 0x5e */
- "SKIP", /* 0x5f */
- "COMMIT", /* 0x60 */
- "CUTGROUP", /* 0x61 */
- "KEEPS", /* 0x62 */
- "LNBREAK", /* 0x63 */
- "OPTIMIZED", /* 0x64 */
- "PSEUDO", /* 0x65 */
+ "EXACTFS_B_U", /* 0x2f */
+ "EXACTFS_E_U", /* 0x30 */
+ "EXACTFS_BE_U", /* 0x31 */
+ "NOTHING", /* 0x32 */
+ "TAIL", /* 0x33 */
+ "STAR", /* 0x34 */
+ "PLUS", /* 0x35 */
+ "CURLY", /* 0x36 */
+ "CURLYN", /* 0x37 */
+ "CURLYM", /* 0x38 */
+ "CURLYX", /* 0x39 */
+ "WHILEM", /* 0x3a */
+ "OPEN", /* 0x3b */
+ "CLOSE", /* 0x3c */
+ "SROPEN", /* 0x3d */
+ "SRCLOSE", /* 0x3e */
+ "REF", /* 0x3f */
+ "REFF", /* 0x40 */
+ "REFFL", /* 0x41 */
+ "REFFU", /* 0x42 */
+ "REFFA", /* 0x43 */
+ "NREF", /* 0x44 */
+ "NREFF", /* 0x45 */
+ "NREFFL", /* 0x46 */
+ "NREFFU", /* 0x47 */
+ "NREFFA", /* 0x48 */
+ "LONGJMP", /* 0x49 */
+ "BRANCHJ", /* 0x4a */
+ "IFMATCH", /* 0x4b */
+ "UNLESSM", /* 0x4c */
+ "SUSPEND", /* 0x4d */
+ "IFTHEN", /* 0x4e */
+ "GROUPP", /* 0x4f */
+ "EVAL", /* 0x50 */
+ "MINMOD", /* 0x51 */
+ "LOGICAL", /* 0x52 */
+ "RENUM", /* 0x53 */
+ "TRIE", /* 0x54 */
+ "TRIEC", /* 0x55 */
+ "AHOCORASICK", /* 0x56 */
+ "AHOCORASICKC", /* 0x57 */
+ "GOSUB", /* 0x58 */
+ "NGROUPP", /* 0x59 */
+ "INSUBP", /* 0x5a */
+ "DEFINEP", /* 0x5b */
+ "ENDLIKE", /* 0x5c */
+ "OPFAIL", /* 0x5d */
+ "ACCEPT", /* 0x5e */
+ "VERB", /* 0x5f */
+ "PRUNE", /* 0x60 */
+ "MARKPOINT", /* 0x61 */
+ "SKIP", /* 0x62 */
+ "COMMIT", /* 0x63 */
+ "CUTGROUP", /* 0x64 */
+ "KEEPS", /* 0x65 */
+ "LNBREAK", /* 0x66 */
+ "OPTIMIZED", /* 0x67 */
+ "PSEUDO", /* 0x68 */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -768,7 +783,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
EXTCONST U8 PL_varies_bitmask[];
#else
EXTCONST U8 PL_varies_bitmask[] = {
- 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xFE, 0xF0, 0xBF, 0x0C, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xF0, 0x87, 0xFF, 0x65, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
@@ -791,7 +806,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
EXTCONST U8 PL_simple_bitmask[];
#else
EXTCONST U8 PL_simple_bitmask[] = {
- 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 671f6c72be..ee15556d66 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -355,7 +355,7 @@ pod/perl.pod Verbatim line length including indents exceeds 79 by 8
pod/perlandroid.pod Verbatim line length including indents exceeds 79 by 3
pod/perlbook.pod Verbatim line length including indents exceeds 79 by 1
pod/perlce.pod Verbatim line length including indents exceeds 79 by 3
-pod/perldebguts.pod Verbatim line length including indents exceeds 79 by 27
+pod/perldebguts.pod Verbatim line length including indents exceeds 79 by 28
pod/perldebtut.pod Verbatim line length including indents exceeds 79 by 3
pod/perldtrace.pod Verbatim line length including indents exceeds 79 by 7
pod/perlgit.pod ? Should you be using F<...> or maybe L<...> instead of 1