summaryrefslogtreecommitdiff
path: root/regcomp.sym
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2020-06-03 16:05:23 -0600
committerKarl Williamson <khw@cpan.org>2020-10-02 09:37:42 -0600
commit7af5518645a0304883fa2e9bfb22d9fd00b3847b (patch)
tree34b90211457157349a732b4f9d9f704926379875 /regcomp.sym
parent1a651719bf0d87ba92ed2380a88eb658daa69fd6 (diff)
downloadperl-7af5518645a0304883fa2e9bfb22d9fd00b3847b.tar.gz
regcomp.sym: Reorder some entries
These entries are mostly used in three functions in regexec.c. Two of those functions use fewer than half of the available entries, as case labels in a switch() statement. Moving all the entries used by those functions so that they are nearly contiguous at the beginning lets compilers generate smaller jump tables for the switch().
Diffstat (limited to 'regcomp.sym')
-rw-r--r--regcomp.sym38
1 files changed, 20 insertions, 18 deletions
diff --git a/regcomp.sym b/regcomp.sym
index 7c0bf7a484..1554ec90ab 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -167,6 +167,23 @@ EXACTFU_REQ8 EXACT, str ; Like EXACTFU, but only UTF-8 encoded targets
EXACTFU_S_EDGE EXACT, str ; /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only)
+#*New charclass like patterns
+LNBREAK LNBREAK, none ; generic newline pattern
+
+#*Trie Related
+
+#* Behave the same as A|LIST|OF|WORDS would. The '..C' variants
+#* have inline charclass data (ascii only), the 'C' store it in the
+#* structure.
+# NOTE: the relative order of the TRIE-like regops is significant
+
+TRIE TRIE, trie 1 ; Match many EXACT(F[ALU]?)? at once. flags==type
+TRIEC TRIE,trie charclass ; Same as TRIE, but with embedded charclass data
+
+# For start classes, contains an added fail table.
+AHOCORASICK TRIE, trie 1 ; Aho Corasick stclass. flags==type
+AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded charclass data
+
#*Do nothing types
NOTHING NOTHING, no ; Match empty string.
@@ -239,20 +256,6 @@ LOGICAL LOGICAL, no ; Next opcode should set the flag only.
#*This is not used yet
RENUM BRANCHJ, off 1 . 1 ; Group with independently numbered parens.
-#*Trie Related
-
-#* Behave the same as A|LIST|OF|WORDS would. The '..C' variants
-#* have inline charclass data (ascii only), the 'C' store it in the
-#* structure.
-# NOTE: the relative order of the TRIE-like regops is significant
-
-TRIE TRIE, trie 1 ; Match many EXACT(F[ALU]?)? at once. flags==type
-TRIEC TRIE,trie charclass ; Same as TRIE, but with embedded charclass data
-
-# For start classes, contains an added fail table.
-AHOCORASICK TRIE, trie 1 ; Aho Corasick stclass. flags==type
-AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded charclass data
-
#*Regex Subroutines
GOSUB GOSUB, num/ofs 2L ; recurse to paren arg1 at (signed) ofs arg2
@@ -277,10 +280,9 @@ CUTGROUP VERB, no-sv 1 ; On failure go to the next alternation in the
#*Control what to keep in $&.
KEEPS KEEPS, no ; $& begins here.
-#*New charclass like patterns
-LNBREAK LNBREAK, none ; generic newline pattern
-
-# NEW STUFF SOMEWHERE ABOVE THIS LINE
+# NEW STUFF SOMEWHERE ABOVE THIS LINE. Stuff that regexec.c: find_byclass()
+# and regrepeat() use should go way above, near LNBREAK to allow a more compact
+# jump table to be generated for their switch() statements
################################################################################