summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-11-14 09:28:14 -0700
committerKarl Williamson <khw@cpan.org>2019-11-14 18:22:50 -0700
commit58ea1df2beb2aba315d3fb4dc8946f09336ef244 (patch)
treeebb9ea461a2ae005231e1f1abeba15a78e7eba0d
parentae9a9edb65d4dd18b6ba99b2639e1906b9f81514 (diff)
downloadperl-58ea1df2beb2aba315d3fb4dc8946f09336ef244.tar.gz
regcomp.sym: Add detail to some node descriptions
Having this enabled me to more quickly understand what's going on. A trailing period is removed from some long descriptions to make them slightly shorter.
-rw-r--r--pod/perldebguts.pod26
-rw-r--r--regcomp.sym24
-rw-r--r--regnodes.h18
3 files changed, 36 insertions, 32 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 6f47573c0f..ebaee86f13 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -666,29 +666,29 @@ will be lost.
LEXACT len:str 1 Match this long string (preceded by length;
flags unused).
EXACTL str Like EXACT, but /l is in effect (used so
- locale-related warnings can be checked
- for).
+ locale-related warnings can be checked for)
EXACTF str Like EXACT, but match using /id rules;
- (string not UTF-8, not guaranteed to be
- folded).
+ (string not UTF-8, ASCII folded; non-ASCII
+ not)
EXACTFL str Like EXACT, but match using /il rules;
- (string not likely to be folded).
+ (string not likely to be folded)
EXACTFU str Like EXACT, but match using /iu rules;
- (string folded).
+ (string folded)
+
EXACTFAA str Like EXACT, but match using /iaa rules;
- (string folded iff pattern is UTF8; folded
- length <= unfolded).
+ (string folded except in non-UTF8 patterns:
+ MICRO, SHARP S; folded length <= unfolded)
EXACTFUP str Like EXACT, but match using /iu rules;
- (string not UTF-8, not guaranteed to be
- folded; and it is Problematic).
+ (string not UTF-8, folded except MICRO,
+ SHARP S: hence Problematic)
EXACTFLU8 str Like EXACTFU, but use /il, UTF-8, (string
is folded, and everything in it is above
- 255.
+ 255)
EXACTFAA_NO_TRIE str Like EXACT, but match using /iaa rules
(string not UTF-8, not guaranteed to be
- folded, not currently trie-able).
+ folded, not currently trie-able)
EXACT_REQ8 str Like EXACT, but only UTF-8 encoded targets
can match
@@ -699,7 +699,7 @@ will be lost.
EXACTFU_S_EDGE str /di rules, but nothing in it precludes /ui,
except begins and/or ends with [Ss];
- (string not UTF-8; compile-time only).
+ (string not UTF-8; compile-time only)
# Do nothing types
diff --git a/regcomp.sym b/regcomp.sym
index 306cbf0e58..4ea160e6db 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -121,23 +121,28 @@ EXACT EXACT, str ; Match this string (flags field is the length
#* In a long string node, the U32 argument is the length, and is
#* immediately followed by the string.
LEXACT EXACT, len:str 1; Match this long string (preceded by length; flags unused).
-EXACTL EXACT, str ; Like EXACT, but /l is in effect (used so locale-related warnings can be checked for).
-EXACTF EXACT, str ; Like EXACT, but match using /id rules; (string not UTF-8, not guaranteed to be folded).
-EXACTFL EXACT, str ; Like EXACT, but match using /il rules; (string not likely to be folded).
-EXACTFU EXACT, str ; Like EXACT, but match using /iu rules; (string folded).
-EXACTFAA EXACT, str ; Like EXACT, but match using /iaa rules; (string folded iff pattern is UTF8; folded length <= unfolded).
+EXACTL EXACT, str ; Like EXACT, but /l is in effect (used so locale-related warnings can be checked for)
+EXACTF EXACT, str ; Like EXACT, but match using /id rules; (string not UTF-8, ASCII folded; non-ASCII not)
+EXACTFL EXACT, str ; Like EXACT, but match using /il rules; (string not likely to be folded)
+EXACTFU EXACT, str ; Like EXACT, but match using /iu rules; (string folded)
+
+# MICRO and SHARP S aren't folded in non-UTF8 patterns because their folds
+# would require UTF-8 to represent. SHARP S would normally fold to 'ss',
+# but because of /aa, it instead folds to a pair of LATIN SMALL LETTER
+# LONG S characters (U+017F).
+EXACTFAA EXACT, str ; Like EXACT, but match using /iaa rules; (string folded except in non-UTF8 patterns: MICRO, SHARP S; folded length <= unfolded)
# End of important relative ordering.
-EXACTFUP EXACT, str ; Like EXACT, but match using /iu rules; (string not UTF-8, not guaranteed to be folded; and it is Problematic).
+EXACTFUP EXACT, str ; Like EXACT, but match using /iu rules; (string not UTF-8, folded except MICRO, SHARP S: hence Problematic)
# In order for a non-UTF-8 EXACTFAA to think the pattern is pre-folded when
# matching a UTF-8 target string, there would have to be something like an
# EXACTFAA_MICRO which would not be considered pre-folded for UTF-8 targets,
# since the fold of the MICRO SIGN would not be done, and would be
# representable in the UTF-8 target string.
-EXACTFLU8 EXACT, str ; Like EXACTFU, but use /il, UTF-8, (string is folded, and everything in it is above 255.
-EXACTFAA_NO_TRIE EXACT, str ; Like EXACT, but match using /iaa rules (string not UTF-8, not guaranteed to be folded, not currently trie-able).
+EXACTFLU8 EXACT, str ; Like EXACTFU, but use /il, UTF-8, (string is folded, and everything in it is above 255)
+EXACTFAA_NO_TRIE EXACT, str ; Like EXACT, but match using /iaa rules (string not UTF-8, not guaranteed to be folded, not currently trie-able)
EXACT_REQ8 EXACT, str ; Like EXACT, but only UTF-8 encoded targets can match
@@ -146,7 +151,7 @@ EXACTFU_REQ8 EXACT, str ; Like EXACTFU, but only UTF-8 encoded targets
# One could add EXACTFAA8 and something that has the same effect for /l,
# but these would be extremely uncommon
-EXACTFU_S_EDGE EXACT, str ; /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only).
+EXACTFU_S_EDGE EXACT, str ; /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only)
#*Do nothing types
@@ -208,7 +213,6 @@ SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE.
IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher.
GROUPP GROUPP, num 1 ; Whether the group matched.
-
#*The heavy worker
EVAL EVAL, evl/flags 2L ; Execute some Perl code.
diff --git a/regnodes.h b/regnodes.h
index cab219b5bd..cee7b75f7f 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -50,18 +50,18 @@
#define BRANCH 36 /* 0x24 Match this alternative, or the next... */
#define EXACT 37 /* 0x25 Match this string (flags field is the length). */
#define LEXACT 38 /* 0x26 Match this long string (preceded by length; flags unused). */
-#define EXACTL 39 /* 0x27 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
-#define EXACTF 40 /* 0x28 Like EXACT, but match using /id rules; (string not UTF-8, not guaranteed to be folded). */
-#define EXACTFL 41 /* 0x29 Like EXACT, but match using /il rules; (string not likely to be folded). */
-#define EXACTFU 42 /* 0x2a Like EXACT, but match using /iu rules; (string folded). */
-#define EXACTFAA 43 /* 0x2b Like EXACT, but match using /iaa rules; (string folded iff pattern is UTF8; folded length <= unfolded). */
-#define EXACTFUP 44 /* 0x2c Like EXACT, but match using /iu rules; (string not UTF-8, not guaranteed to be folded; and it is Problematic). */
-#define EXACTFLU8 45 /* 0x2d Like EXACTFU, but use /il, UTF-8, (string is folded, and everything in it is above 255. */
-#define EXACTFAA_NO_TRIE 46 /* 0x2e Like EXACT, but match using /iaa rules (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
+#define EXACTL 39 /* 0x27 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for) */
+#define EXACTF 40 /* 0x28 Like EXACT, but match using /id rules; (string not UTF-8, ASCII folded; non-ASCII not) */
+#define EXACTFL 41 /* 0x29 Like EXACT, but match using /il rules; (string not likely to be folded) */
+#define EXACTFU 42 /* 0x2a Like EXACT, but match using /iu rules; (string folded) */
+#define EXACTFAA 43 /* 0x2b Like EXACT, but match using /iaa rules; (string folded except in non-UTF8 patterns: MICRO, SHARP S; folded length <= unfolded) */
+#define EXACTFUP 44 /* 0x2c Like EXACT, but match using /iu rules; (string not UTF-8, folded except MICRO, SHARP S: hence Problematic) */
+#define EXACTFLU8 45 /* 0x2d Like EXACTFU, but use /il, UTF-8, (string is folded, and everything in it is above 255) */
+#define EXACTFAA_NO_TRIE 46 /* 0x2e Like EXACT, but match using /iaa rules (string not UTF-8, not guaranteed to be folded, not currently trie-able) */
#define EXACT_REQ8 47 /* 0x2f Like EXACT, but only UTF-8 encoded targets can match */
#define LEXACT_REQ8 48 /* 0x30 Like LEXACT, but only UTF-8 encoded targets can match */
#define EXACTFU_REQ8 49 /* 0x31 Like EXACTFU, but only UTF-8 encoded targets can match */
-#define EXACTFU_S_EDGE 50 /* 0x32 /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only). */
+#define EXACTFU_S_EDGE 50 /* 0x32 /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only) */
#define NOTHING 51 /* 0x33 Match empty string. */
#define TAIL 52 /* 0x34 Match empty string. Can jump here from outside. */
#define STAR 53 /* 0x35 Match this (simple) thing 0 or more times. */