summaryrefslogtreecommitdiff
path: root/proto.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-01-10 17:06:04 -0700
committerKarl Williamson <public@khwilliamson.com>2013-01-11 11:50:38 -0700
commit9d1a5160ac870eccea399973eaa9f9e3020b0833 (patch)
tree9d08b87e7c229f41ee345da68b7de257a585a21b /proto.h
parentab6629666cee2471e467421195a7a99662521188 (diff)
downloadperl-9d1a5160ac870eccea399973eaa9f9e3020b0833.tar.gz
New regex experimental feature: (?[ ])
This is a fancier [bracketed] character class which allows set operations, such as intersection and subtraction. The entry in perlre for this commit details its operation. Besides extending regular expressions to handle this functionality, recommended by Unicode, the intent here is to do three things: 1) Intersection has been simulated by regexes using zero-width look-around assertions, which are non-obvious. This allows replacing those with a more powerful and clearer syntax; the compiled regexes are smaller and faster. Everything is known at compile time. 2) Set operations have also been simulated by using user-defined Unicode properties. These are globals, have security implications, have restricted names, and don't allow expressions as complex as this new feature does. 3) I hope that this feature will come to be viewed as a "better" bracketed character class. Because there is no embedded base to be compatible with, I took the opportunity to forbid certain iffy practices allowed by the existing classes, while remaining mostly backwards compatible. The main difference is that /x is always enabled, so white space can be pretty much freely used with these, but to specify a match on white space, it must be escaped. Things that should have been illegal now are, such as \x{} and \x{abcdefghi}. Things that look like a posix specifier but don't quite meet the rules now give an error instead of silently compiling. For example, [:digit] is an error instead of the union of the characters that compose it. I may have omitted things; perhaps it should be an error to have the same letter occur twice, adjacent. Since this is experimental, we can make such changes based on field feedback. The intent is to keep this feature, since it is strongly recommended by Unicode. The exact syntax is subject to change, so it is experimental.
Diffstat (limited to 'proto.h')
-rw-r--r--proto.h12
1 files changed, 12 insertions, 0 deletions
diff --git a/proto.h b/proto.h
index 0cab673c77..c9c667fb44 100644
--- a/proto.h
+++ b/proto.h
@@ -6456,6 +6456,11 @@ PERL_STATIC_INLINE U8 S_compute_EXACTish(pTHX_ struct RExC_state_t *pRExC_state)
#define PERL_ARGS_ASSERT_COMPUTE_EXACTISH \
assert(pRExC_state)
+STATIC bool S_could_it_be_POSIX(pTHX_ struct RExC_state_t *pRExC_state)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_COULD_IT_BE_POSIX \
+ assert(pRExC_state)
+
PERL_STATIC_INLINE UV* S_get_invlist_iter_addr(pTHX_ SV* invlist)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
@@ -6486,6 +6491,13 @@ STATIC bool S_grok_bslash_N(pTHX_ struct RExC_state_t *pRExC_state, regnode** no
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
assert(pRExC_state); assert(flagp)
+STATIC regnode* S_handle_sets(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, char * const oregcomp_parse)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_4);
+#define PERL_ARGS_ASSERT_HANDLE_SETS \
+ assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
+
PERL_STATIC_INLINE UV* S_invlist_array(pTHX_ SV* const invlist)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);