diff options
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 96 |
1 files changed, 83 insertions, 13 deletions
@@ -87,7 +87,6 @@ EXTERN_C const struct regexp_engine my_reg_engine; #endif #include "dquote_static.c" -#include "charclass_invlists.h" #include "inline_invlist.c" #include "unicode_constants.h" @@ -11772,27 +11771,90 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) invert = 1; /* FALLTHROUGH */ case 'b': + { + regex_charset charset = get_regex_charset(RExC_flags); + RExC_seen_zerolen++; RExC_seen |= REG_LOOKBEHIND_SEEN; - op = BOUND + get_regex_charset(RExC_flags); - if (op > BOUNDA) { /* /aa is same as /a */ - op = BOUNDA; - } - else if (op == BOUNDL) { - RExC_contains_locale = 1; - } + op = BOUND + charset; - if (invert) { - op += NBOUND - BOUND; + if (op == BOUNDL) { + RExC_contains_locale = 1; } ret = reg_node(pRExC_state, op); *flagp |= SIMPLE; - if ((U8) *(RExC_parse + 1) == '{') { - /* diag_listed_as: Use "%s" instead of "%s" */ - vFAIL3("Use \"\\%c\\{\" instead of \"\\%c{\"", *RExC_parse, *RExC_parse); + if (*(RExC_parse + 1) != '{') { + FLAGS(ret) = TRADITIONAL_BOUND; + if (PASS2 && op > BOUNDA) { /* /aa is same as /a */ + OP(ret) = BOUNDA; + } + } + else { + STRLEN length; + char name = *RExC_parse; + char * endbrace; + RExC_parse += 2; + endbrace = strchr(RExC_parse, '}'); + + if (! endbrace) { + vFAIL2("Missing right brace on \\%c{}", name); + } + /* XXX Need to decide whether to take spaces or not. Should be + * consistent with \p{}, but that currently is SPACE, which + * means vertical too, which seems wrong + * while (isBLANK(*RExC_parse)) { + RExC_parse++; + }*/ + if (endbrace == RExC_parse) { + RExC_parse++; /* After the '}' */ + vFAIL2("Empty \\%c{}", name); + } + length = endbrace - RExC_parse; + /*while (isBLANK(*(RExC_parse + length - 1))) { + length--; + }*/ + switch (*RExC_parse) { + case 'g': + if (length != 1 + && (length != 3 || strnNE(RExC_parse + 1, "cb", 2))) + { + goto bad_bound_type; + } + FLAGS(ret) = GCB_BOUND; + break; + default: + bad_bound_type: + RExC_parse = endbrace; + vFAIL2utf8f( + "'%"UTF8f"' is an unknown bound type", + UTF8fARG(UTF, length, endbrace - length)); + NOT_REACHED; /*NOTREACHED*/ + } + RExC_parse = endbrace; + RExC_uni_semantics = 1; + + if (PASS2 && op >= BOUNDA) { /* /aa is same as /a */ + OP(ret) = BOUNDU; + length += 4; + + /* Don't have to worry about UTF-8, in this message because + * to get here the contents of the \b must be ASCII */ + ckWARN4reg(RExC_parse + 1, /* Include the '}' in msg */ + "Using /u for '%.*s' instead of /%s", + (unsigned) length, + endbrace - length + 1, + (charset == REGEX_ASCII_RESTRICTED_CHARSET) + ? ASCII_RESTRICT_PAT_MODS + : ASCII_MORE_RESTRICT_PAT_MODS); + } } + + if (PASS2 && invert) { + OP(ret) += NBOUND - BOUND; + } goto finish_meta_pat; + } case 'D': invert = 1; @@ -16735,6 +16797,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index); } } + else if (k == BOUND || k == NBOUND) { + /* Must be synced with order of 'bound_type' in regcomp.h */ + const char * const bounds[] = { + "", /* Traditional */ + "{gcb}" + }; + sv_catpv(sv, bounds[FLAGS(o)]); + } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags)); else if (OP(o) == SBOL) |