diff options
author | Karl Williamson <khw@cpan.org> | 2021-07-24 19:24:54 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-07 09:17:37 -0600 |
commit | 03abef4e914341bd78e7f70808ad55bd1da905cf (patch) | |
tree | 3ab48c260b3bd01cee4bed96c67dbf595c9d3de3 /regexec.c | |
parent | 7d39b50bff3325167beaea1283e83431b5be4c55 (diff) | |
download | perl-03abef4e914341bd78e7f70808ad55bd1da905cf.tar.gz |
regexec.c: Refactor macro to generalize it
This is in preparation for a somewhat different use to be added.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 38 |
1 files changed, 27 insertions, 11 deletions
@@ -1859,7 +1859,8 @@ STMT_START {
 
 /* These differ from the above macros in that they call a function which
  * returns the next occurrence of the thing being looked for in 's'; and
- * 'strend' if there is no such occurrence. */
+ * 'strend' if there is no such occurrence. 'f' is something like fcn(a,b,c)
+ * */
 #define REXEC_FBC_UTF8_FIND_NEXT_SCAN(f) \
     while (s < strend) { \
         s = (char *) (f); \
@@ -1884,13 +1885,12 @@ STMT_START {
         previous_occurrence_end = s; \
     }
 
-/* This differs from the above macros in that it is passed a single byte that
- * is known to begin the next occurrence of the thing being looked for in 's'.
- * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
- * at that position. */
-#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND) \
+/* This is like the above macro except the function returns NULL if there is no
+ * occurrence, and there is a further condition that must be matched besides
+ * the function */
+#define REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND(f, COND) \
     while (s < strend) { \
-        s = (char *) memchr(s, byte, strend -s); \
+        s = (char *) (f); \
         if (s == NULL) { \
             s = (char *) strend; \
             break; \
@@ -1906,6 +1906,21 @@ STMT_START {
         } \
     }
 
+/* This differs from the above macros in that it is passed a single byte that
+ * is known to begin the next occurrence of the thing being looked for in 's'.
+ * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
+ * at that position. */
+#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND) \
+    REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND(memchr(s, byte, strend - s), \
+                                       COND)
+
+/* This is like the function above, but takes an entire string to look for
+ * instead of a single byte */
+#define REXEC_FBC_FIND_NEXT_UTF8_STRING_SCAN(substr, substr_end, COND) \
+    REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND( \
+                                     ninstr(s, strend, substr, substr_end), \
+                                     COND)
+
 /* The four macros below are slightly different versions of the same logic.
  *
  * The first is for /a and /aa when the target string is UTF-8. This can only
@@ -2311,10 +2326,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 
     case ANYOFHs_t8_pb:
     case ANYOFHs_t8_p8:
-        REXEC_FBC_UTF8_CLASS_SCAN(
-              ( strend -s >= FLAGS(c)
-               && memEQ(s, ((struct regnode_anyofhs *) c)->string, FLAGS(c))
-               && reginclass(prog, c, (U8*)s, (U8*) strend, 1 /* is utf8 */)));
+        REXEC_FBC_FIND_NEXT_UTF8_STRING_SCAN(
+              ((struct regnode_anyofhs *) c)->string,
+              ((struct regnode_anyofhs *) c)->string + FLAGS(c),
+              reginclass(prog, c, (U8*)s, (U8*) strend,
+                         1 /* is utf8 */));
         break;
 
     case ANYOFR_tb_pb: