summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2014-02-18 10:24:31 -0700
committerKarl Williamson <public@khwilliamson.com>2014-02-19 08:32:59 -0700
commit1ee208c4824a2a3a18e979873141161c149a57a3 (patch)
tree8a1fc013840b03cc2a807679cfb3d4a773d036b4 /regexec.c
parent77d654fbc7477bd2bbb8741fcbb08e9b541f53e3 (diff)
downloadperl-1ee208c4824a2a3a18e979873141161c149a57a3.tar.gz
regcomp.c: Fix more alignment problems
I believe this will fix the remaining alignment problems recently being shown on gcc on HP-UX. It works on the procura machine. regnodes should not have stricter alignment than required by U32, for reasons given in the comments this commit adds to the beginning of regcomp.h. Commit 31f05a37 added a new ANYOF regnode struct with a pointer field. This requires stricter alignment on some 64-bit platforms, and hence doesn't work on those platforms. This commit removes that regnode struct type, and instead stores the pointer it used via a more indirect, but already existing mechanism that stores other data. The function that returns that other data is enlarged to return this new field as well. It now needs to be called from regcomp.c, so the previous commit had renamed and made it accessible from there. The "public" function that wraps this one is unchanged. (I put "public" in quotes here, because I don't think anyone outside core is or should be using it, but since it has been publicly available for a long time, I'm treating the API as unchangeable. regcomp.c called this public function before this commit, but needs the additional data returned by the inner one).
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c46
1 files changed, 29 insertions, 17 deletions
diff --git a/regexec.c b/regexec.c
index d1a6dcc16a..e1e840dc54 100644
--- a/regexec.c
+++ b/regexec.c
@@ -7537,7 +7537,8 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
- assert(ANYOF_FLAGS(node) & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8));
+ assert(ANYOF_FLAGS(node)
+ & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8|ANYOF_LOC_FOLD));
if (data && data->count) {
const U32 n = ARG(node);
@@ -7550,18 +7551,30 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
si = *ary; /* ary[0] = the string to initialize the swash with */
- /* Elements 2 and 3 are either both present or both absent. [2] is
- * any inversion list generated at compile time; [3] indicates if
+ /* Elements 3 and 4 are either both present or both absent. [3] is
+ * any inversion list generated at compile time; [4] indicates if
* that inversion list has any user-defined properties in it. */
- if (av_len(av) >= 2) {
- invlist = ary[2];
- if (SvUV(ary[3])) {
+ if (av_tindex(av) >= 2) {
+ if (only_utf8_locale_ptr
+ && ary[2]
+ && ary[2] != &PL_sv_undef)
+ {
+ *only_utf8_locale_ptr = ary[2];
+ }
+ else {
+ *only_utf8_locale_ptr = NULL;
+ }
+
+ if (av_len(av) >= 3) {
+ invlist = ary[3];
+ if (SvUV(ary[4])) {
swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
}
}
else {
invlist = NULL;
}
+ }
/* Element [1] is reserved for the set-up swash. If already there,
* return it; if not, create it and store it there */
@@ -7715,15 +7728,6 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
}
}
- /* For /li matching and the current locale is a UTF-8 one, look at the
- * special list, valid for just these circumstances. */
- if (! match
- && (flags & ANYOF_LOC_FOLD)
- && IN_UTF8_CTYPE_LOCALE
- && ANYOF_UTF8_LOCALE_INVLIST(n))
- {
- match = _invlist_contains_cp(ANYOF_UTF8_LOCALE_INVLIST(n), c);
- }
/* If the bitmap didn't (or couldn't) match, and something outside the
* bitmap could match, try that. */
@@ -7732,9 +7736,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
match = TRUE; /* Everything above 255 matches */
}
else if ((flags & ANYOF_NONBITMAP_NON_UTF8)
- || (utf8_target && (flags & ANYOF_UTF8)))
+ || (utf8_target && (flags & ANYOF_UTF8))
+ || ((flags & ANYOF_LOC_FOLD)
+ && IN_UTF8_CTYPE_LOCALE
+ && ARG(n) != ANYOF_NONBITMAP_EMPTY))
{
- SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0, NULL);
+ SV* only_utf8_locale = NULL;
+ SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,
+ &only_utf8_locale);
if (sw) {
U8 * utf8_p;
if (utf8_target) {
@@ -7751,6 +7760,9 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
/* If we allocated a string above, free it */
if (! utf8_target) Safefree(utf8_p);
}
+ if (! match && only_utf8_locale && IN_UTF8_CTYPE_LOCALE) {
+ match = _invlist_contains_cp(only_utf8_locale, c);
+ }
}
if (UNICODE_IS_SUPER(c)