diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2020-11-17 19:01:08 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-11-26 16:00:32 -0500 |
commit | a84e53f978341135355c5c82cd7af2ae2efa5e72 (patch) | |
tree | 931c79cf29f6ce8fbb38b19a5c1250a0855d5ca5 | |
parent | be5d74caab64abf9d986fc7290f62731db7e73e7 (diff) | |
download | haskell-a84e53f978341135355c5c82cd7af2ae2efa5e72.tar.gz |
RTS: Fix failed inlining of copy_tag.
On Windows, gcc-10 failed to inline copy_tag into evacuate.
To fix this we now set the always_inline attribute for the various
copy* functions in Evac.c. The main motivation here is not the
overhead of the function call, but rather that this allows the code
to "specialize" for the size of the closure we copy which is often
known at compile time.
An earlier commit also tried to keep evacuate_large from being inlined, but
didn't quite succeed, so I also marked evacuate_large as noinline.
Fixes #12416
-rw-r--r-- | includes/Rts.h | 7 | ||||
-rw-r--r-- | rts/sm/Evac.c | 19 |
2 files changed, 18 insertions, 8 deletions
diff --git a/includes/Rts.h b/includes/Rts.h index 1e5a60262b..027d5173a1 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -37,12 +37,17 @@ extern "C" { #include "HsFFI.h" #include "RtsAPI.h" -// Turn off inlining when debugging - it obfuscates things +// Disencourage gcc from inlining when debugging - it obfuscates things #if defined(DEBUG) # undef STATIC_INLINE # define STATIC_INLINE static #endif +// Fine grained inlining control helpers. +#define ATTR_ALWAYS_INLINE __attribute__((always_inline)) +#define ATTR_NOINLINE __attribute__((noinline)) + + #include "rts/Types.h" #include "rts/Time.h" diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index e660fad1d8..8595a80c38 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -58,7 +58,7 @@ #define MAX_THUNK_SELECTOR_DEPTH 16 static void eval_thunk_selector (StgClosure **q, StgSelector *p, bool); -STATIC_INLINE void evacuate_large(StgPtr p); +ATTR_NOINLINE static void evacuate_large(StgPtr p); /* ----------------------------------------------------------------------------- Allocate some space in which to copy an object. @@ -134,8 +134,13 @@ alloc_for_copy (uint32_t size, uint32_t gen_no) The evacuate() code -------------------------------------------------------------------------- */ -/* size is in words */ -STATIC_INLINE GNUC_ATTR_HOT void +/* size is in words + + We want to *always* inline this as often the size of the closure is static, + which allows unrolling of the copy loop. 
+ + */ +ATTR_ALWAYS_INLINE GNUC_ATTR_HOT static inline void copy_tag(StgClosure **p, const StgInfoTable *info, StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag) { @@ -194,7 +199,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info, } #if defined(PARALLEL_GC) && !defined(PROFILING) -STATIC_INLINE void +ATTR_ALWAYS_INLINE static inline void copy_tag_nolock(StgClosure **p, const StgInfoTable *info, StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag) { @@ -231,7 +236,7 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info, * pointer of an object, but reserve some padding after it. This is * used to optimise evacuation of TSOs. */ -static bool +ATTR_ALWAYS_INLINE static inline bool copyPart(StgClosure **p, StgClosure *src, uint32_t size_to_reserve, uint32_t size_to_copy, uint32_t gen_no) { @@ -283,7 +288,7 @@ spin: /* Copy wrappers that don't tag the closure after copying */ -STATIC_INLINE GNUC_ATTR_HOT void +ATTR_ALWAYS_INLINE GNUC_ATTR_HOT static inline void copy(StgClosure **p, const StgInfoTable *info, StgClosure *src, uint32_t size, uint32_t gen_no) { @@ -301,7 +306,7 @@ copy(StgClosure **p, const StgInfoTable *info, that has been evacuated, or unset otherwise. -------------------------------------------------------------------------- */ -static void +ATTR_NOINLINE static void evacuate_large(StgPtr p) { bdescr *bd; |