summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Klebinger <klebinger.andreas@gmx.at> 2020-11-17 19:01:08 +0100
committer Marge Bot <ben+marge-bot@smart-cactus.org> 2020-11-26 16:00:32 -0500
commit a84e53f978341135355c5c82cd7af2ae2efa5e72 (patch)
tree 931c79cf29f6ce8fbb38b19a5c1250a0855d5ca5
parent be5d74caab64abf9d986fc7290f62731db7e73e7 (diff)
download haskell-a84e53f978341135355c5c82cd7af2ae2efa5e72.tar.gz
RTS: Fix failed inlining of copy_tag.
On Windows, gcc-10 failed to inline copy_tag into evacuate. To fix this we now set the always_inline attribute on the various copy* functions in Evac.c. The main motivation here is not the overhead of the function call, but rather that inlining allows the code to "specialize" for the size of the closure being copied, which is often known at compile time. An earlier commit also tried to prevent evacuate_large from being inlined, but didn't quite succeed, so I also marked evacuate_large as noinline. Fixes #12416
-rw-r--r-- includes/Rts.h 7
-rw-r--r-- rts/sm/Evac.c 19
2 files changed, 18 insertions, 8 deletions
diff --git a/includes/Rts.h b/includes/Rts.h
index 1e5a60262b..027d5173a1 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -37,12 +37,17 @@ extern "C" {
#include "HsFFI.h"
#include "RtsAPI.h"
-// Turn off inlining when debugging - it obfuscates things
+// Discourage gcc from inlining when debugging - it obfuscates things
#if defined(DEBUG)
# undef STATIC_INLINE
# define STATIC_INLINE static
#endif
+// Fine grained inlining control helpers.
+#define ATTR_ALWAYS_INLINE __attribute__((always_inline))
+#define ATTR_NOINLINE __attribute__((noinline))
+
+
#include "rts/Types.h"
#include "rts/Time.h"
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index e660fad1d8..8595a80c38 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -58,7 +58,7 @@
#define MAX_THUNK_SELECTOR_DEPTH 16
static void eval_thunk_selector (StgClosure **q, StgSelector *p, bool);
-STATIC_INLINE void evacuate_large(StgPtr p);
+ATTR_NOINLINE static void evacuate_large(StgPtr p);
/* -----------------------------------------------------------------------------
Allocate some space in which to copy an object.
@@ -134,8 +134,13 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
The evacuate() code
-------------------------------------------------------------------------- */
-/* size is in words */
-STATIC_INLINE GNUC_ATTR_HOT void
+/* size is in words
+
+ We want to *always* inline this as often the size of the closure is static,
+ which allows unrolling of the copy loop.
+
+ */
+ATTR_ALWAYS_INLINE GNUC_ATTR_HOT static inline void
copy_tag(StgClosure **p, const StgInfoTable *info,
StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag)
{
@@ -194,7 +199,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
}
#if defined(PARALLEL_GC) && !defined(PROFILING)
-STATIC_INLINE void
+ATTR_ALWAYS_INLINE static inline void
copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag)
{
@@ -231,7 +236,7 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
* pointer of an object, but reserve some padding after it. This is
* used to optimise evacuation of TSOs.
*/
-static bool
+ATTR_ALWAYS_INLINE static inline bool
copyPart(StgClosure **p, StgClosure *src, uint32_t size_to_reserve,
uint32_t size_to_copy, uint32_t gen_no)
{
@@ -283,7 +288,7 @@ spin:
/* Copy wrappers that don't tag the closure after copying */
-STATIC_INLINE GNUC_ATTR_HOT void
+ATTR_ALWAYS_INLINE GNUC_ATTR_HOT static inline void
copy(StgClosure **p, const StgInfoTable *info,
StgClosure *src, uint32_t size, uint32_t gen_no)
{
@@ -301,7 +306,7 @@ copy(StgClosure **p, const StgInfoTable *info,
that has been evacuated, or unset otherwise.
-------------------------------------------------------------------------- */
-static void
+ATTR_NOINLINE static void
evacuate_large(StgPtr p)
{
bdescr *bd;