summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2019-05-15 16:49:40 -0400
committerBen Gamari <ben@smart-cactus.org>2019-10-22 12:18:39 -0400
commite6f6823f1eb5ae43a7cd782a649f55c40a5d53fd (patch)
treefafe22ef012aa1a691f7011a77cefb00c990c978
parente893877e3964cd8c8b147c14b3a2b38410e7427b (diff)
downloadhaskell-e6f6823f1eb5ae43a7cd782a649f55c40a5d53fd.tar.gz
NonMoving: Pre-fetch during mark
This improved overall runtime on nofib's constraints test by nearly 10%.
-rw-r--r--rts/sm/NonMovingMark.c46
-rw-r--r--rts/sm/NonMovingMark.h10
2 files changed, 55 insertions, 1 deletions
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 275cfcdd51..ddc1a1234a 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -706,7 +706,7 @@ void markQueuePushArray (MarkQueue *q,
*********************************************************/
// Returns invalid MarkQueueEnt if queue is empty.
-static MarkQueueEnt markQueuePop (MarkQueue *q)
+static MarkQueueEnt markQueuePop_ (MarkQueue *q)
{
MarkQueueBlock *top;
@@ -737,6 +737,46 @@ again:
return ent;
}
+static MarkQueueEnt markQueuePop (MarkQueue *q)
+{
+#if MARK_PREFETCH_QUEUE_DEPTH == 0
+ return markQueuePop_(q);
+#else
+ unsigned int i = q->prefetch_head;
+ while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
+ MarkQueueEnt new = markQueuePop_(q);
+ if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
+ // Mark queue is empty; look for any valid entries in the prefetch
+ // queue
+ for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
+ j != i;
+ j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
+ {
+ if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
+ i = j;
+ goto done;
+ }
+ }
+ return new;
+ }
+
+ // The entry may not be a MARK_CLOSURE but it doesn't matter, our
+ // MarkQueueEnt encoding always places the pointer to the object to be
+ // marked first.
+ prefetchForRead(&new.mark_closure.p->header.info);
+ q->prefetch_queue[i] = new;
+ i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
+ }
+
+ done:
+ ;
+ MarkQueueEnt ret = q->prefetch_queue[i];
+ q->prefetch_queue[i].null_entry.p = NULL;
+ q->prefetch_head = i;
+ return ret;
+#endif
+}
+
/*********************************************************
* Creating and destroying MarkQueues and UpdRemSets
*********************************************************/
@@ -748,6 +788,10 @@ static void init_mark_queue_ (MarkQueue *queue)
queue->blocks = bd;
queue->top = (MarkQueueBlock *) bd->start;
queue->top->head = 0;
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
+ queue->prefetch_head = 0;
+#endif
}
/* Must hold sm_mutex. */
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index bff89d9e92..806776cdc5 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -84,6 +84,9 @@ typedef struct {
MarkQueueEnt entries[];
} MarkQueueBlock;
+// How far ahead in mark queue to prefetch?
+#define MARK_PREFETCH_QUEUE_DEPTH 5
+
/* The mark queue is not capable of concurrent read or write.
*
* invariants:
@@ -101,6 +104,13 @@ typedef struct MarkQueue_ {
// Is this a mark queue or a capability-local update remembered set?
bool is_upd_rem_set;
+
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ // A ring-buffer of entries which we will mark next
+ MarkQueueEnt prefetch_queue[MARK_PREFETCH_QUEUE_DEPTH];
+ // The first free slot in prefetch_queue.
+ uint8_t prefetch_head;
+#endif
} MarkQueue;
/* While it shares its representation with MarkQueue, UpdRemSet differs in