author      Ben Gamari <ben@smart-cactus.org>    2019-05-15 16:49:40 -0400
committer   Ben Gamari <ben@smart-cactus.org>    2019-10-22 12:18:39 -0400
commit      e6f6823f1eb5ae43a7cd782a649f55c40a5d53fd (patch)
tree        fafe22ef012aa1a691f7011a77cefb00c990c978 /rts
parent      e893877e3964cd8c8b147c14b3a2b38410e7427b (diff)
download    haskell-e6f6823f1eb5ae43a7cd782a649f55c40a5d53fd.tar.gz
NonMoving: Pre-fetch during mark
This improved overall runtime on nofib's constraints test by nearly 10%.
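The win comes from popping mark-queue entries through a small ring buffer: each entry is prefetched as it enters the ring and only marked MARK_PREFETCH_QUEUE_DEPTH pops later, so the cache line holding the closure's header has time to arrive before it is actually needed. A minimal standalone sketch of the same pattern, not GHC's actual code (the names RING_DEPTH, Node, pop_work, process, and drain are illustrative only):

#include <stddef.h>
#include <stdio.h>

#define RING_DEPTH 5   /* how far ahead we prefetch; the patch below uses 5 */

typedef struct Node_ { struct Node_ *next; int payload; } Node;

/* Pop the next pointer from the underlying work list; NULL when empty. */
static Node *pop_work(Node **work_list)
{
    Node *n = *work_list;
    if (n != NULL)
        *work_list = n->next;
    return n;
}

/* Do the actual work on a node (stands in for marking a closure). */
static void process(Node *n, long *sum)
{
    *sum += n->payload;
}

/* Drain the work list, keeping RING_DEPTH entries "in flight" so that by
 * the time we process a node its cache line has (hopefully) arrived. */
static long drain(Node *work_list)
{
    Node *ring[RING_DEPTH] = { 0 };
    unsigned int head = 0;
    long sum = 0;

    for (;;) {
        Node *next = pop_work(&work_list);
        if (next != NULL) {
            __builtin_prefetch(next, 0);   /* read prefetch of the new node */
            Node *ready = ring[head];      /* node queued RING_DEPTH pops ago */
            ring[head] = next;
            head = (head + 1) % RING_DEPTH;
            if (ready != NULL)
                process(ready, &sum);
        } else {
            /* Work list exhausted: flush whatever is still in the ring. */
            for (unsigned int i = 0; i < RING_DEPTH; i++) {
                if (ring[i] != NULL)
                    process(ring[i], &sum);
            }
            return sum;
        }
    }
}

int main(void)
{
    Node nodes[10];
    for (int i = 0; i < 10; i++) {
        nodes[i].payload = i;
        nodes[i].next = (i + 1 < 10) ? &nodes[i + 1] : NULL;
    }
    printf("%ld\n", drain(&nodes[0]));   /* prints 45 */
    return 0;
}

The ring doubles as a flush buffer: once the underlying queue runs dry, anything still sitting in it is processed before the loop terminates, mirroring the fallback scan over prefetch_queue in the patch below.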
Diffstat (limited to 'rts')
-rw-r--r--   rts/sm/NonMovingMark.c   46
-rw-r--r--   rts/sm/NonMovingMark.h   10
2 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 275cfcdd51..ddc1a1234a 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -706,7 +706,7 @@ void markQueuePushArray (MarkQueue *q,
  *********************************************************/
 
 // Returns invalid MarkQueueEnt if queue is empty.
-static MarkQueueEnt markQueuePop (MarkQueue *q)
+static MarkQueueEnt markQueuePop_ (MarkQueue *q)
 {
     MarkQueueBlock *top;
 
@@ -737,6 +737,46 @@ again:
     return ent;
 }
 
+static MarkQueueEnt markQueuePop (MarkQueue *q)
+{
+#if MARK_PREFETCH_QUEUE_DEPTH == 0
+    return markQueuePop_(q);
+#else
+    unsigned int i = q->prefetch_head;
+    while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
+        MarkQueueEnt new = markQueuePop_(q);
+        if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
+            // Mark queue is empty; look for any valid entries in the prefetch
+            // queue
+            for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
+                 j != i;
+                 j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
+            {
+                if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
+                    i = j;
+                    goto done;
+                }
+            }
+            return new;
+        }
+
+        // The entry may not be a MARK_CLOSURE but it doesn't matter, our
+        // MarkQueueEnt encoding always places the pointer to the object to be
+        // marked first.
+        prefetchForRead(&new.mark_closure.p->header.info);
+        q->prefetch_queue[i] = new;
+        i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
+    }
+
+  done:
+    ;
+    MarkQueueEnt ret = q->prefetch_queue[i];
+    q->prefetch_queue[i].null_entry.p = NULL;
+    q->prefetch_head = i;
+    return ret;
+#endif
+}
+
 /*********************************************************
  * Creating and destroying MarkQueues and UpdRemSets
  *********************************************************/
@@ -748,6 +788,10 @@ static void init_mark_queue_ (MarkQueue *queue)
     queue->blocks = bd;
     queue->top = (MarkQueueBlock *) bd->start;
     queue->top->head = 0;
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+    memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
+    queue->prefetch_head = 0;
+#endif
 }
 
 /* Must hold sm_mutex. */
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index bff89d9e92..806776cdc5 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -84,6 +84,9 @@ typedef struct {
     MarkQueueEnt entries[];
 } MarkQueueBlock;
 
+// How far ahead in mark queue to prefetch?
+#define MARK_PREFETCH_QUEUE_DEPTH 5
+
 /* The mark queue is not capable of concurrent read or write.
  *
  * invariants:
@@ -101,6 +104,13 @@ typedef struct MarkQueue_ {
 
     // Is this a mark queue or a capability-local update remembered set?
     bool is_upd_rem_set;
+
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+    // A ring-buffer of entries which we will mark next
+    MarkQueueEnt prefetch_queue[MARK_PREFETCH_QUEUE_DEPTH];
+    // The first free slot in prefetch_queue.
+    uint8_t prefetch_head;
+#endif
 } MarkQueue;
 
 /* While it shares its representation with MarkQueue, UpdRemSet differs in
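Two notes on the patch above. First, prefetchForRead is the RTS's portable wrapper around the compiler's prefetch hint; a sketch of the assumed definition on GCC/Clang (the authoritative definition lives in the RTS headers, not here):

#define prefetchForRead(ptr)  __builtin_prefetch((const void *)(ptr), 0 /* read, not write */)

Second, the depth of 5 is a latency/locality trade-off: deep enough that the prefetched header line has usually landed in cache by the time the entry is marked, shallow enough that the ring itself stays hot and entries are not held long after being popped.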