1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
|
#include "rts/PosixSource.h"
#include "Rts.h"
#include "sm/OSMem.h"
#include "RtsUtils.h"
#include "linker/MMap.h"
#include "AdjustorPool.h"
#include <string.h>
/*
* Note [Adjustor pools]
* ~~~~~~~~~~~~~~~~~~~~~
* Memory management for adjustors is somewhat complicated by the fact that
* modern operating systems highly discourage users from simultaneously
* mapping memory as both writable and executable. This means that when GHC
* creates an adjustor, it must immediately re-map it to be non-writable,
* thus precluding the possibility of adding further adjustors to the same
* page.
*
* For a long time we would simply allocate a fresh page for each adjustor.
* Given that the adjustor code is quite small (on the order of a dozen
* bytes), this incurred significant space overhead. This was particularly
* bad on Windows, where address space can only be allocated in units of 16
* pages (64 kBytes). Consequently, even allocating a couple of thousand
* adjustors can consume gigabytes of memory.
*
* AdjustorPool is a specialized allocator for managing adjustors.
* Specifically, it exploits the fact that an adjustor's code can be
* constructed even if we don't know the location of closure to which the
* adjustor will eventually refer. Consequently, we can allocate an entire
* page (a "chunk") of adjustors at a time, populate it with code, and remap
* it as non-writable/executable. Each adjustor's code is constructed to take
* its closure pointer from another region of memory that we leave as
* writable.
*
* To construct an AdjustorPool one uses newAdjustorPool and provide:
*
* - the size of the adjustor code, in bytes
* - a function to construct the adjustor code
*
* After a pool has been constructed, new adjustors can be allocated from it
* using alloc_adjustor and freed using free_adjustor. The pool maintains a
* free list and will reallocate into free adjustor slots when possible.
* We currently make no attempt at freeing AdjustorChunks which contain no
* live adjustors.
*
* The AdjustorPool module also exposes a high-level interface,
* new_adjustor_pool_from_template, capturing the common case where the
* adjustor code can be simply copied from a "template" and the adjustor fixed
* up.
*
* ┌───────────────────────────────────────┬─────────────────────────┐
* │ │ │
* ▼AdjustorPool AdjustorChunk │ AdjustorChunk │
* ┌────────────┐ ┌──►┌───────────────┐ │ ┌──►┌────────────────┐ │
* │code_size │ │ │owner ├──┘ │ │owner ├──┘
* │free_list ├───┤ ┌─┤exec_page │ │ │exec_page_next │
* └────────────┘ │ │ │free_list_next ├────┘ │free_list_next ├───► ...
* │ │ │first_free │ │first_free │
* │ │ ├───────────────┤ ├────────────────┤
* │ │ │slot_bitmap │ │slot_bitmap │
* │ │ │ │ │ │
* │ │ │ │ │ │
* │ │ ├───────────────┤ ├────────────────┤
* │ │ │context[0] │ │context[0] │
* │ │ │context[1] │ │context[1] │
* │ │ │... │ │... │
* │ │ └───────────────┘ └────────────────┘
* │ │
* │ │
* │ │
* │ └─┐
* │ ▼AdjustorExecPage
* │ ┌──────────────────┐
* └───┤owner │
* ├──────────────────┤
* │adjustor 0 code │ N.B. the code of each adjustor
* │ │ contains a reference back to
* ├──────────────────┤ its slot's chunk->context
* │adjustor 1 code │ entry.
* │ │
* ├──────────────────┤
* │adjustor 3 code │
* │ │
* ├──────────────────┤
* │... │
* │ │
* └──────────────────┘
*
*/
// Round up the N to the nearest multiple of s
#define ROUND_UP(n, s) ((((n) + (s) - 1) / (s)) * (s))
// Forward declarations
struct AdjustorExecPage;
struct AdjustorChunk;
struct AdjustorPool;
static struct AdjustorChunk *alloc_adjustor_chunk(struct AdjustorPool *owner);
#define ADJUSTOR_EXEC_PAGE_MAGIC 0xddeeffaabbcc0011ULL
struct AdjustorExecPage {
uint64_t magic;
/* since FFI is ripe for human error, we include a magic number at the
* beginning of the page to ensure that free_adjustor can catch when it
* is passed an invalid pointer.
*/
struct AdjustorChunk *owner;
uint8_t adjustor_code[];
};
struct AdjustorPool {
mk_adjustor_code_fn make_code;
void *user_data; /* user data to be passed to make_code */
size_t adjustor_code_size; /* how many bytes of code does each adjustor require? */
size_t chunk_slots; /* how many adjustors per chunk? */
struct AdjustorChunk *free_list;
#if defined(THREADED_RTS)
Mutex lock;
#endif
};
struct AdjustorChunk {
size_t first_free;
/* index of the first free adjustor slot.
* Invariant: this must be the index of a slot with unset bit in slot_bitmap
* if the chunk is on its owning AdjustorPool's free_list. If the chunk is not
* on free_list then first_free == pool->chunk_slots.
*/
struct AdjustorPool *owner;
/* the pool which owns this chunk */
struct AdjustorChunk *free_list_next;
/* the next chunk in the pool's free list */
struct AdjustorExecPage *exec_page;
/* an AdjustorExecPage containing the code for each adjustor */
struct AdjustorContext *contexts;
/* an AdjustorContext for each adjustor slot. This points to the contexts
* array which lives after slot_bitmap */
uint8_t slot_bitmap[];
/* a bit for each adjustor slot; bit is set if the slot is allocated */
};
struct AdjustorPool *
new_adjustor_pool(size_t code_size, mk_adjustor_code_fn make_code, void *user_data)
{
struct AdjustorPool *pool = stgMallocBytes(sizeof(struct AdjustorPool), "newAdjustorPool");
const size_t code_alignment = 16;
pool->make_code = make_code;
pool->user_data = user_data;
pool->adjustor_code_size = code_size;
size_t usable_exec_page_sz = getPageSize() - ROUND_UP(sizeof(struct AdjustorExecPage), code_alignment);
pool->chunk_slots = usable_exec_page_sz / ROUND_UP(code_size, code_alignment);
pool->free_list = NULL;
#if defined(THREADED_RTS)
initMutex(&pool->lock);
#endif
return pool;
}
/* Return the index of the first unset bit of the given bitmap or
* length_in_bits. */
static size_t
bitmap_first_unset(uint8_t *bitmap, size_t length_in_bits, size_t start_idx)
{
for (size_t i = start_idx; i < length_in_bits; i += 8) {
uint8_t x = bitmap[i / 8];
if (x != 0xff) {
return i + __builtin_clz(~x);
}
}
return length_in_bits;
}
static void
bitmap_set(uint8_t *bitmap, size_t idx, bool value)
{
size_t word_n = idx / 8;
uint8_t bit = 1 << (idx % 8);
if (value) {
bitmap[word_n] |= bit;
} else {
bitmap[word_n] &= ~bit;
}
}
static bool
bitmap_get(uint8_t *bitmap, size_t idx)
{
size_t word_n = idx / 8;
uint8_t bit = 1 << (idx % 8);
return bitmap[word_n] & bit;
}
void *
alloc_adjustor(struct AdjustorPool *pool, struct AdjustorContext context)
{
size_t slot_idx;
struct AdjustorChunk *chunk;
ACQUIRE_LOCK(&pool->lock);
// allocate a new chunk if free_list is empty.
if (pool->free_list == NULL) {
pool->free_list = alloc_adjustor_chunk(pool);
}
chunk = pool->free_list;
slot_idx = chunk->first_free;
ASSERT(slot_idx < pool->chunk_slots);
bitmap_set(chunk->slot_bitmap, slot_idx, 1);
// advance first_free
chunk->first_free = bitmap_first_unset(chunk->slot_bitmap, pool->chunk_slots, slot_idx+1);
if (chunk->first_free == pool->chunk_slots) {
// there are no free slots left in this chunk; remove it from
// free_list.
pool->free_list = chunk->free_list_next;
chunk->free_list_next = NULL;
}
ASSERT(bitmap_get(chunk->slot_bitmap, slot_idx));
bitmap_set(chunk->slot_bitmap, slot_idx, true);
chunk->contexts[slot_idx] = context;
void *adjustor = &chunk->exec_page->adjustor_code[pool->adjustor_code_size * slot_idx];
RELEASE_LOCK(&pool->lock);
return adjustor;
}
/* Free an adjustor previously allocated with alloc_adjustor, returning its
* AdjustorContext.
*/
struct AdjustorContext
free_adjustor(void *adjustor) {
uintptr_t exec_page_mask = ~(getPageSize() - 1ULL);
struct AdjustorExecPage *exec_page = (struct AdjustorExecPage *) ((uintptr_t) adjustor & exec_page_mask);
if (exec_page->magic != ADJUSTOR_EXEC_PAGE_MAGIC) {
barf("free_adjustor was passed an invalid adjustor");
}
struct AdjustorChunk *chunk = exec_page->owner;
struct AdjustorPool *pool = chunk->owner;
size_t slot_off = (uint8_t *) adjustor - exec_page->adjustor_code;
size_t slot_idx = slot_off / pool->adjustor_code_size;
// ensure that the slot is aligned as we would expect.
ASSERT(slot_off % pool->adjustor_code_size == 0);
ACQUIRE_LOCK(&pool->lock);
// ensure that the slot is in fact allocated.
ASSERT(bitmap_get(chunk->slot_bitmap, slot_idx));
// mark it as free.
bitmap_set(chunk->slot_bitmap, slot_idx, false);
// add the chunk to the pool's free_list if necessary.
if (chunk->first_free == pool->chunk_slots) {
chunk->free_list_next = pool->free_list;
pool->free_list = chunk;
}
// update first_free
if (chunk->first_free > slot_idx) {
chunk->first_free = slot_idx;
}
struct AdjustorContext context = chunk->contexts[slot_idx];
chunk->contexts[slot_idx] = (struct AdjustorContext) { NULL, NULL };
RELEASE_LOCK(&pool->lock);
return context;
}
/* Must hold owner->lock */
static struct AdjustorChunk *
alloc_adjustor_chunk(struct AdjustorPool *owner) {
size_t pg_sz = getPageSize();
struct AdjustorExecPage *exec_page = mmapAnonForLinker(pg_sz);
if (exec_page == NULL) {
barf("alloc_adjustor_chunk: failed to allocate");
}
exec_page->magic = ADJUSTOR_EXEC_PAGE_MAGIC;
// N.B. pad bitmap to ensure that .contexts is aligned.
size_t bitmap_sz = ROUND_UP(owner->chunk_slots, 8*sizeof(void*)) / 8;
size_t contexts_sz = sizeof(struct AdjustorContext) * owner->chunk_slots;
size_t alloc_sz = sizeof(struct AdjustorChunk) + bitmap_sz + contexts_sz;
struct AdjustorChunk *chunk = stgMallocBytes(alloc_sz, "allocAdjustorChunk");
chunk->owner = owner;
chunk->first_free = 0;
chunk->contexts = (struct AdjustorContext *) (chunk->slot_bitmap + bitmap_sz);
chunk->free_list_next = NULL;
chunk->exec_page = exec_page;
chunk->exec_page->owner = chunk;
// initialize the slot bitmap
memset(chunk->slot_bitmap, 0, bitmap_sz);
size_t code_sz = owner->adjustor_code_size;
for (size_t i = 0; i < owner->chunk_slots; i++) {
struct AdjustorContext *ctxt = &chunk->contexts[i];
owner->make_code(&exec_page->adjustor_code[i*code_sz], ctxt, owner->user_data);
*ctxt = (struct AdjustorContext) { NULL, NULL };
}
// Remap the executable page as executable
mprotectForLinker(exec_page, pg_sz, MEM_READ_EXECUTE);
return chunk;
}
static void mk_adjustor_from_template(
uint8_t *exec_code,
const struct AdjustorContext *context,
void *user_data)
{
struct AdjustorTemplate *tmpl = (struct AdjustorTemplate *) user_data;
// Copy the code
memcpy(exec_code, tmpl->code_start, tmpl->code_end - tmpl->code_start);
// Fix up the context pointer
size_t context_off = (uint8_t *) tmpl->context_ptr - tmpl->code_start;
const struct AdjustorContext **context_ptr = (const struct AdjustorContext **) (exec_code + context_off);
*context_ptr = context;
}
struct AdjustorPool *new_adjustor_pool_from_template(const struct AdjustorTemplate *tmpl)
{
size_t code_size = tmpl->code_end - tmpl->code_start;
return new_adjustor_pool(code_size, mk_adjustor_from_template, (void *) tmpl);
}
|