author     Jacques-Henri Jourdan <jacques-henri.jourdan@normalesup.org>   2019-05-23 16:00:08 +0200
committer  Jacques-Henri Jourdan <jacques-henri.jourdan@normalesup.org>   2019-06-05 14:25:33 +0200
commit     0ca84f52cddad0b9b93026b6ceb8749488dd79d7 (patch)
tree       6a320b9194c254a7707e67096535fb6ceb7d956e
parent     79088fb09d23d6d8bde3258f856df2a1d6e31ddf (diff)
download   ocaml-0ca84f52cddad0b9b93026b6ceb8749488dd79d7.tar.gz
Refactor the postponed blocks machinery in memprof.c
This ensures that:
- a callback is never invoked while another callback is running;
- the postponed queue is purged whenever the memprof parameters are set.
We now use a FIFO, implemented as a circular buffer, to remember the
postponed blocks.
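
The FIFO is the classic circular buffer with one slot always left free:
[postponed_hd] points to the next push, [postponed_tl] to the next pop, and
[hd == tl] therefore unambiguously means "empty". The following is a minimal
standalone sketch of that discipline, not the patch itself: it uses toy int
payloads and a fixed-size buffer, whereas the real code stores rooted
(block, callstack) entries and reallocates a four-times-larger buffer when
full. A second sketch, placed just before the diff, illustrates the two
guarantees above.

/* Sketch only: the real queue in memprof.c stores (block, callstack,
   occurrences, kind) entries and registers each slot as a GC root. */
#include <stdio.h>

#define QUEUE_SIZE 8

static int queue[QUEUE_SIZE];
static int *queue_end = queue + QUEUE_SIZE;
static int *tl = queue;                      /* pointer to next pop  */
static int *hd = queue;                      /* pointer to next push */

/* Returns 0 when full; memprof.c reallocates a 4x larger buffer here. */
static int push(int v)
{
  int *new_hd = hd + 1;
  if (new_hd == queue_end) new_hd = queue;   /* wrap around */
  if (new_hd == tl) return 0;                /* full: one slot is kept free
                                                so hd == tl means "empty" */
  *hd = v;
  hd = new_hd;
  return 1;
}

/* Returns 0 when empty. */
static int pop(int *v)
{
  if (tl == hd) return 0;
  *v = *tl++;
  if (tl == queue_end) tl = queue;           /* wrap around */
  return 1;
}

int main(void)
{
  int v, i;
  for (i = 0; i < 10; i++)
    if (!push(i)) printf("queue full at %d\n", i);   /* full at i = 7 */
  while (pop(&v)) printf("popped %d\n", v);          /* FIFO order */
  return 0;
}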
-rw-r--r--   runtime/caml/memprof.h                                        12
-rw-r--r--   runtime/memprof.c                                            270
-rw-r--r--   runtime/signals.c                                              4
-rw-r--r--   stdlib/gc.mli                                                 10
-rw-r--r--   testsuite/tests/statmemprof/arrays_in_minor.byte.reference     4
-rw-r--r--   testsuite/tests/statmemprof/arrays_in_minor.ml                 3
-rw-r--r--   testsuite/tests/statmemprof/arrays_in_minor.opt.reference      4
-rw-r--r--   testsuite/tests/statmemprof/exception_callback.ml              3
8 files changed, 195 insertions(+), 115 deletions(-)
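
Before the diff itself, a hedged sketch of how a single suspension flag
yields both guarantees from the commit message. This is reduced to control
flow only: plain ints stand in for sampled blocks, [suspended] stands in for
[caml_memprof_suspended], and a C function pointer stands in for the OCaml
closure that the real code invokes through [caml_callback_exn].

/* Sketch only: the suspension-flag discipline, not the real machinery. */
#define N 8
static int q[N];
static int tl = 0, hd = 0;          /* tl: next pop, hd: next push */
static int suspended = 0;

static int pop(int *v)
{
  if (tl == hd) return 0;           /* empty */
  *v = q[tl]; tl = (tl + 1) % N;
  return 1;
}

typedef void (*callback_t)(int sample);

/* Guarantee 1: no callback runs while another one is running. */
static void handle_postponed(callback_t cb)
{
  int sample;
  if (suspended) return;            /* already inside a callback */
  suspended = 1;                    /* samples taken by cb() are
                                       queued, never handled inline */
  while (pop(&sample)) cb(sample);
  suspended = 0;
}

/* Guarantee 2: (re)setting the parameters never leaves stale samples
   for the new callback: run them now, or purge if we cannot. */
static void set_parameters(callback_t cb)
{
  if (!suspended) handle_postponed(cb);
  else tl = hd;                     /* purge_postponed_queue() analogue */
}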
diff --git a/runtime/caml/memprof.h b/runtime/caml/memprof.h
index 7b7431a5f6..6bc3a71787 100644
--- a/runtime/caml/memprof.h
+++ b/runtime/caml/memprof.h
@@ -21,16 +21,16 @@
 #include "config.h"
 #include "mlvalues.h"
 
-extern void caml_memprof_track_alloc_shr(value block);
+extern int caml_memprof_suspended;
+
+extern void caml_memprof_handle_postponed();
+extern int caml_memprof_to_do;
 
-extern void caml_memprof_renew_minor_sample(void);
-extern value* caml_memprof_young_trigger;
+extern void caml_memprof_track_alloc_shr(value block);
 extern void caml_memprof_track_young(tag_t tag, uintnat wosize,
                                      int from_caml);
 
-extern int caml_memprof_suspended;
-
-extern struct caml_memprof_postponed_block *caml_memprof_postponed_head;
+extern void caml_memprof_renew_minor_sample(void);
+extern value* caml_memprof_young_trigger;
 
 #endif
diff --git a/runtime/memprof.c b/runtime/memprof.c
index 4d6144a461..16dbeb5d22 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -98,7 +98,7 @@ static uintnat mt_generate_geom()
      bounded by the entropy provided by [mt_generate_uniform], which
      is 32bits. */
   double res = 1 + logf(mt_generate_uniform()) * one_log1m_lambda;
-  if(res > Max_long) return Max_long;
+  if (res > Max_long) return Max_long;
   return (uintnat)res;
 }
 
@@ -127,16 +127,29 @@ static uintnat mt_generate_binom(uintnat len)
 
 /**** Interface with the OCaml code. ****/
 
+static void purge_postponed_queue(void);
+static void check_to_do(void);
+
 CAMLprim value caml_memprof_set(value v)
 {
   CAMLparam1(v);
   double l = Double_val(Field(v, 0));
   intnat sz = Long_val(Field(v, 1));
-  if(sz < 0 || !(l >= 0.) || l > 1.) /* Checks that [l] is not NAN. */
+  if (sz < 0 || !(l >= 0.) || l > 1.) /* Checks that [l] is not NAN. */
     caml_invalid_argument("caml_memprof_set");
 
-  if(!init) {
+  /* This call to [caml_memprof_set] may stop sampling or change the
+     callback. We have to make sure that the postponed queue is empty
+     before continuing. */
+  if (!caml_memprof_suspended)
+    caml_memprof_handle_postponed();
+  else
+    /* But if we are currently running a callback, there is nothing
+       else we can do than purging the queue. */
+    purge_postponed_queue();
+
+  if (!init) {
     int i;
     init = 1;
 
@@ -149,7 +162,7 @@ CAMLprim value caml_memprof_set(value v)
   }
 
   lambda = l;
-  if(l > 0) {
+  if (l > 0) {
     one_log1m_lambda = l == 1 ? 0 : 1/caml_log1p(-l);
     next_mt_generate_binom = mt_generate_geom();
   }
@@ -174,7 +187,10 @@ static value do_callback(tag_t tag, uintnat wosize, uintnat occurrences,
                          value callstack, enum ml_alloc_kind cb_kind)
 {
   CAMLparam1(callstack);
   CAMLlocal1(sample_info);
-  CAMLassert(occurrences > 0);
+  value res; /* Not a root, can be an exception result. */
+  CAMLassert(occurrences > 0 && !caml_memprof_suspended);
+
+  caml_memprof_suspended = 1;
 
   sample_info = caml_alloc_small(5, 0);
   Field(sample_info, 0) = Val_long(occurrences);
@@ -183,122 +199,181 @@ static value do_callback(tag_t tag, uintnat wosize, uintnat occurrences,
   Field(sample_info, 3) = Val_long(wosize);
   Field(sample_info, 4) = callstack;
 
-  CAMLreturn(caml_callback_exn(memprof_callback, sample_info));
+  res = caml_callback_exn(memprof_callback, sample_info);
+
+  caml_memprof_suspended = 0;
+  check_to_do();
+
+  CAMLreturn(res);
 }
 
-/**** Sampling procedures ****/
+/**** Capturing the call stack *****/
 
-static value capture_callstack_major()
+static value capture_callstack_major(void)
 {
   value res;
   uintnat wosize = caml_current_callstack_size(callstack_size);
   /* We do not use [caml_alloc] to make sure the GC will not get called. */
-  if(wosize == 0) return Atom (0);
+  if (wosize == 0) return Atom (0);
   res = caml_alloc_shr_no_track_noexc(wosize, 0);
-  if(res != 0) caml_current_callstack_write(res);
+  if (res != 0) caml_current_callstack_write(res);
   return res;
 }
 
-static value capture_callstack_minor()
+static value capture_callstack_minor(void)
 {
   value res;
   uintnat wosize = caml_current_callstack_size(callstack_size);
-  CAMLassert(caml_memprof_suspended); /* => no samples in the call stack. */
+  CAMLassert(!caml_memprof_suspended);
+  caml_memprof_suspended = 1; /* => no samples in the call stack. */
   res = caml_alloc(wosize, 0);
+  caml_memprof_suspended = 0;
+  check_to_do();
   caml_current_callstack_write(res);
   return res;
 }
 
-struct caml_memprof_postponed_block {
+/**** Handling postponed sampled blocks. ****/
+/* When allocating in from C code, we cannot call the callback,
+   because the [caml_alloc_***] are guaranteed not to do so. These
+   functions make it possible to register a sampled block in a
+   todo-list so that the callback call is performed when possible. */
+/* Note: the shorter the delay is, the better, because the block is
+   linked to a root during the delay, so that the reachability
+   properties of the sampled block are artificially modified. */
+
+#define POSTPONED_DEFAULT_QUEUE_SIZE 128
+static struct postponed_block {
   value block;
   value callstack;
   uintnat occurrences;
   enum ml_alloc_kind kind;
-  struct caml_memprof_postponed_block* next;
-} *caml_memprof_postponed_head = NULL;
+} default_postponed_queue[POSTPONED_DEFAULT_QUEUE_SIZE],
+  *postponed_queue = default_postponed_queue,
+  *postponed_queue_end = default_postponed_queue + POSTPONED_DEFAULT_QUEUE_SIZE,
+  *postponed_tl = default_postponed_queue, /* Pointer to next pop */
+  *postponed_hd = default_postponed_queue; /* Pointer to next push */
+int caml_memprof_to_do = 0;
+
+static void postponed_pop(void)
+{
+  caml_remove_global_root(&postponed_tl->block);
+  caml_remove_global_root(&postponed_tl->callstack);
+  postponed_tl++;
+  if (postponed_tl == postponed_queue_end) postponed_tl = postponed_queue;
+}
 
-/* When allocating in from C code, we cannot call the callback,
-   because the [caml_alloc_***] are guaranteed not to do so. This
-   function registers a sampled block in a todo-list so that the
-   callback call is performed when possible. */
+static void purge_postponed_queue(void)
+{
+  while (postponed_tl != postponed_hd) postponed_pop();
+  if (postponed_queue != default_postponed_queue) {
+    caml_stat_free(postponed_queue);
+    postponed_queue = default_postponed_queue;
+    postponed_queue_end = postponed_queue + POSTPONED_DEFAULT_QUEUE_SIZE;
+  }
+  postponed_hd = postponed_tl = postponed_queue;
+}
+
+/* This function does not call the GC. This is important since it is
+   called when allocating a block using [caml_alloc_shr]: The new
+   block is allocated, but not yet initialized, so that the heap
+   invariants are broken. */
 static void register_postponed_callback(value block, uintnat occurrences,
-                                        enum ml_alloc_kind kind) {
-  struct caml_memprof_postponed_block* pb;
+                                        enum ml_alloc_kind kind)
+{
   value callstack;
-  if(occurrences == 0) return;
-
-  pb = caml_stat_alloc_noexc(sizeof(struct caml_memprof_postponed_block));
-  if(pb == NULL) return; /* OOM */
+  struct postponed_block* new_hd;
+  if (occurrences == 0) return;
 
   callstack = capture_callstack_major();
-  if(callstack == 0) { /* OOM */
-    caml_stat_free(pb);
-    return;
+  if (callstack == 0) return; /* OOM */
+
+  new_hd = postponed_hd + 1;
+  if (new_hd == postponed_queue_end) new_hd = postponed_queue;
+  if (new_hd == postponed_tl) {
+    /* Queue is full, reallocate it. (We always leave one free slot in
+       order to be able to distinguish the 100% full and the empty
+       states). */
+    uintnat sz = 4 * (postponed_queue_end - postponed_queue);
+    struct postponed_block* new_queue =
+      caml_stat_alloc_noexc(sz * sizeof(struct postponed_block));
+    if (new_queue == NULL) return;
+    new_hd = new_queue;
+    while (postponed_tl != postponed_hd) {
+      *new_hd = *postponed_tl;
+      caml_register_global_root(&new_hd->block);
+      caml_register_global_root(&new_hd->callstack);
+      new_hd++;
+      postponed_pop();
+    }
+    if (postponed_queue != default_postponed_queue)
+      caml_stat_free(postponed_queue);
+    postponed_tl = postponed_queue = new_queue;
+    postponed_hd = new_hd;
+    postponed_queue_end = postponed_queue + sz;
+    new_hd++;
   }
 
-  pb->block = block;
-  caml_register_generational_global_root(&pb->block);
-  pb->callstack = callstack;
-  caml_register_generational_global_root(&pb->callstack);
-  pb->occurrences = occurrences;
-  pb->kind = kind;
-  pb->next = caml_memprof_postponed_head;
-  caml_memprof_postponed_head = pb;
-#ifndef NATIVE_CODE
-  caml_something_to_do = 1;
-#else
-  caml_young_limit = caml_young_alloc_end;
-#endif
+  postponed_hd->block = block;
+  postponed_hd->callstack = callstack;
+  caml_register_global_root(&postponed_hd->block);
+  caml_register_global_root(&postponed_hd->callstack);
+  postponed_hd->occurrences = occurrences;
+  postponed_hd->kind = kind;
+  postponed_hd = new_hd;
+
+  check_to_do();
 }
 
-void caml_memprof_handle_postponed()
+void caml_memprof_handle_postponed(void)
 {
-  struct caml_memprof_postponed_block *p, *q;
+  CAMLparam0();
+  CAMLlocal1(block);
   value ephe;
 
-  if(caml_memprof_postponed_head == NULL)
-    return;
-
-  /* We first reverse the list */
-  p = caml_memprof_postponed_head;
-  q = caml_memprof_postponed_head->next;
-  p->next = NULL;
-  while(q != NULL) {
-    struct caml_memprof_postponed_block* next = q->next;
-    q->next = p;
-    p = q;
-    q = next;
+  if (caml_memprof_suspended) {
+    caml_memprof_to_do = 0;
+    CAMLreturn0;
   }
-  caml_memprof_postponed_head = NULL;
-  caml_update_young_limit();
 
-#define NEXT_P \
-  { struct caml_memprof_postponed_block* next = p->next;  \
-    caml_remove_generational_global_root(&p->callstack);  \
-    caml_remove_generational_global_root(&p->block);      \
-    caml_stat_free(p);                                    \
-    p = next; }
+  while (postponed_tl != postponed_hd) {
+    struct postponed_block pb = *postponed_tl;
+    block = pb.block; /* pb.block is not a root! */
+    postponed_pop();
+    if (postponed_tl == postponed_hd) purge_postponed_queue();
 
-  caml_memprof_suspended = 1;
-  /* We then do the actual iteration on postponed blocks */
-  while(p != NULL) {
-    ephe = do_callback(Tag_val(p->block), Wosize_val(p->block),
-                       p->occurrences, p->callstack, p->kind);
-    if (Is_exception_result(ephe)) {
-      caml_memprof_suspended = 0;
-      /* In the case of an exception, we just forget the entire list. */
-      while(p != NULL) NEXT_P;
-      caml_raise(Extract_exception(ephe));
-    }
-    if(Is_block(ephe))
-      caml_ephemeron_set_key(Field(ephe, 0), 0, p->block);
-    NEXT_P;
+    /* If using threads, this call can trigger reentrant calls to
+       [caml_memprof_handle_postponed] even though we set
+       [caml_memprof_suspended]. */
+    ephe = do_callback(Tag_val(block), Wosize_val(block),
+                       pb.occurrences, pb.callstack, pb.kind);
+
+    if (Is_exception_result(ephe)) caml_raise(Extract_exception(ephe));
+    if (Is_block(ephe)) caml_ephemeron_set_key(Field(ephe, 0), 0, block);
+  }
+
+  caml_memprof_to_do = 0;
+  CAMLreturn0;
+}
+
+static void check_to_do(void)
+{
+  if (!caml_memprof_suspended && postponed_tl != postponed_hd) {
+    caml_memprof_to_do = 1;
+#ifndef NATIVE_CODE
    caml_something_to_do = 1;
+#else
+    caml_young_limit = caml_young_alloc_end;
+#endif
  }
-  caml_memprof_suspended = 0;
 }
 
+/**** Sampling procedures ****/
+
 void caml_memprof_track_alloc_shr(value block)
 {
   CAMLassert(Is_in_heap(block));
   /* This test also makes sure memprof is initialized. */
-  if(lambda == 0 || caml_memprof_suspended) return;
+  if (lambda == 0 || caml_memprof_suspended) return;
   register_postponed_callback(
     block, mt_generate_binom(Whsize_val(block)), Major);
 }
@@ -308,7 +383,7 @@ void caml_memprof_track_alloc_shr(value block)
    heap. */
 static void shift_sample(uintnat n)
 {
-  if(caml_memprof_young_trigger - caml_young_alloc_start > n)
+  if (caml_memprof_young_trigger - caml_young_alloc_start > n)
     caml_memprof_young_trigger -= n;
   else
     caml_memprof_young_trigger = caml_young_alloc_start;
@@ -324,11 +399,11 @@ static void shift_sample(uintnat n)
 
 void caml_memprof_renew_minor_sample(void)
 {
-  if(lambda == 0) /* No trigger in the current minor heap. */
+  if (lambda == 0) /* No trigger in the current minor heap. */
     caml_memprof_young_trigger = caml_young_alloc_start;
   else {
     uintnat geom = mt_generate_geom();
-    if(caml_young_ptr - caml_young_alloc_start < geom)
+    if (caml_young_ptr - caml_young_alloc_start < geom)
       /* No trigger in the current minor heap. */
       caml_memprof_young_trigger = caml_young_alloc_start;
     caml_memprof_young_trigger = caml_young_ptr - (geom - 1);
@@ -347,7 +422,7 @@ void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml)
   uintnat whsize = Whsize_wosize(wosize);
   uintnat occurrences;
 
-  if(caml_memprof_suspended) {
+  if (caml_memprof_suspended) {
     caml_memprof_renew_minor_sample();
     CAMLreturn0;
   }
@@ -361,7 +436,7 @@ void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml)
   occurrences =
     mt_generate_binom(caml_memprof_young_trigger - 1 - caml_young_ptr) + 1;
 
-  if(!from_caml) {
+  if (!from_caml) {
     register_postponed_callback(Val_hp(caml_young_ptr), occurrences, Minor);
     caml_memprof_renew_minor_sample();
     CAMLreturn0;
@@ -374,44 +449,47 @@ void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml)
      call the callback and use as a sample the block which will be
     allocated right after the callback. */
 
-  /* Restore the minor heap in a valid state and suspend sampling for
-     calling the callback.
-     We should not call the GC before this. */
+  /* Restore the minor heap in a valid state for calling the callback.
+     We should not call the GC before these two instructions. */
   caml_young_ptr += whsize;
-  caml_memprof_suspended = 1;
   caml_memprof_renew_minor_sample();
 
+  /* Empty the queue to make sure callbacks are called in the right
+     order. */
+  caml_memprof_handle_postponed();
+
   callstack = capture_callstack_minor();
   ephe = do_callback(tag, wosize, occurrences, callstack, Minor);
-  caml_memprof_suspended = 0;
   if (Is_exception_result(ephe)) caml_raise(Extract_exception(ephe));
 
   /* We can now restore the minor heap in the state needed by
-     [Alloc_small_aux].
-     We should not call the GC after this. */
-  if(caml_young_ptr - whsize < caml_young_trigger) {
+     [Alloc_small_aux]. */
+  if (caml_young_ptr - whsize < caml_young_trigger) {
     CAML_INSTR_INT ("force_minor/memprof@", 1);
     caml_gc_dispatch();
   }
+
+  /* Re-allocate the block in the minor heap. We should not call the
+     GC after this. */
   caml_young_ptr -= whsize;
 
   /* Make sure this block is not going to be sampled again. */
   shift_sample(whsize);
 
   /* Write the ephemeron if not [None]. */
-  if(Is_block(ephe)) {
+  if (Is_block(ephe)) {
     /* Subtlety: we are actually writing the ephemeron with an invalid
        (uninitialized) block. This is correct for two reasons:
       - The logic of [caml_ephemeron_set_key] never inspects the content of
          the block. In only checks that the block is young.
       - The allocation and initialization happens right after returning
-        from [caml_memprof_track_young]. Since the heap is in an invalid
-        state before that initialization, very little heap operations are
-        allowed until then.
-    */
+        from [caml_memprof_track_young]. */
     caml_ephemeron_set_key(Field(ephe, 0), 0, Val_hp(caml_young_ptr));
   }
 
+  /* /!\ Since the heap is in an invalid state before initialization,
+     very little heap operations are allowed until then. */
+
   CAMLreturn0;
 }
diff --git a/runtime/signals.c b/runtime/signals.c
index 62cdb610c8..cd1fa412df 100644
--- a/runtime/signals.c
+++ b/runtime/signals.c
@@ -259,9 +259,7 @@ void caml_update_young_limit (void)
 #ifdef NATIVE_CODE
   if(caml_requested_major_slice || caml_requested_minor_gc ||
-     caml_final_to_do ||
-     caml_signals_are_pending ||
-     caml_memprof_postponed_head != NULL)
+     caml_final_to_do || caml_signals_are_pending || caml_memprof_to_do)
     caml_young_limit = caml_young_alloc_end;
 #endif
 }
diff --git a/stdlib/gc.mli b/stdlib/gc.mli
index 2de9a5d8ee..f706dca23a 100644
--- a/stdlib/gc.mli
+++ b/stdlib/gc.mli
@@ -440,10 +440,12 @@ module Memprof :
        possible that a context switch occurs during a callback, in
       which case reentrancy has to be taken into account.
 
-      Note that when the callback kind is [Major], the callback can
-      be postponed after the actual allocation. Therefore, the
-      context of the callback may be slightly different than
-      expected. This should not happen if no C binding is used. *)
+      Note that the callback can be postponed slightly after the
+      actual allocation. Therefore, the context of the callback may
+      be slightly different than expected.
+
+      In addition, note that calling [start] or [stop] in a callback
+      can lead to losses of samples. *)
 
   type 'a ctrl = {
       sampling_rate : float;
diff --git a/testsuite/tests/statmemprof/arrays_in_minor.byte.reference b/testsuite/tests/statmemprof/arrays_in_minor.byte.reference
index 183af7bbf9..17a0ed6fb6 100644
--- a/testsuite/tests/statmemprof/arrays_in_minor.byte.reference
+++ b/testsuite/tests/statmemprof/arrays_in_minor.byte.reference
@@ -8,6 +8,6 @@ check_distrib 1 250 1000 0.900000
 check_distrib 1 1 10000000 0.010000
 check_distrib 250 250 100000 0.100000
 check_callstack
-Raised by primitive operation at file "arrays_in_minor.ml", line 153, characters 2-35
-Called from file "arrays_in_minor.ml", line 159, characters 9-27
+Raised by primitive operation at file "arrays_in_minor.ml", line 154, characters 2-35
+Called from file "arrays_in_minor.ml", line 160, characters 9-27
 OK !
diff --git a/testsuite/tests/statmemprof/arrays_in_minor.ml b/testsuite/tests/statmemprof/arrays_in_minor.ml
index fa8b26ecfa..8c67964ded 100644
--- a/testsuite/tests/statmemprof/arrays_in_minor.ml
+++ b/testsuite/tests/statmemprof/arrays_in_minor.ml
@@ -43,7 +43,8 @@ let check_ephe_full_major () =
   start {
       sampling_rate = 0.01;
       callstack_size = 10;
-      callback = fun _ ->
+      callback = fun s ->
+        assert (s.tag = 0 || s.tag = 1);
         let res = Ephemeron.K1.create () in
         ephes := res :: !ephes;
         Some res
diff --git a/testsuite/tests/statmemprof/arrays_in_minor.opt.reference b/testsuite/tests/statmemprof/arrays_in_minor.opt.reference
index e62709ca30..640d95231b 100644
--- a/testsuite/tests/statmemprof/arrays_in_minor.opt.reference
+++ b/testsuite/tests/statmemprof/arrays_in_minor.opt.reference
@@ -9,6 +9,6 @@ check_distrib 1 1 10000000 0.010000
 check_distrib 250 250 100000 0.100000
 check_callstack
 Raised by primitive operation at file "arrays_in_minor.ml", line 22, characters 20-34
-Called from file "arrays_in_minor.ml", line 153, characters 2-35
-Called from file "arrays_in_minor.ml", line 159, characters 9-27
+Called from file "arrays_in_minor.ml", line 154, characters 2-35
+Called from file "arrays_in_minor.ml", line 160, characters 9-27
 OK !
diff --git a/testsuite/tests/statmemprof/exception_callback.ml b/testsuite/tests/statmemprof/exception_callback.ml
index a490c0f2aa..8a04d0f76f 100644
--- a/testsuite/tests/statmemprof/exception_callback.ml
+++ b/testsuite/tests/statmemprof/exception_callback.ml
@@ -20,4 +20,5 @@ let _ =
       callstack_size = 10;
       callback = fun _ -> assert false };
-  Array.make 200 0
+  ignore (Sys.opaque_identity (Array.make 200 0));
+  stop ()