diff options
Diffstat (limited to 'erts/emulator/beam')
40 files changed, 766 insertions, 406 deletions
diff --git a/erts/emulator/beam/beam_code.h b/erts/emulator/beam/beam_code.h index 3465c4e6ef..0ef5bf6471 100644 --- a/erts/emulator/beam/beam_code.h +++ b/erts/emulator/beam/beam_code.h @@ -29,7 +29,11 @@ #define IS_VALID_LOCATION(File, Line) \ ((unsigned) (File) < 255 && (unsigned) (Line) < ((1 << 24) - 1)) -#define MAKE_LOCATION(File, Line) (((File) << 24) | (Line)) +/* Builds a location entry, silently ignoring unrepresentable locations. */ +#define MAKE_LOCATION(File, Line) \ + (IS_VALID_LOCATION((File), (Line)) ? \ + (((File) << 24) | (Line)) : \ + LINE_INVALID_LOCATION) #define LOC_FILE(Loc) ((Loc) >> 24) #define LOC_LINE(Loc) ((Loc) & ((1 << 24)-1)) diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index a50ec46659..f8bc2af783 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -96,7 +96,7 @@ erts_debug_size_shared_1(BIF_ALIST_1) } BIF_RETTYPE -erts_debug_copy_shared_1(BIF_ALIST_1) +erts_debug_copy_shared_2(BIF_ALIST_2) { Process* p = BIF_P; Eterm term = BIF_ARG_1; @@ -106,6 +106,13 @@ erts_debug_copy_shared_1(BIF_ALIST_1) erts_shcopy_t info; INITIALIZE_SHCOPY(info); + switch (BIF_ARG_2) { + case am_true: info.copy_literals = 1; break; + case am_false: info.copy_literals = 0; break; + default: + BIF_ERROR(p, BADARG); + } + size = copy_shared_calculate(term, &info); if (size > 0) { hp = HAlloc(p, size); diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index c81b2a9f48..267bfc4af4 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -354,7 +354,6 @@ demonitor(Process *c_p, Eterm ref, Eterm *multip) return am_false; /* Not a monitor (may have been...) 
*/ switch (mon->flags & ERTS_ML_STATE_ALIAS_MASK) { - case ERTS_ML_STATE_ALIAS_ONCE: case ERTS_ML_STATE_ALIAS_UNALIAS: { ErtsMonitorData *amdp = erts_monitor_create(ERTS_MON_TYPE_ALIAS, ref, @@ -366,6 +365,7 @@ demonitor(Process *c_p, Eterm ref, Eterm *multip) erts_monitor_tree_replace(&ERTS_P_MONITORS(c_p), mon, &amdp->origin); break; } + case ERTS_ML_STATE_ALIAS_ONCE: case ERTS_ML_STATE_ALIAS_DEMONITOR: erts_pid_ref_delete(ref); /* fall through... */ @@ -4271,7 +4271,7 @@ BIF_RETTYPE halt_2(BIF_ALIST_2) erts_exit(ERTS_ABORT_EXIT, ""); } else if (is_list(BIF_ARG_1) || BIF_ARG_1 == NIL) { -# define HALT_MSG_SIZE 200 +# define HALT_MSG_SIZE 1023 static byte halt_msg[4*HALT_MSG_SIZE+1]; Sint written; diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 9ba49376dd..c3e40633e3 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -199,6 +199,8 @@ bif erts_internal:ets_super_user/1 bif erts_internal:spawn_request/4 bif erts_internal:dist_spawn_request/4 +bif erts_internal:no_aux_work_threads/0 + bif erlang:spawn_request_abandon/1 # Static native functions in erts_literal_area_collector @@ -665,7 +667,7 @@ bif erts_internal:purge_module/2 bif binary:split/2 bif binary:split/3 bif erts_debug:size_shared/1 -bif erts_debug:copy_shared/1 +bif erts_debug:copy_shared/2 bif erlang:has_prepared_code_on_load/1 bif maps:take/2 diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 5527b62211..df4daff230 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -792,33 +792,6 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args) if (ERTS_SOMEONE_IS_CRASH_DUMPING) return; - /* Order all managed threads to block, this has to be done - first to guarantee that this is the only thread to generate - crash dump. 
*/ - erts_thr_progress_fatal_error_block(&tpd_buf); - -#ifdef ERTS_SYS_SUSPEND_SIGNAL - /* - * We suspend all scheduler threads so that we can dump some - * data about the currently running processes and scheduler data. - * We have to be very very careful when doing this as the schedulers - * could be anywhere. - */ - sys_init_suspend_handler(); - - for (i = 0; i < erts_no_schedulers; i++) { - erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid; - if (!erts_equal_tids(tid,erts_thr_self())) - sys_thr_suspend(tid); - } - -#endif - - /* Allow us to pass certain places without locking... */ - erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1); - erts_tsd_set(erts_is_crash_dumping_key, (void *) 1); - - envsz = sizeof(env); /* ERL_CRASH_DUMP_SECONDS not set * if we have a heart port, break immediately @@ -916,6 +889,36 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args) time(&now); erts_cbprintf(to, to_arg, "=erl_crash_dump:0.5\n%s", ctime(&now)); + /* Order all managed threads to block, this has to be done + first to guarantee that this is the only thread to generate + crash dump. */ + erts_thr_progress_fatal_error_block(&tpd_buf); + +#ifdef ERTS_SYS_SUSPEND_SIGNAL + /* + * We suspend all scheduler threads so that we can dump some + * data about the currently running processes and scheduler data. + * We have to be very very careful when doing this as the schedulers + * could be anywhere. + * It may happen that scheduler thread is suspended while holding + * malloc lock. Therefore code running in this thread must not use + * it, or it will deadlock. ctime and fdopen calls both use malloc + * internally and must be executed prior to. + */ + sys_init_suspend_handler(); + + for (i = 0; i < erts_no_schedulers; i++) { + erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid; + if (!erts_equal_tids(tid,erts_thr_self())) + sys_thr_suspend(tid); + } + +#endif + + /* Allow us to pass certain places without locking... 
*/ + erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1); + erts_tsd_set(erts_is_crash_dumping_key, (void *) 1); + if (file != NULL) erts_cbprintf(to, to_arg, "The error occurred in file %s, line %d\n", file, line); diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index 23335fe595..355e15fe7a 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -1743,7 +1743,7 @@ Uint copy_shared_perform_x(Eterm obj, Uint size, erts_shcopy_t *info, goto cleanup_next; } case REF_SUBTAG: - if (is_magic_ref_thing(ptr)) { + if (is_magic_ref_thing_with_hdr(ptr,hdr)) { ErtsMRefThing *mreft = (ErtsMRefThing *) ptr; erts_refc_inc(&mreft->mb->intern.refc, 2); goto off_heap_node_container_common; diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index e8392cd486..e0399904ed 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -1582,12 +1582,12 @@ erts_dsig_send_reg_msg(ErtsDSigSendContext* ctx, Eterm remote_name, /* local has died, deliver the exit signal to remote */ int -erts_dsig_send_exit_tt(ErtsDSigSendContext *ctx, Eterm local, Eterm remote, +erts_dsig_send_exit_tt(ErtsDSigSendContext *ctx, Process *c_p, Eterm remote, Eterm reason, Eterm token) { - Eterm ctl, msg = THE_NON_VALUE; + Eterm ctl, msg = THE_NON_VALUE, local = c_p->common.id; #ifdef USE_VM_PROBES - Process *sender = ctx->c_p; + Process *sender = c_p; Sint tok_label = 0; Sint tok_lastcnt = 0; Sint tok_serial = 0; @@ -1601,7 +1601,7 @@ erts_dsig_send_exit_tt(ErtsDSigSendContext *ctx, Eterm local, Eterm remote, msg = reason; if (have_seqtrace(token)) { - seq_trace_update_serial(ctx->c_p); + seq_trace_update_serial(c_p); seq_trace_output_exit(token, reason, SEQ_TRACE_SEND, remote, local); if (ctx->dep->dflags & DFLAG_EXIT_PAYLOAD) { ctl = TUPLE4(&ctx->ctl_heap[0], @@ -2706,6 +2706,7 @@ int erts_net_message(Port *prt, so.group_leader = gl; so.mfa = mfa; so.dist_entry = dep; + so.conn_id = conn_id; so.mld = ede.mld; so.edep = edep; so.ede_hfrag = 
ede_hfrag; @@ -2940,6 +2941,11 @@ erts_dsig_prepare(ErtsDSigSendContext *ctx, int no_trap, int connect) { + /* + * No process imply that we should force data through. That + * is, ignore busy state of dist entry and allow enqueue + * regardless of its state... + */ int res; ASSERT(no_trap || proc); @@ -2984,7 +2990,7 @@ retry: goto fail; } - if (no_suspend) { + if (no_suspend && proc) { if (erts_atomic32_read_acqb(&dep->qflgs) & ERTS_DE_QFLG_BUSY) { res = ERTS_DSIG_PREP_WOULD_SUSPEND; goto fail; diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index 12392955e6..7666e8951f 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -400,7 +400,7 @@ struct dist_sequences { extern int erts_dsig_send_msg(ErtsDSigSendContext*, Eterm, Eterm); extern int erts_dsig_send_reg_msg(ErtsDSigSendContext*, Eterm, Eterm, Eterm); extern int erts_dsig_send_link(ErtsDSigSendContext *, Eterm, Eterm); -extern int erts_dsig_send_exit_tt(ErtsDSigSendContext *, Eterm, Eterm, Eterm, Eterm); +extern int erts_dsig_send_exit_tt(ErtsDSigSendContext *, Process *, Eterm, Eterm, Eterm); extern int erts_dsig_send_unlink(ErtsDSigSendContext *, Eterm, Eterm, Uint64); extern int erts_dsig_send_unlink_ack(ErtsDSigSendContext *, Eterm, Eterm, Uint64); extern int erts_dsig_send_group_leader(ErtsDSigSendContext *, Eterm, Eterm); diff --git a/erts/emulator/beam/emu/bs_instrs.tab b/erts/emulator/beam/emu/bs_instrs.tab index c52367de2a..84adc06a9d 100644 --- a/erts/emulator/beam/emu/bs_instrs.tab +++ b/erts/emulator/beam/emu/bs_instrs.tab @@ -795,7 +795,7 @@ i_bs_utf16_size(Src, Dst) { $Dst = result; } -bs_put_utf16(Fail, Flags, Src) { +i_bs_put_utf16(Fail, Flags, Src) { if (!erts_bs_put_utf16(ERL_BITS_ARGS_2($Src, $Flags))) { $BADARG($Fail); } diff --git a/erts/emulator/beam/emu/msg_instrs.tab b/erts/emulator/beam/emu/msg_instrs.tab index 085cdb34cd..15002fa807 100644 --- a/erts/emulator/beam/emu/msg_instrs.tab +++ b/erts/emulator/beam/emu/msg_instrs.tab @@ -271,8 +271,7 @@ 
remove_message() { tok_label, tok_lastcnt, tok_serial); } #endif - erts_msgq_unlink_msg(c_p, msgp); - erts_msgq_set_save_first(c_p); + erts_msgq_unlink_msg_set_save_first(c_p, msgp); CANCEL_TIMER(c_p); erts_save_message_in_proc(c_p, msgp); diff --git a/erts/emulator/beam/emu/ops.tab b/erts/emulator/beam/emu/ops.tab index 336c4d0283..e1dba18a9f 100644 --- a/erts/emulator/beam/emu/ops.tab +++ b/erts/emulator/beam/emu/ops.tab @@ -1203,8 +1203,8 @@ i_bs_get_utf8 xy f? d bs_skip_utf8 Fail=f Ms=xy u u => i_bs_get_utf8 Ms Fail x -bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst -bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_get_utf16 Ms Fail Flags x +bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => get_utf16(Fail, Ms, Flags, Dst) +bs_skip_utf16 Fail=f Ms=xy Unit=u Flags=u => bs_get_utf16 Fail Ms Unit Flags x i_bs_get_utf16 xy f? t d @@ -1303,6 +1303,8 @@ bs_utf16_size j Src Dst=d => i_bs_utf16_size Src Dst bs_put_utf8 Fail u Src => i_bs_put_utf8 Fail Src +bs_put_utf16 Fail Flags Src => put_utf16(Fail, Flags, Src) + bs_put_utf32 Fail=j Flags=u Src=s => \ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src @@ -1310,15 +1312,15 @@ i_bs_utf8_size S x i_bs_utf16_size S x i_bs_put_utf8 j? S -bs_put_utf16 j? t S +i_bs_put_utf16 j? t S i_bs_validate_unicode j? S -# Handle unoptimized code. +# Handle unoptimized code and the 'native' flag for utf16 segments. 
i_bs_utf8_size Src=c Dst => move Src x | i_bs_utf8_size x Dst i_bs_utf16_size Src=c Dst => move Src x | i_bs_utf16_size x Dst i_bs_put_utf8 Fail Src=c => move Src x | i_bs_put_utf8 Fail x -bs_put_utf16 Fail Flags Src=c => move Src x | bs_put_utf16 Fail Flags x +i_bs_put_utf16 Fail Flags Src=c => move Src x | i_bs_put_utf16 Fail Flags x i_bs_validate_unicode Fail Src=c => move Src x | i_bs_validate_unicode Fail x # diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 2cea68a817..125a5a240e 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -148,6 +148,9 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(aireq, ERTS_ALC_T_AINFO_REQ) ErtsAlcType_t erts_fix_core_allocator_ix; +erts_tsd_key_t erts_thr_alloc_data_key; + +Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_alloc_instances); struct au_init { int enable; @@ -186,6 +189,7 @@ typedef struct { #endif int trim_threshold; int top_pad; + int dirty_alloc_insts; AlcUInit_t alloc_util; struct { char *mtrace; @@ -461,7 +465,7 @@ set_default_test_alloc_opts(struct au_init *ip) { SET_DEFAULT_ALLOC_OPTS(ip); ip->enable = 0; /* Disabled by default */ - ip->thr_spec = -1 * erts_no_schedulers; + ip->thr_spec = -1; ip->astrat = ERTS_ALC_S_FIRSTFIT; ip->init.aoff.crr_order = FF_AOFF; ip->init.aoff.blk_order = FF_BF; @@ -488,10 +492,10 @@ set_default_test_alloc_opts(struct au_init *ip) static void -adjust_tpref(struct au_init *ip, int no_sched) +adjust_tpref(struct au_init *ip, int no_sched, int no_dirty_inst) { if (ip->thr_spec) { - ip->thr_spec = no_sched; + ip->thr_spec = no_sched + no_dirty_inst; ip->thr_spec *= -1; /* thread preferred */ /* If default ... 
*/ @@ -607,6 +611,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) #endif ERTS_DEFAULT_TRIM_THRESHOLD, ERTS_DEFAULT_TOP_PAD, + 0, /* Default dirty alloc instances */ ERTS_DEFAULT_ALCU_INIT, }; size_t fix_type_sizes[ERTS_ALC_NO_FIXED_SIZES] = {0}; @@ -644,6 +649,8 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) hdbg_init(); #endif + erts_tsd_key_create(&erts_thr_alloc_data_key, "erts_alc_data_key"); + lock_all_physical_memory = 0; ncpu = eaiop->ncpu; @@ -686,6 +693,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) #endif } + erts_no_dirty_alloc_instances = init.dirty_alloc_insts; /* Make adjustments for carrier migration support */ init.temp_alloc.init.util.acul = 0; @@ -729,18 +737,19 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) /* Only temp_alloc can use thread specific interface */ if (init.temp_alloc.thr_spec) - init.temp_alloc.thr_spec = erts_no_schedulers; + init.temp_alloc.thr_spec = erts_no_schedulers + init.dirty_alloc_insts; /* Others must use thread preferred interface */ - adjust_tpref(&init.sl_alloc, erts_no_schedulers); - adjust_tpref(&init.std_alloc, erts_no_schedulers); - adjust_tpref(&init.ll_alloc, erts_no_schedulers); - adjust_tpref(&init.eheap_alloc, erts_no_schedulers); - adjust_tpref(&init.binary_alloc, erts_no_schedulers); - adjust_tpref(&init.ets_alloc, erts_no_schedulers); - adjust_tpref(&init.driver_alloc, erts_no_schedulers); - adjust_tpref(&init.fix_alloc, erts_no_schedulers); - adjust_tpref(&init.literal_alloc, erts_no_schedulers); + adjust_tpref(&init.sl_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.std_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.ll_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.eheap_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.binary_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.ets_alloc, 
erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.driver_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.fix_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.literal_alloc, erts_no_schedulers, init.dirty_alloc_insts); + adjust_tpref(&init.test_alloc, erts_no_schedulers, init.dirty_alloc_insts); /* @@ -756,6 +765,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) refuse_af_strategy(&init.driver_alloc); refuse_af_strategy(&init.fix_alloc); refuse_af_strategy(&init.literal_alloc); + refuse_af_strategy(&init.test_alloc); if (!init.temp_alloc.thr_spec) refuse_af_strategy(&init.temp_alloc); @@ -763,6 +773,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) erts_mtrace_pre_init(); #if HAVE_ERTS_MSEG init.mseg.nos = erts_no_schedulers; + init.mseg.ndai = init.dirty_alloc_insts; erts_mseg_init(&init.mseg); #endif @@ -1038,12 +1049,12 @@ start_au_allocator(ErtsAlcType_t alctr_n, if (!as0) continue; if (init->thr_spec < 0) { - init->init.util.ts = i == 0; + init->init.util.ts = (i == 0 || erts_no_schedulers < i); init->init.util.tspec = 0; init->init.util.tpref = -1*init->thr_spec + 1; } else { - if (i != 0) + if (0 < i && i <= erts_no_schedulers) init->init.util.ts = 0; else { if (astrat == ERTS_ALC_S_AFIT) @@ -1749,6 +1760,24 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) } bad_param(param, param+2); break; + case 'd': + if (has_prefix("ai", param+2)) { + arg = get_value(argv[i]+5, argv, &i); + if (sys_strcmp("max", arg) == 0) + init->dirty_alloc_insts = (int) erts_no_dirty_cpu_schedulers; + else { + Sint tmp; + char *rest; + errno = 0; + tmp = (Sint) ErtsStrToSint(arg, &rest, 10); + if (errno != 0 || rest == arg || tmp < 0) + bad_value(param, param+4, arg); + init->dirty_alloc_insts = (int) tmp; + } + break; + } + bad_param(param, param+2); + break; case 'u': if (has_prefix("ycs", argv[i]+3)) { init->alloc_util.ycs @@ -1817,6 +1846,10 @@ 
handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init) argv[j++] = argv[i]; } *argc = j; + + if (init->dirty_alloc_insts > erts_no_dirty_cpu_schedulers) + init->dirty_alloc_insts = (int) erts_no_dirty_cpu_schedulers; + } static char *type_no_str(ErtsAlcType_t n) @@ -1837,46 +1870,92 @@ void erts_alloc_register_scheduler(void *vesdp) { ErtsSchedulerData *esdp = (ErtsSchedulerData *) vesdp; - int ix = (int) esdp->no; + int ix; int aix; + int normal_sched; - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); + ASSERT(esdp == erts_get_scheduler_data()); + + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + ix = (int) esdp->no; + ASSERT(0 < ix && ix <= erts_no_schedulers); + normal_sched = !0; + } + else if (!erts_no_dirty_alloc_instances) { + ix = 0; + normal_sched = 0; + } + else { + ix = (int) esdp->dirty_no; + ASSERT(ix > 0); + ix = ((ix - 1) % erts_no_dirty_alloc_instances) + 1; + ix += erts_no_schedulers; + ASSERT(erts_no_schedulers < ix + && ix <= (erts_no_schedulers + + erts_no_dirty_alloc_instances)); + normal_sched = 0; + } + + esdp->aux_work_data.alloc_data.delayed_dealloc_handler = normal_sched; + esdp->aux_work_data.alloc_data.alc_ix = ix; for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) { ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix]; - esdp->alloc_data.deallctr[aix] = NULL; - esdp->alloc_data.pref_ix[aix] = -1; - if (tspec->enabled) { - if (!tspec->dd) - esdp->alloc_data.pref_ix[aix] = ix; - else { - Allctr_t *allctr = tspec->allctr[ix]; - ASSERT(allctr); - esdp->alloc_data.deallctr[aix] = allctr; - esdp->alloc_data.pref_ix[aix] = ix; - } - } + esdp->aux_work_data.alloc_data.deallctr[aix] = NULL; + if (!normal_sched) + continue; + /* + * Delayed dealloc is handled by normal schedulers, + * but not by dirty schedulers... 
+ */ + if (tspec->enabled && tspec->dd) { + Allctr_t *allctr = tspec->allctr[ix]; + ASSERT(allctr); + esdp->aux_work_data.alloc_data.deallctr[aix] = allctr; + } } + erts_tsd_set(erts_thr_alloc_data_key, (void *) &esdp->aux_work_data.alloc_data); } void -erts_alloc_scheduler_handle_delayed_dealloc(void *vesdp, - int *need_thr_progress, - ErtsThrPrgrVal *thr_prgr_p, - int *more_work) +erts_alloc_register_delayed_dealloc_handler_thread(ErtsThrAllocData *tadp, int ix) { - ErtsSchedulerData *esdp = (ErtsSchedulerData *) vesdp; int aix; + + /* + * Should not be a scheduler of any kind and should not + * handle an 'ix' reserved for normal schedulers... + */ + ASSERT(!erts_get_scheduler_data()); + ASSERT(0 == ix /* Aux thread... */ + || /* Handler thread for dirty scheds instances... */ + (erts_no_schedulers < ix + && ix <= (erts_no_schedulers + + erts_no_dirty_alloc_instances))); + + tadp->delayed_dealloc_handler = !0; + tadp->alc_ix = ix; for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) { - Allctr_t *allctr; - if (esdp) - allctr = esdp->alloc_data.deallctr[aix]; - else { - ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix]; - if (tspec->enabled && tspec->dd) - allctr = tspec->allctr[0]; - else - allctr = NULL; - } + ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix]; + tadp->deallctr[aix] = NULL; + if (tspec->enabled && tspec->dd) { + Allctr_t *allctr = tspec->allctr[ix]; + ASSERT(allctr); + tadp->deallctr[aix] = allctr; + } + } + erts_tsd_set(erts_thr_alloc_data_key, (void *) tadp); +} + +void +erts_alloc_handle_delayed_dealloc(ErtsThrAllocData *thr_alloc_data, + int *need_thr_progress, + ErtsThrPrgrVal *thr_prgr_p, + int *more_work) +{ + int aix; + ASSERT(thr_alloc_data); + for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) { + Allctr_t *allctr = thr_alloc_data->deallctr[aix]; if (allctr) { erts_alcu_check_delayed_dealloc(allctr, 1, @@ -1912,7 +1991,7 @@ erts_alloc_get_verify_unused_temp_alloc(Allctr_t **allctr) if 
(erts_allctrs_info[ERTS_ALC_A_TEMPORARY].alloc_util && erts_allctrs_info[ERTS_ALC_A_TEMPORARY].thr_spec) { ErtsAllocatorThrSpec_t *tspec; - int ix = ERTS_ALC_GET_THR_IX(); + int ix = erts_get_thr_alloc_ix(); tspec = &erts_allctr_thr_spec[ERTS_ALC_A_TEMPORARY]; if (ix < tspec->size) { @@ -3001,8 +3080,8 @@ static void reply_alloc_info(void *vair) { ErtsAllocInfoReq *air = (ErtsAllocInfoReq *) vair; - Uint sched_id = erts_get_scheduler_id(); - int global_instances = air->req_sched == sched_id; + int tix = erts_get_thr_alloc_ix(); + int global_instances = air->req_sched == tix; ErtsProcLocks rp_locks; Process *rp = air->proc; Eterm ref_copy = NIL, ai_list, msg = NIL; @@ -3028,7 +3107,7 @@ reply_alloc_info(void *vair) ? erts_alcu_sz_info : erts_alcu_info); - rp_locks = air->req_sched == sched_id ? ERTS_PROC_LOCK_MAIN : 0; + rp_locks = air->req_sched == tix ? ERTS_PROC_LOCK_MAIN : 0; sz = 0; hpp = NULL; @@ -3196,11 +3275,11 @@ reply_alloc_info(void *vair) case ERTS_ALC_INFO_A_MSEG_ALLOC: #if HAVE_ERTS_MSEG alloc_atom = erts_bld_atom(hpp, szp, "mseg_alloc"); - ainfo = erts_mseg_info(sched_id, NULL, NULL, + ainfo = erts_mseg_info(tix, NULL, NULL, hpp != NULL, air->only_sz, hpp, szp); ainfo = erts_bld_tuple(hpp, szp, 3, alloc_atom, - make_small(sched_id), + make_small(tix), ainfo); ai_list = erts_bld_cons(hpp, szp, ainfo, ai_list); #endif @@ -3209,7 +3288,7 @@ reply_alloc_info(void *vair) if (erts_allctrs_info[ai].thr_spec) { alloc_atom = erts_bld_atom(hpp, szp, (char *) ERTS_ALC_A2AD(ai)); - allctr = erts_allctr_thr_spec[ai].allctr[sched_id]; + allctr = erts_allctr_thr_spec[ai].allctr[tix]; ainfo = info_func(allctr, air->internal, hpp != NULL, NULL, NULL, hpp, szp); ai_list = erts_bld_cons(hpp, szp, @@ -3217,7 +3296,7 @@ reply_alloc_info(void *vair) hpp, szp, 3, alloc_atom, - make_small(sched_id), + make_small(tix), ainfo), ai_list); } @@ -3226,7 +3305,7 @@ reply_alloc_info(void *vair) msg = erts_bld_tuple(hpp, szp, 3, ref_copy, - make_small(sched_id), + make_small(tix), 
ai_list); } @@ -3245,7 +3324,7 @@ reply_alloc_info(void *vair) erts_queue_message(rp, rp_locks, mp, msg, am_system); - if (air->req_sched == sched_id) + if (air->req_sched == tix) rp_locks &= ~ERTS_PROC_LOCK_MAIN; erts_proc_unlock(rp, rp_locks); @@ -3325,13 +3404,14 @@ erts_request_alloc_info(struct process *c_p, air->allocs[airix] = ERTS_ALC_A_INVALID; erts_atomic32_init_nob(&air->refc, - (erts_aint32_t) erts_no_schedulers); + (erts_aint32_t) erts_no_aux_work_threads-1); - erts_proc_add_refc(c_p, (Sint) erts_no_schedulers); + erts_proc_add_refc(c_p, (Sint) erts_no_aux_work_threads-1); - if (erts_no_schedulers > 1) + if (erts_no_aux_work_threads > 2) erts_schedule_multi_misc_aux_work(1, - erts_no_schedulers, + 1, + erts_no_aux_work_threads-1, reply_alloc_info, (void *) air); diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h index 831e7ab0a7..dffcef7370 100644 --- a/erts/emulator/beam/erl_alloc.h +++ b/erts/emulator/beam/erl_alloc.h @@ -107,10 +107,9 @@ typedef struct { typedef struct { Allctr_t *deallctr[ERTS_ALC_A_MAX+1]; - int pref_ix[ERTS_ALC_A_MAX+1]; - int flist_ix[ERTS_ALC_A_MAX+1]; - int pre_alc_ix; -} ErtsSchedAllocData; + int delayed_dealloc_handler; + int alc_ix; +} ErtsThrAllocData; void erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop); void erts_alloc_late_init(void); @@ -147,11 +146,14 @@ typedef struct { void *extra; } ErtsAllocatorFunctions_t; +extern erts_tsd_key_t erts_thr_alloc_data_key; extern ErtsAllocatorFunctions_t ERTS_WRITE_UNLIKELY(erts_allctrs[ERTS_ALC_A_MAX+1]); extern ErtsAllocatorInfo_t ERTS_WRITE_UNLIKELY(erts_allctrs_info[ERTS_ALC_A_MAX+1]); +extern Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_alloc_instances); + typedef struct { int enabled; int dd; @@ -175,10 +177,12 @@ void erts_allctr_wrapper_pre_lock(void); void erts_allctr_wrapper_pre_unlock(void); void erts_alloc_register_scheduler(void *vesdp); -void erts_alloc_scheduler_handle_delayed_dealloc(void *vesdp, - int *need_thr_progress, - 
ErtsThrPrgrVal *thr_prgr_p, - int *more_work); +void erts_alloc_register_delayed_dealloc_handler_thread(ErtsThrAllocData *tadp, + int ix); +void erts_alloc_handle_delayed_dealloc(ErtsThrAllocData *thr_alloc_data, + int *need_thr_progress, + ErtsThrPrgrVal *thr_prgr_p, + int *more_work); erts_aint32_t erts_alloc_fix_alloc_shrink(int ix, erts_aint32_t flgs); __decl_noreturn void erts_alloc_enomem(ErtsAlcType_t,Uint) @@ -230,6 +234,8 @@ int erts_is_allctr_wrapper_prelocked(void); #ifdef ERTS_HAVE_IS_IN_LITERAL_RANGE int erts_is_in_literal_range(void* ptr); #endif +ErtsThrAllocData *erts_get_thr_alloc_data(void); +int erts_get_thr_alloc_ix(void); #endif /* #if !ERTS_ALC_DO_INLINE */ @@ -321,6 +327,21 @@ int erts_is_allctr_wrapper_prelocked(void) && !!erts_tsd_get(erts_allctr_prelock_tsd_key); /* by me */ } +ERTS_ALC_INLINE +ErtsThrAllocData *erts_get_thr_alloc_data(void) +{ + return (ErtsThrAllocData *) erts_tsd_get(erts_thr_alloc_data_key); +} + +ERTS_ALC_INLINE +int erts_get_thr_alloc_ix(void) +{ + ErtsThrAllocData *tadp = (ErtsThrAllocData *) erts_tsd_get(erts_thr_alloc_data_key); + if (!tadp) + return 0; + return tadp->alc_ix; +} + #ifdef ERTS_HAVE_IS_IN_LITERAL_RANGE ERTS_ALC_FORCE_INLINE @@ -345,8 +366,6 @@ int erts_is_in_literal_range(void* ptr) #endif /* #if ERTS_ALC_DO_INLINE || defined(ERTS_ALC_INTERNAL__) */ -#define ERTS_ALC_GET_THR_IX() ((int) erts_get_scheduler_id()) - typedef void (*erts_alloc_verify_func_t)(Allctr_t *); erts_alloc_verify_func_t diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index ac3e9fe967..0c405a87b8 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -1715,7 +1715,7 @@ get_pref_allctr(void *extra) ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int pref_ix; - pref_ix = ERTS_ALC_GET_THR_IX(); + pref_ix = erts_get_thr_alloc_ix(); ERTS_CT_ASSERT(sizeof(UWord) == sizeof(Allctr_t *)); ASSERT(0 <= pref_ix && pref_ix < tspec->size); @@ 
-6047,7 +6047,7 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) Allctr_t *allctr; void *res; - ix = ERTS_ALC_GET_THR_IX(); + ix = erts_get_thr_alloc_ix(); ASSERT(0 <= ix && ix < tspec->size); @@ -6174,7 +6174,7 @@ erts_alcu_free_thr_spec(ErtsAlcType_t type, void *extra, void *p) int ix; Allctr_t *allctr; - ix = ERTS_ALC_GET_THR_IX(); + ix = erts_get_thr_alloc_ix(); ASSERT(0 <= ix && ix < tspec->size); @@ -6460,7 +6460,7 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, Allctr_t *allctr; void *res; - ix = ERTS_ALC_GET_THR_IX(); + ix = erts_get_thr_alloc_ix(); ASSERT(0 <= ix && ix < tspec->size); @@ -6497,7 +6497,7 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, Allctr_t *allctr; void *res; - ix = ERTS_ALC_GET_THR_IX(); + ix = erts_get_thr_alloc_ix(); ASSERT(0 <= ix && ix < tspec->size); @@ -7371,7 +7371,7 @@ static int blockscan_sweep_cpool(blockscan_t *state) } static int blockscan_get_specific_allocator(int allocator_num, - int sched_id, + int aux_work_tid, Allctr_t **out) { ErtsAllocatorInfo_t *ai; @@ -7379,7 +7379,7 @@ static int blockscan_get_specific_allocator(int allocator_num, ASSERT(allocator_num >= ERTS_ALC_A_MIN && allocator_num <= ERTS_ALC_A_MAX); - ASSERT(sched_id >= 0 && sched_id <= erts_no_schedulers); + ASSERT(0 <= aux_work_tid && aux_work_tid < erts_no_aux_work_threads); ai = &erts_allctrs_info[allocator_num]; @@ -7388,7 +7388,7 @@ static int blockscan_get_specific_allocator(int allocator_num, } if (!ai->thr_spec) { - if (sched_id != 0) { + if (aux_work_tid != 0) { /* Only thread-specific allocators can be scanned on a specific * scheduler. 
*/ return 0; @@ -7399,9 +7399,9 @@ static int blockscan_get_specific_allocator(int allocator_num, } else { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t*)ai->extra; - ASSERT(sched_id < tspec->size); + ASSERT(aux_work_tid < tspec->size); - allocator = tspec->allctr[sched_id]; + allocator = tspec->allctr[aux_work_tid]; } *out = allocator; @@ -7411,14 +7411,9 @@ static int blockscan_get_specific_allocator(int allocator_num, static void blockscan_sched_trampoline(void *arg) { - ErtsAlcuBlockscanYieldData *yield; - ErtsSchedulerData *esdp; - blockscan_t *scanner; - - esdp = erts_get_scheduler_data(); - scanner = (blockscan_t*)arg; - - yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + ErtsAuxWorkData *awdp = erts_get_aux_work_data(); + ErtsAlcuBlockscanYieldData *yield = &awdp->yield.alcu_blockscan; + blockscan_t *scanner = (blockscan_t*)arg; ASSERT((yield->last == NULL) == (yield->current == NULL)); @@ -7435,20 +7430,14 @@ static void blockscan_sched_trampoline(void *arg) scanner->scanner_queue = NULL; yield->last = scanner; - erts_notify_new_aux_yield_work(esdp); + erts_more_yield_aux_work(awdp); } static void blockscan_dispatch(blockscan_t *scanner, Process *owner, - Allctr_t *allocator, int sched_id) + Allctr_t *allocator, int aux_work_tid) { ASSERT(erts_get_scheduler_id() != 0); - if (sched_id == 0) { - /* Global instances are always handled on the current scheduler. */ - sched_id = ERTS_ALC_GET_THR_IX(); - ASSERT(allocator->thread_safe); - } - scanner->allocator = allocator; scanner->process = owner; @@ -7464,22 +7453,20 @@ static void blockscan_dispatch(blockscan_t *scanner, Process *owner, scanner->next_op = blockscan_sweep_mbcs; } - /* Aux yield jobs can only be set up while running on the scheduler that - * services them, so we move there before continuing. + /* Aux yield jobs can only be set up while running on the aux work + * thread that services them, so we move there before continuing. 
* - * We can't drive the scan itself through this since the scheduler will + * We can't drive the scan itself through this since the aux work thread will * always finish *all* misc aux work in one go which makes it impossible to * yield. */ - erts_schedule_misc_aux_work(sched_id, blockscan_sched_trampoline, scanner); + erts_schedule_misc_aux_work(aux_work_tid, blockscan_sched_trampoline, scanner); } -int erts_handle_yielded_alcu_blockscan(ErtsSchedulerData *esdp, - ErtsAlcuBlockscanYieldData *yield) +int erts_handle_yielded_alcu_blockscan(ErtsAuxWorkData *awdp) { + ErtsAlcuBlockscanYieldData *yield = &awdp->yield.alcu_blockscan; blockscan_t *scanner = yield->current; - (void)esdp; - ASSERT((yield->last == NULL) == (yield->current == NULL)); if (scanner) { @@ -7508,14 +7495,10 @@ int erts_handle_yielded_alcu_blockscan(ErtsSchedulerData *esdp, return 0; } -void erts_alcu_sched_spec_data_init(ErtsSchedulerData *esdp) +void erts_alcu_blockscan_init(ErtsAuxWorkData *awdp) { - ErtsAlcuBlockscanYieldData *yield; - - yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); - - yield->current = NULL; - yield->last = NULL; + awdp->yield.alcu_blockscan.current = NULL; + awdp->yield.alcu_blockscan.last = NULL; } /* ------------------------------------------------------------------------- */ @@ -7815,7 +7798,7 @@ static void gather_ahist_abort(void *arg) } int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, - int sched_id, int hist_width, + int aux_work_tid, int hist_width, UWord hist_start, Eterm ref) { gather_ahist_t *gather_state; @@ -7825,7 +7808,7 @@ int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, ASSERT(is_internal_ref(ref)); if (!blockscan_get_specific_allocator(allocator_num, - sched_id, + aux_work_tid, &allocator)) { return 0; } @@ -7846,7 +7829,7 @@ int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, gather_state->hist_slot_count = hist_width; gather_state->process = p; - blockscan_dispatch(scanner, p, allocator, 
sched_id); + blockscan_dispatch(scanner, p, allocator, aux_work_tid); return 1; } @@ -8138,7 +8121,7 @@ static void gather_cinfo_abort(void *arg) } int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, - int sched_id, int hist_width, + int aux_work_tid, int hist_width, UWord hist_start, Eterm ref) { gather_cinfo_t *gather_state; @@ -8148,7 +8131,7 @@ int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, ASSERT(is_internal_ref(ref)); if (!blockscan_get_specific_allocator(allocator_num, - sched_id, + aux_work_tid, &allocator)) { return 0; } @@ -8170,7 +8153,7 @@ int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, gather_state->hist_slot_count = hist_width; gather_state->process = p; - blockscan_dispatch(scanner, p, allocator, sched_id); + blockscan_dispatch(scanner, p, allocator, aux_work_tid); return 1; } diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index 45e5bfc211..9444a4e32b 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -259,9 +259,10 @@ typedef struct { struct alcu_blockscan *last; } ErtsAlcuBlockscanYieldData; -int erts_handle_yielded_alcu_blockscan(struct ErtsSchedulerData_ *esdp, - ErtsAlcuBlockscanYieldData *yield); -void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp); +struct ErtsAuxWorkData_; + +int erts_handle_yielded_alcu_blockscan(struct ErtsAuxWorkData_ *awdp); +void erts_alcu_blockscan_init(struct ErtsAuxWorkData_ *awdp); #endif /* !ERL_ALLOC_UTIL__ */ diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 8466f88fd5..17141f3551 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -5127,7 +5127,7 @@ static BIF_RETTYPE gather_histograms_helper(Process * c_p, Eterm arg_tuple, int gather(Process *, int, int, int, UWord, Eterm)) { - SWord hist_start, hist_width, sched_id; + SWord hist_start, hist_width, aux_work_tid; int 
msg_count, alloc_num; Eterm *args; @@ -5147,15 +5147,15 @@ gather_histograms_helper(Process * c_p, Eterm arg_tuple, BIF_ERROR(c_p, BADARG); } - sched_id = signed_val(args[2]); + aux_work_tid = signed_val(args[2]); hist_width = signed_val(args[3]); hist_start = signed_val(args[4]); - if (sched_id < 0 || sched_id > erts_no_schedulers) { + if (aux_work_tid < 0 || erts_no_aux_work_threads <= aux_work_tid) { BIF_ERROR(c_p, BADARG); } - msg_count = gather(c_p, alloc_num, sched_id, hist_width, hist_start, args[5]); + msg_count = gather(c_p, alloc_num, aux_work_tid, hist_width, hist_start, args[5]); BIF_RET(make_small(msg_count)); } diff --git a/erts/emulator/beam/erl_bif_trace.c b/erts/emulator/beam/erl_bif_trace.c index 8024687c5e..e01ad49f79 100644 --- a/erts/emulator/beam/erl_bif_trace.c +++ b/erts/emulator/beam/erl_bif_trace.c @@ -2359,6 +2359,7 @@ trace_delivered_1(BIF_ALIST_1) (erts_aint32_t) erts_no_schedulers); erts_proc_add_refc(BIF_P, 1); erts_schedule_multi_misc_aux_work(0, + 1, erts_no_schedulers, reply_trace_delivered_all, (void *) tdarp); diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h index 89d6e358fb..4c6335dc45 100644 --- a/erts/emulator/beam/erl_bif_unique.h +++ b/erts/emulator/beam/erl_bif_unique.h @@ -79,7 +79,7 @@ Eterm erts_debug_make_unique_integer(Process *c_p, Eterm etval1); -ERTS_GLB_INLINE void erts_set_ref_numbers(Uint32 *ref, Uint32 thr_id, Uint64 value); +ERTS_GLB_INLINE void erts_set_ref_numbers(Uint32 ref[ERTS_REF_NUMBERS], Uint32 thr_id, Uint64 value); ERTS_GLB_INLINE Uint32 erts_get_ref_numbers_thr_id(Uint32 *ref); ERTS_GLB_INLINE int erts_is_ref_numbers_magic(Uint32 *ref); ERTS_GLB_INLINE int erts_is_pid_ref_numbers(Uint32 *ref); diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index 2990f4e775..3cdbc66fb9 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -1419,7 +1419,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm 
build_size_term, erts_bin_offset = 8*sb->size + sb->bitsize; if (unit > 1) { if ((unit == 8 && (erts_bin_offset & 7) != 0) || - (erts_bin_offset % unit) != 0) { + (unit != 8 && (erts_bin_offset % unit) != 0)) { goto badarg; } } @@ -1509,7 +1509,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term, erts_bin_offset = 8*binary_size(bin) + bitsize; if (unit > 1) { if ((unit == 8 && (erts_bin_offset & 7) != 0) || - (erts_bin_offset % unit) != 0) { + (unit != 8 && (erts_bin_offset % unit) != 0)) { goto badarg; } } diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index 06cf04f9ee..08fc410414 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -3197,12 +3197,21 @@ ets_all_reply(ErtsSchedulerData *esdp, ErtsEtsAllReq **reqpp, } int -erts_handle_yielded_ets_all_request(ErtsSchedulerData *esdp, - ErtsEtsAllYieldData *eaydp) +erts_handle_yielded_ets_all_request(ErtsAuxWorkData *awdp) { - int ix = (int) esdp->no - 1; - int yc = ERTS_ETS_ALL_TB_YCNT; + ErtsSchedulerData *esdp; + ErtsEtsAllYieldData *eaydp; + int ix, yc; + esdp = awdp->esdp; + /* only on normal scheduler threads... 
*/ + if (!esdp || esdp->type != ERTS_SCHED_NORMAL) + return 0; + + eaydp = &awdp->yield.ets_all; + ix = (int) esdp->no - 1; + yc = ERTS_ETS_ALL_TB_YCNT; + while (1) { if (!eaydp->ongoing) { ErtsEtsAllReq *ongoing; @@ -3249,7 +3258,7 @@ handle_ets_all_request(void *vreq) eayp->ongoing = req; eayp->hfrag = hf; eayp->tab = tb; - erts_notify_new_aux_yield_work(esdp); + erts_more_yield_aux_work(&esdp->aux_work_data); } } else { @@ -3282,6 +3291,7 @@ BIF_RETTYPE ets_internal_request_all_0(BIF_ALIST_0) if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, + 1, erts_no_schedulers, handle_ets_all_request, (void *) req); @@ -5353,7 +5363,7 @@ static void lcnt_update_db_locks_per_sched(void *enable) { } void erts_lcnt_update_db_locks(int enable) { - erts_schedule_multi_misc_aux_work(0, erts_no_schedulers, + erts_schedule_multi_misc_aux_work(0, 1, erts_no_schedulers, &lcnt_update_db_locks_per_sched, (void*)(UWord)enable); } #endif /* ERTS_ENABLE_LOCK_COUNT */ diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h index 2f6debe72a..a18be4d168 100644 --- a/erts/emulator/beam/erl_db.h +++ b/erts/emulator/beam/erl_db.h @@ -78,9 +78,7 @@ typedef struct { } DbTableRelease; struct ErtsSchedulerData_; -int erts_handle_yielded_ets_all_request(struct ErtsSchedulerData_ *esdp, - ErtsEtsAllYieldData *eadp); - +int erts_handle_yielded_ets_all_request(ErtsAuxWorkData *awdp); void erts_ets_sched_spec_data_init(struct ErtsSchedulerData_ *esdp); /* diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 07a7b6c9e8..cf56d21f1e 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -3383,6 +3383,7 @@ erts_gc_info_request(Process *c_p) if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, + 1, erts_no_schedulers, reply_gc_info, (void *) gcirp); diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index cc2a168813..bb922cb412 100644 --- a/erts/emulator/beam/erl_init.c +++ 
b/erts/emulator/beam/erl_init.c @@ -643,7 +643,7 @@ void erts_usage(void) #ifdef BEAMASM erts_fprintf(stderr, "-JDdump bool enable or disable dumping of generated assembly code for each module loaded\n"); - erts_fprintf(stderr, "-JDperf bool enable or disable support for perf on Linux\n"); + erts_fprintf(stderr, "-JPperf bool enable or disable support for perf on Linux\n"); erts_fprintf(stderr, "\n"); #endif @@ -823,6 +823,7 @@ early_init(int *argc, char **argv) /* int dirty_cpu_scheds_pctg = 100; int dirty_cpu_scheds_onln_pctg = 100; int dirty_io_scheds; + int aux_threads = 1; int max_reader_groups; int reader_groups; int max_decentralized_counter_groups; @@ -1227,11 +1228,12 @@ early_init(int *argc, char **argv) /* erts_no_dirty_cpu_schedulers = no_dirty_cpu_schedulers = dirty_cpu_scheds; no_dirty_cpu_schedulers_online = dirty_cpu_scheds_online; erts_no_dirty_io_schedulers = no_dirty_io_schedulers = dirty_io_scheds; - erts_early_init_scheduling(no_schedulers); + erts_early_init_scheduling(no_schedulers + 1 + dirty_cpu_scheds); alloc_opts.ncpu = ncpu; erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes) -M flags. 
*/ + aux_threads += erts_no_dirty_alloc_instances; /* Require allocators */ erts_init_check_io(argc, argv); @@ -1241,7 +1243,7 @@ early_init(int *argc, char **argv) /* * * * Managed threads: * ** Scheduler threads (see erl_process.c) - * ** Aux thread (see erl_process.c) + * ** Aux threads (see erl_process.c) * ** Sys message dispatcher thread (see erl_trace.c) * ** IO Poll threads (see erl_check_io.c) * @@ -1250,11 +1252,13 @@ early_init(int *argc, char **argv) /* * ** Dirty scheduler threads */ erts_thr_progress_init(no_schedulers, - no_schedulers+2+erts_no_poll_threads, - erts_async_max_threads + - erts_no_dirty_cpu_schedulers + - erts_no_dirty_io_schedulers - ); + (no_schedulers + + aux_threads + + 1 + + erts_no_poll_threads), + (erts_async_max_threads + + erts_no_dirty_cpu_schedulers + + erts_no_dirty_io_schedulers)); erts_thr_q_init(); erts_init_utils(); erts_early_init_cpu_topology(no_schedulers, diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c index a6eecfabdc..4b7bb22a31 100644 --- a/erts/emulator/beam/erl_message.c +++ b/erts/emulator/beam/erl_message.c @@ -1072,6 +1072,7 @@ void erts_factory_proc_init(ErtsHeapFactory* factory, Process* p) factory->mode = FACTORY_HALLOC; factory->p = p; factory->hp_start = HEAP_TOP(p); + factory->original_htop = factory->hp_start; factory->hp = factory->hp_start; if (factory->hp) factory->hp_end = HEAP_LIMIT(p); @@ -1097,6 +1098,11 @@ void erts_factory_proc_prealloc_init(ErtsHeapFactory* factory, ErlHeapFragment *bp = p->mbuf; factory->mode = FACTORY_HALLOC; factory->p = p; + factory->original_htop = HEAP_TOP(p); + /* + factory->hp_start is a pointer to somewhere in the data area of + a heap fragment or to the main heap. 
+ */ factory->hp_start = HAlloc(p, size); factory->hp = factory->hp_start; factory->hp_end = factory->hp_start + size; @@ -1163,6 +1169,12 @@ erts_factory_message_create(ErtsHeapFactory* factory, ASSERT(ohp == &proc->off_heap); factory->mode = FACTORY_HALLOC; factory->p = proc; + /* + If on_heap is set then hp must be on the process main heap. + */ + factory->original_htop = hp; + ASSERT(HEAP_START(proc) <= factory->original_htop); + ASSERT(factory->original_htop <= HEAP_LIMIT(proc)); factory->heap_frags_saved = proc->mbuf; factory->heap_frags_saved_used = proc->mbuf ? proc->mbuf->used_size : 0; } @@ -1495,10 +1507,10 @@ void erts_factory_undo(ErtsHeapFactory* factory) /* Rollback heap top */ - if (HEAP_START(factory->p) <= factory->hp_start - && factory->hp_start <= HEAP_LIMIT(factory->p)) { - HEAP_TOP(factory->p) = factory->hp_start; - } + ASSERT(HEAP_START(factory->p) <= factory->original_htop); + ASSERT(factory->original_htop <= HEAP_LIMIT(factory->p)); + HEAP_TOP(factory->p) = factory->original_htop; + /* Fix last heap frag */ if (factory->heap_frags_saved) { diff --git a/erts/emulator/beam/erl_message.h b/erts/emulator/beam/erl_message.h index 89017a3998..85a16cb605 100644 --- a/erts/emulator/beam/erl_message.h +++ b/erts/emulator/beam/erl_message.h @@ -74,7 +74,27 @@ typedef struct { FACTORY_TMP } mode; Process* p; + /* + If the factory is initialized with erts_factory_proc_prealloc_init, + hp_start points to the top of the main heap if the preallocated data + fits in the main heap and otherwise it points to somewhere in the + data area of a heap fragment. If the factory is initialized with any + of the other init functions that sets the mode to FACTORY_HALLOC, + hp_start and original_htop always have the same value. 
+ + When erts_factory_proc_prealloc_init is used for initialization the + preallocated data might be allocated in an existing heap fragment but + data that is later allocated with erts_produce_heap might fit in the + main heap, so both hp_start and original_htop are needed to correctly + restore the heap in the erts_factory_undo function. + */ Eterm* hp_start; + /* + original_htop stores the top of the main heap at the time + the factory was initialized and is used to reset the heap + state if an erts_factory_undo call is made. + */ + Eterm* original_htop; Eterm* hp; Eterm* hp_end; ErtsMessage *message; diff --git a/erts/emulator/beam/erl_msacc.c b/erts/emulator/beam/erl_msacc.c index ed47cb0b83..89fdaf77cd 100644 --- a/erts/emulator/beam/erl_msacc.c +++ b/erts/emulator/beam/erl_msacc.c @@ -364,20 +364,17 @@ erts_msacc_request(Process *c_p, int action, Eterm *threads) msaccrp->ref = STORE_NC(&hp, NULL, ref); msaccrp->req_sched = esdp->no; - *threads = erts_no_schedulers; - *threads += 1; /* aux thread */ + *threads = erts_no_aux_work_threads; erts_atomic32_init_nob(&msaccrp->refc,(erts_aint32_t)*threads); erts_proc_add_refc(c_p, *threads); - if (erts_no_schedulers > 1) - erts_schedule_multi_misc_aux_work(1, - erts_no_schedulers, - reply_msacc, - (void *) msaccrp); - /* aux thread */ - erts_schedule_misc_aux_work(0, reply_msacc, (void *) msaccrp); + erts_schedule_multi_misc_aux_work(1, + 0, + erts_no_aux_work_threads-1, + reply_msacc, + (void *) msaccrp); /* Manage unmanaged threads */ switch (action) { diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index aabd9f7aa6..46bdbb277d 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -252,6 +252,7 @@ handle_move_msgq_off_heap(Process *c_p, static void send_cla_reply(Process *c_p, ErtsMessage *sig, Eterm to, Eterm req_id, Eterm result); +static void handle_missing_spawn_reply(Process *c_p, ErtsMonitor *omon); #ifdef 
ERTS_PROC_SIG_HARD_DEBUG #define ERTS_PROC_SIG_HDBG_PRIV_CHKQ(P, T, NMN) \ @@ -1927,7 +1928,7 @@ reply_dist_unlink_ack(Process *c_p, ErtsSigDistUnlinkOp *sdulnk) */ if (dep != erts_this_dist_entry && sdulnk->nodename == dep->sysname) { ErtsDSigSendContext ctx; - int code = erts_dsig_prepare(&ctx, dep, c_p, 0, + int code = erts_dsig_prepare(&ctx, dep, NULL, 0, ERTS_DSP_NO_LOCK, 1, 1, 0); switch (code) { case ERTS_DSIG_PREP_CONNECTED: @@ -4619,14 +4620,14 @@ handle_dist_spawn_reply(Process *c_p, ErtsSigRecvTracing *tracing, if (dep != erts_this_dist_entry && dist->nodename == dep->sysname) { ErtsDSigSendContext ctx; - int code = erts_dsig_prepare(&ctx, dep, c_p, 0, + int code = erts_dsig_prepare(&ctx, dep, NULL, 0, ERTS_DSP_NO_LOCK, 1, 1, 0); switch (code) { case ERTS_DSIG_PREP_CONNECTED: case ERTS_DSIG_PREP_PENDING: if (dist->connection_id == ctx.connection_id) { code = erts_dsig_send_exit_tt(&ctx, - c_p->common.id, + c_p, result, am_abandoned, SEQ_TRACE_TOKEN(c_p)); @@ -5076,11 +5077,27 @@ erts_proc_sig_handle_incoming(Process *c_p, erts_aint32_t *statep, xsigd->u.ref); if (omon) { ASSERT(erts_monitor_is_origin(omon)); + if (omon->type == ERTS_MON_TYPE_ALIAS) { + omon = NULL; + break; + } + if (omon->flags & ERTS_ML_FLG_SPAWN_PENDING) { + handle_missing_spawn_reply(c_p, omon); + /* + * We leave the pending spawn monitor as is, + * so that the nodedown will trigger an error + * spawn_reply... 
+ */ + omon = NULL; + cnt += 4; + break; + } mdp = erts_monitor_to_data(omon); if (omon->type == ERTS_MON_TYPE_DIST_PROC) { if (erts_monitor_dist_delete(&mdp->u.target)) tmon = &mdp->u.target; } + ASSERT(!(omon->flags & ERTS_ML_FLGS_SPAWN)); cnt += convert_prepared_down_message(c_p, sig, xsigd->message, next_nm_sig); @@ -6981,6 +6998,42 @@ handle_msg_tracing(Process *c_p, ErtsSigRecvTracing *tracing, return 0; } +static void +handle_missing_spawn_reply(Process *c_p, ErtsMonitor *omon) +{ + ErtsMonitorData *mdp; + ErtsMonitorDataExtended *mdep; + erts_dsprintf_buf_t *dsbufp; + Eterm nodename; + DistEntry *dep; + + /* Terminate connection to the node and report it... */ + + if (omon->type != ERTS_MON_TYPE_DIST_PROC) + ERTS_INTERNAL_ERROR("non-distributed missing spawn_reply"); + + mdp = erts_monitor_to_data(omon); + ASSERT(mdp->origin.flags & ERTS_ML_FLG_EXTENDED); + mdep = (ErtsMonitorDataExtended *) mdp; + ASSERT(mdep->dist); + nodename = mdep->dist->nodename; + ASSERT(is_atom(nodename)); + + dep = erts_find_dist_entry(nodename); + if (dep) + erts_kill_dist_connection(dep, mdep->dist->connection_id); + + dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, + "Missing 'spawn_reply' signal from the node %T " + "detected by %T on the node %T. The node %T " + "probably suffers from the bug with ticket id " + "OTP-17737.", + nodename, c_p->common.id, + erts_this_dist_entry->sysname, nodename); + erts_send_error_to_logger_nogl(dsbufp); +} + Uint erts_proc_sig_prep_msgq_for_inspection(Process *c_p, Process *rp, diff --git a/erts/emulator/beam/erl_proc_sig_queue.h b/erts/emulator/beam/erl_proc_sig_queue.h index 236b20ab4d..90a87570c2 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.h +++ b/erts/emulator/beam/erl_proc_sig_queue.h @@ -1457,6 +1457,22 @@ ERTS_GLB_INLINE void erts_msgq_set_save_first(Process *c_p); /** * + * @brief Remove a message from the message queue and set + * the save pointer to the start of the message queue. 
+ * + * + * @param[in] c_p Pointer to process struct of + * currently executing process. + * + * @param[in] msgp A pointer to the message to + * remove from the message queue. + * + */ +ERTS_GLB_INLINE void erts_msgq_unlink_msg_set_save_first(Process *c_p, + ErtsMessage *msgp); + +/** + * * @brief Advance the save pointer to the next message in the * message queue. * @@ -1865,6 +1881,21 @@ erts_msgq_set_save_first(Process *c_p) } ERTS_GLB_INLINE void +erts_msgq_unlink_msg_set_save_first(Process *c_p, ErtsMessage *msgp) +{ + ErtsMessage *sigp = msgp->next; + ERTS_HDBG_CHECK_SIGNAL_PRIV_QUEUE__(c_p, 0, "before"); + *c_p->sig_qs.save = sigp; + c_p->sig_qs.len--; + if (!sigp) + c_p->sig_qs.last = c_p->sig_qs.save; + else if (ERTS_SIG_IS_RECV_MARKER(sigp)) + ((ErtsRecvMarker *) sigp)->prev_next = c_p->sig_qs.save; + erts_msgq_set_save_first(c_p); + ERTS_HDBG_CHECK_SIGNAL_PRIV_QUEUE__(c_p, 0, "after"); +} + +ERTS_GLB_INLINE void erts_msgq_set_save_next(Process *c_p) { ErtsMessage *sigp = (*c_p->sig_qs.save)->next; diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index eacbb90770..8313a4863e 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -137,6 +137,7 @@ Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers); Uint ERTS_WRITE_UNLIKELY(erts_no_total_schedulers); Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers) = 0; Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers) = 0; +int ERTS_WRITE_UNLIKELY(erts_no_aux_work_threads); static char *erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_NO_FLAGS] = {0}; int erts_aux_work_no_flags = ERTS_SSI_AUX_WORK_NO_FLAGS; @@ -171,7 +172,12 @@ sched_get_busy_wait_params(ErtsSchedulerData *esdp) return &sched_busy_wait_params[esdp->type]; } -static ErtsAuxWorkData *aux_thread_aux_work_data; +typedef union { + ErtsAuxWorkData data; + char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAuxWorkData))]; +} ErtsAlignedAuxWorkData; + +static ErtsAlignedAuxWorkData 
*ERTS_WRITE_UNLIKELY(aligned_aux_work_data); #define ERTS_SCHDLR_SSPND_CHNG_NMSB (((erts_aint32_t) 1) << 0) #define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1) @@ -485,8 +491,8 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, ERTS_ALC_T_PROC_LIST) #define ERTS_SCHED_SLEEP_INFO_IX(IX) \ - (ASSERT(((int)-1) <= ((int) (IX)) \ - && ((int) (IX)) < ((int) erts_no_schedulers)), \ + (ASSERT((((int)-1) <= ((int) (IX))) \ + && (((int) (IX)) < (erts_no_aux_work_threads-1))), \ &aligned_sched_sleep_info[(IX)].ssi) #define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \ (ASSERT(0 <= ((int) (IX)) \ @@ -555,7 +561,7 @@ static void print_function_from_pc(fmtfn_t to, void *to_arg, ErtsCodePtr x); static int stack_element_dump(fmtfn_t to, void *to_arg, Eterm* sp, int yreg); static void aux_work_timeout(void *unused); -static void aux_work_timeout_early_init(int no_schedulers); +static void aux_work_timeout_early_init(int max_no_aux_work_threads); static void setup_aux_work_timer(ErtsSchedulerData *esdp); static int execute_sys_tasks(Process *c_p, @@ -1393,6 +1399,7 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable, if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, + 1, erts_no_schedulers, reply_sched_wall_time, (void *) swtrp); @@ -1457,6 +1464,7 @@ Eterm erts_system_check_request(Process *c_p) { if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, + 1, erts_no_schedulers, reply_system_check, (void *) scrp); @@ -1814,11 +1822,9 @@ init_misc_aux_work(void) misc_aux_work_queues = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_MISC_AUX_WORK_Q, sizeof(erts_algnd_misc_aux_work_q_t) - * (erts_no_schedulers+1)); - - ix = 0; /* aux_thread + schedulers */ + * erts_no_aux_work_threads); - for (; ix <= erts_no_schedulers; ix++) { + for (ix = 0; ix < erts_no_aux_work_threads; ix++) { qinit.arg = (void *) ERTS_SCHED_SLEEP_INFO_IX(ix-1); erts_thr_q_initialize(&misc_aux_work_queues[ix].q, &qinit); } @@ -1849,7 +1855,7 @@ 
handle_misc_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) { - ErtsThrQ_t *q = &misc_aux_work_queues[awdp->sched_id].q; + ErtsThrQ_t *q = &misc_aux_work_queues[awdp->aux_work_tid].q; unset_aux_work_flags_mb(awdp->ssi, ERTS_SSI_AUX_WORK_MISC); while (1) { @@ -1877,23 +1883,36 @@ handle_misc_aux_work_thr_prgr(ErtsAuxWorkData *awdp, unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_MISC_THR_PRGR); - return misc_aux_work_clean(&misc_aux_work_queues[awdp->sched_id].q, + return misc_aux_work_clean(&misc_aux_work_queues[awdp->aux_work_tid].q, awdp, aux_work & ~ERTS_SSI_AUX_WORK_MISC_THR_PRGR); } +static ERTS_INLINE int +get_aux_work_tid(void) +{ + /* + * All aux work threads are registered as delayed dealloc + * handlers, and their allocation index equals their tid. + */ + ErtsThrAllocData *tadp = erts_get_thr_alloc_data(); + if (!tadp || !tadp->delayed_dealloc_handler) + return -1; /* Not an aux work thread */ + return tadp->alc_ix; +} static ERTS_INLINE void -schedule_misc_aux_work(int sched_id, +schedule_misc_aux_work(int aux_work_tid, void (*func)(void *), void *arg) { ErtsThrQ_t *q; erts_misc_aux_work_t *mawp; - ASSERT(0 <= sched_id && sched_id <= erts_no_schedulers); + ASSERT(0 <= aux_work_tid + && aux_work_tid < erts_no_aux_work_threads); - q = &misc_aux_work_queues[sched_id].q; + q = &misc_aux_work_queues[aux_work_tid].q; mawp = misc_aux_work_alloc(); mawp->func = func; mawp->arg = arg; @@ -1901,37 +1920,53 @@ schedule_misc_aux_work(int sched_id, } void -erts_schedule_misc_aux_work(int sched_id, +erts_schedule_misc_aux_work(int aux_work_tid, void (*func)(void *), void *arg) { - schedule_misc_aux_work(sched_id, func, arg); + schedule_misc_aux_work(aux_work_tid, func, arg); } void erts_schedule_multi_misc_aux_work(int ignore_self, - int max_sched, + int min_tid, + int max_tid, void (*func)(void *), void *arg) { - int id, self = 0; - - if (ignore_self) { - ErtsSchedulerData *esdp = erts_get_scheduler_data(); + int tid, self; - /* ignore_self is 
meaningless on dirty schedulers since aux work can - * only run on normal schedulers, and their ids do not translate. */ - if(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { - self = (int)esdp->no; - } - } + /* + * Threads handling misc aux work are: + * * Normal scheduler threads + * * Aux work threads + * + * Tids correspond to threads as follows: + * * tid == 0 + * Standard aux thread. This thread is always + * present on all systems. + * * 1 <= tid <= erts_no_schedulers + * Normal scheduler threads. There is always at + * least one normal scheduler. Tid equals scheduler + * id. + * * erts_no_schedulers < tid < erts_no_aux_work_threads + * Extra aux threads. Their main purpose is to handle + * delayed dealloc for allocator instances for + * dirty schedulers. May or may not exist. The maximum + * number of these threads equals the number of dirty + * cpu schedulers. + */ - ASSERT(0 < max_sched && max_sched <= erts_no_schedulers); + ASSERT(0 <= min_tid && min_tid < erts_no_aux_work_threads); + ASSERT(0 <= max_tid && max_tid < erts_no_aux_work_threads); + ASSERT(min_tid <= max_tid); - for (id = 1; id <= max_sched; id++) { - if (id == self) + self = ignore_self ?
get_aux_work_tid() : -1; + + for (tid = min_tid; tid <= max_tid; tid++) { + if (tid == self) continue; - schedule_misc_aux_work(id, func, arg); + schedule_misc_aux_work(tid, func, arg); } } @@ -2013,7 +2048,7 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC)); aux_work &= ~(ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC); - res = erts_alloc_fix_alloc_shrink(awdp->sched_id, aux_work); + res = erts_alloc_fix_alloc_shrink(awdp->aux_work_tid, aux_work); if (res) { set_aux_work_flags(ssi, res); aux_work |= res; @@ -2060,10 +2095,10 @@ handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waitin unset_aux_work_flags_mb(ssi, ERTS_SSI_AUX_WORK_DD); ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_ALLOC); - erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp, - &need_thr_progress, - &wakeup, - &more_work); + erts_alloc_handle_delayed_dealloc(&awdp->alloc_data, + &need_thr_progress, + &wakeup, + &more_work); ERTS_MSACC_POP_STATE_M_X(); if (more_work) { if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD) @@ -2103,10 +2138,10 @@ handle_delayed_dealloc_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, i need_thr_progress = 0; more_work = 0; - erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp, - &need_thr_progress, - &wakeup, - &more_work); + erts_alloc_handle_delayed_dealloc(&awdp->alloc_data, + &need_thr_progress, + &wakeup, + &more_work); if (more_work) { set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD); unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR); @@ -2347,13 +2382,24 @@ setup_thr_debug_wait_completed(void *vproc) ErtsSchedulerData *esdp = erts_get_scheduler_data(); ErtsAuxWorkData *awdp; erts_aint32_t wait_flags, aux_work_flags; - awdp = esdp ? &esdp->aux_work_data : aux_thread_aux_work_data; + + if (esdp) + awdp = &esdp->aux_work_data; /* A normal scheduler... */ + else { + /* An aux thread... 
*/ + ErtsThrAllocData *tadp = erts_get_thr_alloc_data(); + char *ptr; + ASSERT(tadp); + ptr = (char *) tadp; + ptr -= offsetof(ErtsAuxWorkData, alloc_data); + awdp = (ErtsAuxWorkData *) ptr; + } wait_flags = 0; aux_work_flags = ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED; if (debug_wait_completed_flags & ERTS_DEBUG_WAIT_COMPLETED_DEALLOCATIONS) { - erts_alloc_fix_alloc_shrink(awdp->sched_id, 0); + erts_alloc_fix_alloc_shrink(awdp->aux_work_tid, 0); wait_flags |= (ERTS_SSI_AUX_WORK_DD | ERTS_SSI_AUX_WORK_DD_THR_PRGR); aux_work_flags |= ERTS_SSI_AUX_WORK_DD; @@ -2383,19 +2429,15 @@ static void later_thr_debug_wait_completed(void *vlop) struct debug_lop *lop = vlop; if (erts_atomic32_dec_read_mb(&debug_wait_completed_count) == 1) { - erts_aint32_t count = (erts_aint32_t) erts_no_schedulers; - count += 1; /* aux thread */ + erts_aint32_t count = (erts_aint32_t) erts_no_aux_work_threads; erts_atomic32_set_nob(&debug_wait_completed_count, count); - /* scheduler threads */ + /* All scheduler threads as well as all aux threads... */ erts_schedule_multi_misc_aux_work(0, - erts_no_schedulers, + 0, + erts_no_aux_work_threads-1, setup_thr_debug_wait_completed, lop->proc); - /* aux_thread */ - erts_schedule_misc_aux_work(0, - setup_thr_debug_wait_completed, - lop->proc); } erts_free(ERTS_ALC_T_DEBUG, lop); } @@ -2435,6 +2477,7 @@ erts_debug_wait_completed(Process *c_p, int flags) /* First flush later-ops on all scheduler threads */ erts_schedule_multi_misc_aux_work(0, + 1, erts_no_schedulers, init_thr_debug_wait_completed, (void *) c_p); @@ -2507,11 +2550,27 @@ handle_reap_ports(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) } void -erts_notify_new_aux_yield_work(ErtsSchedulerData *esdp) +erts_more_yield_aux_work(ErtsAuxWorkData *awdp) { - ASSERT(esdp == erts_get_scheduler_data()); - /* Always called by the scheduler itself... */ - set_aux_work_flags_wakeup_nob(esdp->ssi, ERTS_SSI_AUX_WORK_YIELD); + /* Should *always* be called by the aux-work thread itself... 
*/ + ASSERT(awdp && awdp->aux_work_tid == get_aux_work_tid()); + set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_YIELD); +} + +ErtsAuxWorkData * +erts_get_aux_work_data(void) +{ + ErtsAuxWorkData *awdp; + int tid = get_aux_work_tid(); + ASSERT(0 <= tid && tid < erts_no_aux_work_threads); + if (tid < 1) + awdp = tid == 0 ? &aligned_aux_work_data[0].data : NULL; + else if (tid <= erts_no_schedulers) + awdp = &erts_aligned_scheduler_data[tid-1].esd.aux_work_data; + else + awdp = &aligned_aux_work_data[tid - (int) erts_no_schedulers].data; + ASSERT(!awdp || awdp->aux_work_tid == tid); + return awdp; } static ERTS_INLINE erts_aint32_t @@ -2531,10 +2590,8 @@ handle_yield(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) /* Various yielding operations... */ - yield |= erts_handle_yielded_ets_all_request(awdp->esdp, - &awdp->yield.ets_all); - yield |= erts_handle_yielded_alcu_blockscan(awdp->esdp, - &awdp->yield.alcu_blockscan); + yield |= erts_handle_yielded_ets_all_request(awdp); + yield |= erts_handle_yielded_alcu_blockscan(awdp); /* * Other yielding operations... 
@@ -2718,7 +2775,7 @@ start_aux_work_timer(ErtsSchedulerData *esdp) } static void -aux_work_timeout_early_init(int no_schedulers) +aux_work_timeout_early_init(int max_no_aux_work_threads) { int i; UWord p; @@ -2729,7 +2786,8 @@ aux_work_timeout_early_init(int no_schedulers) */ p = (UWord) malloc((sizeof(ErtsAuxWorkTmo) - + sizeof(erts_atomic32_t)*(no_schedulers+1)) + + (sizeof(erts_atomic32_t) + * max_no_aux_work_threads)) + ERTS_CACHE_LINE_SIZE-1); if (!p) { ERTS_INTERNAL_ERROR("malloc failed to allocate memory!"); @@ -2744,7 +2802,7 @@ aux_work_timeout_early_init(int no_schedulers) #ifdef DEBUG erts_atomic32_init_nob(&aux_work_tmo->used, 0); #endif - for (i = 0; i <= no_schedulers; i++) + for (i = 0; i < max_no_aux_work_threads; i++) erts_atomic32_init_nob(&aux_work_tmo->type[i], 0); } @@ -3079,37 +3137,49 @@ thr_prgr_fin_wait(void *vssi) | ERTS_SSI_FLG_TSE_SLEEPING)); } -static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp); +static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, + char *dawwp); void -erts_aux_thread_poke() +erts_aux_thread_poke(void) { erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(-1)); } static void * -aux_thread(void *unused) +aux_thread(void *vix) { - ErtsAuxWorkData *awdp = aux_thread_aux_work_data; - ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1); + int ix = (int) (Sint) vix; + int id = ix == 0 ? 
1 : ix + 1 - erts_no_schedulers; + ErtsAuxWorkData *awdp = &aligned_aux_work_data[id-1].data; + ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix-1); erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; ErtsThrPrgrData *tpd; int thr_prgr_active = 1; ERTS_MSACC_DECLARE_CACHE(); + ASSERT(ix == 0 + || (erts_no_schedulers < ix + && ix < erts_no_aux_work_threads)); + ASSERT(0 < id && id <= 1 + erts_no_dirty_alloc_instances); + #ifdef ERTS_ENABLE_LOCK_CHECK { - char buf[] = "aux_thread"; - erts_lc_set_thread_name(buf); + char buf[31]; + erts_snprintf(&buf[0], 31, "aux-thread %d", id); + erts_lc_set_thread_name(&buf[0]); } #endif - erts_port_task_pre_alloc_init_thread(); + if (ix == 0) + erts_port_task_pre_alloc_init_thread(); ssi->event = erts_tse_fetch(); erts_tse_return(ssi->event); - erts_msacc_init_thread("aux", 1, 1); + erts_msacc_init_thread("aux", id, 1); + + erts_alloc_register_delayed_dealloc_handler_thread(&awdp->alloc_data, ix); callbacks.arg = (void *) ssi; callbacks.wakeup = thr_prgr_wakeup; @@ -3120,13 +3190,16 @@ aux_thread(void *unused) tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1, 0); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; + awdp->aux_work_tid = ix; #if ERTS_POLL_USE_FALLBACK + if (ix == 0) { #if ERTS_POLL_USE_SCHEDULER_POLLING - ssi->psi = erts_create_pollset_thread(-2, tpd); + ssi->psi = erts_create_pollset_thread(-2, tpd); #else - ssi->psi = erts_create_pollset_thread(-1, tpd); + ssi->psi = erts_create_pollset_thread(-1, tpd); #endif + } #endif sched_prep_spin_wait(ssi); @@ -3149,7 +3222,7 @@ aux_thread(void *unused) if (!aux_work) { #ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) + if (ix == 0 && ERTS_BREAK_REQUESTED) erts_do_break_handling(); #endif @@ -3157,40 +3230,43 @@ aux_thread(void *unused) erts_thr_progress_active(tpd, thr_prgr_active = 0); #if ERTS_POLL_USE_FALLBACK + if (ix == 0) { - flgs = sched_spin_wait(ssi, 0); + flgs = sched_spin_wait(ssi, 0); - if (flgs & 
ERTS_SSI_FLG_SLEEPING) { - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT, 0); - } + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT, 0); + } + } } -#else - erts_thr_progress_prepare_wait(tpd); - - flgs = sched_spin_wait(ssi, 0); + else +#endif + { + erts_thr_progress_prepare_wait(tpd); + flgs = sched_spin_wait(ssi, 0); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - int res; - ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); - ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); - } - erts_tse_return(ssi->event); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + int res; + ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + } + erts_tse_return(ssi->event); + } + erts_thr_progress_finalize_wait(tpd); } - erts_thr_progress_finalize_wait(tpd); -#endif - } + } flgs = sched_prep_spin_wait(ssi); } @@ -5609,11 +5685,11 @@ runq_supervisor(void *unused) void -erts_early_init_scheduling(int no_schedulers) +erts_early_init_scheduling(int max_no_aux_work_threads) { 
ErtsSchedType type; - aux_work_timeout_early_init(no_schedulers); + aux_work_timeout_early_init(max_no_aux_work_threads); for (type = ERTS_SCHED_TYPE_FIRST; type <= ERTS_SCHED_TYPE_LAST; type++) { erts_sched_set_wakeup_other_threshold(type, "medium"); @@ -5723,7 +5799,8 @@ erts_sched_set_wake_cleanup_threshold(char *str) } static void -init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) +init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, + char *dawwp) { int id = 0; if (esdp) { @@ -5746,7 +5823,7 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) } } - awdp->sched_id = id; + awdp->aux_work_tid = id; awdp->esdp = esdp; awdp->ssi = esdp ? esdp->ssi : NULL; awdp->latest_wakeup = ERTS_THR_PRGR_VAL_FIRST; @@ -5769,12 +5846,13 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) else { int i; awdp->delayed_wakeup.job = (ErtsDelayedAuxWorkWakeupJob *) dawwp; - dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*(erts_no_schedulers+1); + dawwp += sizeof(ErtsDelayedAuxWorkWakeupJob)*erts_no_aux_work_threads; awdp->delayed_wakeup.sched2jix = (int *) dawwp; awdp->delayed_wakeup.jix = -1; - for (i = 0; i <= erts_no_schedulers; i++) + for (i = 0; i < erts_no_aux_work_threads; i++) awdp->delayed_wakeup.sched2jix[i] = -1; } + erts_alcu_blockscan_init(awdp); awdp->debug.wait_completed.flags = 0; awdp->debug.wait_completed.callback = NULL; awdp->debug.wait_completed.arg = NULL; @@ -5896,7 +5974,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th int no_dirty_io_schedulers ) { - int ix, n, no_ssi, tot_rqs; + int ix, n, tot_rqs; char *daww_ptr; size_t daww_sz; size_t size_runqs; @@ -6019,13 +6097,19 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th erts_no_dirty_io_schedulers = no_dirty_io_schedulers; erts_no_total_schedulers += no_dirty_io_schedulers; + /* normal schedulers */ + erts_no_aux_work_threads = n; + /* 
standard aux thread */ + erts_no_aux_work_threads += 1; + /* extra aux threads */ + erts_no_aux_work_threads += erts_no_dirty_alloc_instances; + /* Create and initialize scheduler sleep info */ - no_ssi = n + 1 /* aux thread */; aligned_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, - no_ssi*sizeof(ErtsAlignedSchedulerSleepInfo)); - for (ix = 0; ix < no_ssi; ix++) { + erts_no_aux_work_threads*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < erts_no_aux_work_threads; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_sched_sleep_info[ix].ssi; #if 0 /* no need to initialize these... */ ssi->next = NULL; @@ -6067,7 +6151,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th /* Create and initialize scheduler specific data */ daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob) - + sizeof(int))*(n+1)); + + sizeof(int))*erts_no_aux_work_threads); daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, daww_sz*n); @@ -6079,7 +6163,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); init_scheduler_data(esdp, ix+1, ERTS_SCHED_SLEEP_INFO_IX(ix), ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz, - NULL, 0); + NULL, 0); } { @@ -6109,7 +6193,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix), ERTS_DIRTY_IO_RUNQ, NULL, 0, - &adsp[adspix++].dsp, ts); + &adsp[adspix++].dsp, ts); } } @@ -6120,9 +6204,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th erts_atomic32_init_nob(&debug_wait_completed_count, 0); /* debug only */ debug_wait_completed_flags = 0; - aux_thread_aux_work_data = + aligned_aux_work_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, - sizeof(ErtsAuxWorkData)); + 
sizeof(ErtsAlignedAuxWorkData) + * (erts_no_dirty_alloc_instances + 1)); init_no_runqs(no_schedulers_online, no_schedulers_online); balance_info.last_active_runqs = no_schedulers; @@ -8539,7 +8624,6 @@ sched_thread_func(void *vesdp) esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); #endif - erts_alloc_register_scheduler(vesdp); #ifdef ERTS_ENABLE_LOCK_CHECK { char buf[31]; @@ -8548,6 +8632,7 @@ sched_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); + erts_alloc_register_scheduler(vesdp); #if HAVE_ERTS_MSEG erts_mseg_late_init(); #endif @@ -8564,7 +8649,6 @@ sched_thread_func(void *vesdp) ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL); #endif - erts_alcu_sched_spec_data_init(esdp); erts_ets_sched_spec_data_init(esdp); erts_utils_sched_spec_data_init(); @@ -8612,6 +8696,7 @@ sched_dirty_cpu_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); + erts_alloc_register_scheduler(vesdp); esdp->aux_work_data.async_ready.queue = NULL; erts_proc_lock_prepare_proc_lock_waiter(); @@ -8659,6 +8744,7 @@ sched_dirty_io_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); + erts_alloc_register_scheduler(vesdp); esdp->aux_work_data.async_ready.queue = NULL; erts_proc_lock_prepare_proc_lock_waiter(); @@ -8737,11 +8823,19 @@ erts_start_schedulers(void) } } - erts_snprintf(opts.name, sizeof(name), "aux"); + ix = 0; + while (ix < erts_no_aux_work_threads) { + int id = ix == 0 ? 
1 : ix + 1 - (int) erts_no_schedulers; + erts_snprintf(opts.name, sizeof(name), "%d_aux", id); - res = ethr_thr_create(&tid, aux_thread, NULL, &opts); - if (res != 0) - erts_exit(ERTS_ABORT_EXIT, "Failed to create aux thread, error = %d\n", res); + res = ethr_thr_create(&tid, aux_thread, (void *) (Sint) ix, &opts); + if (res != 0) + erts_exit(ERTS_ABORT_EXIT, "Failed to create aux thread %d, error = %d\n", id, res); + if (ix == 0) + ix = (int) (1 + erts_no_schedulers); + else + ix++; + } block_poll_thread_data = (ErtsAlignedBlockPollThreadData *) erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BLOCK_PTHR_DATA, @@ -8977,6 +9071,12 @@ erts_internal_is_process_executing_dirty_1(BIF_ALIST_1) BIF_RET(am_false); } +BIF_RETTYPE +erts_internal_no_aux_work_threads_0(BIF_ALIST_0) +{ + BIF_RET(make_small(erts_no_aux_work_threads)); +} + static ERTS_INLINE void run_queues_len_aux(ErtsRunQueue *rq, Uint *tot_len, Uint *qlen, int *ip, int incl_active_sched, int locked) { @@ -12499,7 +12599,8 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). 
code = erts_dsig_prepare(&ctx, so->dist_entry, NULL, 0, ERTS_DSP_NO_LOCK, 1, 1, 0); - if (code == ERTS_DSIG_PREP_CONNECTED) { + if (code == ERTS_DSIG_PREP_CONNECTED + && ctx.connection_id == so->conn_id) { int dsflags = 0; if (so->flags & SPO_LINK) dsflags |= ERTS_DIST_SPAWN_FLAG_LINK; @@ -13103,7 +13204,7 @@ proc_exit_handle_pend_spawn_monitors(ErtsMonitor *mon, void *vctxt, Sint reds) item = copy_struct(mon->other.item, item_sz, &hp, factory.off_heap); erts_factory_close(&factory); code = erts_dsig_send_exit_tt(&ctx, - c_p->common.id, + c_p, item, reason, SEQ_TRACE_TOKEN(c_p)); @@ -13385,7 +13486,7 @@ erts_proc_exit_handle_dist_link(ErtsLink *lnk, void *vctxt, Sint reds) item = copy_struct(lnk->other.item, item_sz, &hp, factory.off_heap); erts_factory_close(&factory); code = erts_dsig_send_exit_tt(&ctx, - c_p->common.id, + c_p, item, reason, SEQ_TRACE_TOKEN(c_p)); @@ -13470,13 +13571,13 @@ erts_proc_exit_handle_link(ErtsLink *lnk, void *vctxt, Sint reds) if (!erts_link_dist_delete(dlnk)) elnk = NULL; - code = erts_dsig_prepare(&ctx, dep, c_p, 0, ERTS_DSP_NO_LOCK, 1, 1, 0); + code = erts_dsig_prepare(&ctx, dep, NULL, 0, ERTS_DSP_NO_LOCK, 1, 1, 0); switch (code) { case ERTS_DSIG_PREP_CONNECTED: case ERTS_DSIG_PREP_PENDING: if (dist->connection_id == ctx.connection_id) { code = erts_dsig_send_exit_tt(&ctx, - c_p->common.id, + c_p, lnk->other.item, reason, SEQ_TRACE_TOKEN(c_p)); diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 6fa91a5049..56e5600e1b 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -106,6 +106,7 @@ extern Uint ERTS_WRITE_UNLIKELY(erts_no_total_schedulers); extern Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers); extern Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers); extern Uint ERTS_WRITE_UNLIKELY(erts_no_run_queues); +extern int ERTS_WRITE_UNLIKELY(erts_no_aux_work_threads); extern int erts_sched_thread_suggested_stack_size; extern int 
erts_dcpu_sched_thread_suggested_stack_size; extern int erts_dio_sched_thread_suggested_stack_size; @@ -558,8 +559,9 @@ typedef struct { erts_aint32_t aux_work; } ErtsDelayedAuxWorkWakeupJob; -typedef struct { - int sched_id; +typedef struct ErtsAuxWorkData_ { + int aux_work_tid; + ErtsThrAllocData alloc_data; ErtsSchedulerData *esdp; ErtsSchedulerSleepInfo *ssi; ErtsThrPrgrVal current_thr_prgr; @@ -607,7 +609,8 @@ typedef struct { #define ERTS_SCHED_AUX_YIELD_DATA(ESDP, NAME) \ (&(ESDP)->aux_work_data.yield.NAME) -void erts_notify_new_aux_yield_work(ErtsSchedulerData *esdp); +void erts_more_yield_aux_work(ErtsAuxWorkData *); +ErtsAuxWorkData *erts_get_aux_work_data(void); typedef enum { ERTS_DIRTY_CPU_SCHEDULER, @@ -707,8 +710,6 @@ struct ErtsSchedulerData_ { Uint64 unique; Uint64 ref; - ErtsSchedAllocData alloc_data; - struct { Uint64 out; Uint64 in; @@ -1434,6 +1435,7 @@ typedef struct { Eterm group_leader; Eterm mfa; DistEntry *dist_entry; + Uint32 conn_id; ErtsMonLnkDist *mld; /* copied from dist_entry->mld */ ErtsDistExternal *edep; ErlHeapFragment *ede_hfrag; @@ -1938,7 +1940,8 @@ void erts_schedule_misc_aux_work(int sched_id, void (*func)(void *), void *arg); void erts_schedule_multi_misc_aux_work(int ignore_self, - int max_sched, + int min_tid, + int max_tid, void (*func)(void *), void *arg); erts_aint32_t erts_set_aux_work_timeout(int, erts_aint32_t, int); diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 8b2b82e527..d7a22c7acc 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -955,9 +955,10 @@ typedef union { ((*((Eterm *)(x)) == ERTS_REF_THING_HEADER) \ & (((ErtsRefThing *) (x))->o.marker == ERTS_ORDINARY_REF_MARKER)) -#define is_magic_ref_thing(x) \ - ((*((Eterm *)(x)) == ERTS_REF_THING_HEADER) \ - & (((ErtsRefThing *) (x))->o.marker != ERTS_ORDINARY_REF_MARKER)) +/* the _with_hdr variant usable when header word may be broken (copy_shared) */ +#define is_magic_ref_thing_with_hdr(PTR,HDR) \ 
+ (((HDR) == ERTS_REF_THING_HEADER) \ + & (((ErtsRefThing *) (PTR))->o.marker != ERTS_ORDINARY_REF_MARKER)) #else /* Ordinary and magic references of different sizes... */ @@ -970,11 +971,14 @@ typedef union { #define is_pid_ref_thing(x) \ (*((Eterm *)(x)) == ERTS_PID_REF_THING_HEADER) -#define is_magic_ref_thing(x) \ - (*((Eterm *)(x)) == ERTS_MAGIC_REF_THING_HEADER) +#define is_magic_ref_thing_with_hdr(PTR,HDR) \ + ((HDR) == ERTS_MAGIC_REF_THING_HEADER) #endif +#define is_magic_ref_thing(PTR) \ + is_magic_ref_thing_with_hdr(PTR, *(Eterm *)(PTR)) + #define is_internal_magic_ref(x) \ (_unchecked_is_boxed((x)) && is_magic_ref_thing(boxed_val((x)))) diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index f4c7f6dec4..72c7ead6ae 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -3039,7 +3039,7 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, const byte* ep, #define ENC_START_SORTING_MAP ((Eterm) 7) #define ENC_CONTINUE_SORTING_MAP ((Eterm) 8) #define ENC_PUSH_SORTED_MAP ((Eterm) 9) -#define ENC_LAST_ARRAY_ELEMENT ((Eterm) 10) +#define ENC_LAST_ARRAY_ELEMENT ((Eterm) 10) /* must be the largest one */ static Eterm* alloc_map_array(Uint size) { @@ -3466,6 +3466,7 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, ep += 4; } if (i > 0) { + ASSERT(ENC_LAST_ARRAY_ELEMENT+i-1 >= ENC_LAST_ARRAY_ELEMENT); WSTACK_PUSH2(s, ENC_LAST_ARRAY_ELEMENT+i-1, (UWord)ptr); } break; diff --git a/erts/emulator/beam/generators.tab b/erts/emulator/beam/generators.tab index 520c688113..9c9d357475 100644 --- a/erts/emulator/beam/generators.tab +++ b/erts/emulator/beam/generators.tab @@ -93,6 +93,19 @@ gen.get_float2(Fail, Ms, Live, Size, Unit, Flags, Dst) { return op; } +gen.get_utf16(Fail, Ms, Flags, Dst) { + BeamOp* op; + $NewBeamOp(S, op); + + $NativeEndian(Flags); + $BeamOpNameArity(op, i_bs_get_utf16, 4); + op->a[0] = Ms; + op->a[1] = Fail; + op->a[2] = Flags; + op->a[3] = Dst; + return op; 
+} + gen.put_binary(Fail, Size, Unit, Flags, Src) { BeamOp* op; $NewBeamOp(S, op); @@ -234,6 +247,18 @@ gen.put_float(Fail, Size, Unit, Flags, Src) { return op; } +gen.put_utf16(Fail, Flags, Src) { + BeamOp* op; + $NewBeamOp(S, op); + + $NativeEndian(Flags); + $BeamOpNameArity(op, i_bs_put_utf16, 3); + op->a[0] = Fail; + op->a[1] = Flags; + op->a[2] = Src; + return op; +} + // Generate the fastest instruction for bs_skip_bits. gen.skip_bits2(Fail, Ms, Size, Unit, Flags) { BeamOp* op; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 904d394fca..9176309e42 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -443,28 +443,29 @@ do { \ #define ESTACK_IS_STATIC(s) ((s).start == ESTK_DEF_STACK(s)) +#define ESTACK_RESERVE(s, push_cnt) \ +do { \ + if ((s).end - (s).sp < (Sint)(push_cnt)) { \ + erl_grow_estack(&(s), (push_cnt)); \ + } \ +} while(0) + #define ESTACK_PUSH(s, x) \ do { \ - if ((s).sp == (s).end) { \ - erl_grow_estack(&(s), 1); \ - } \ + ESTACK_RESERVE(s, 1); \ *(s).sp++ = (x); \ } while(0) #define ESTACK_PUSH2(s, x, y) \ do { \ - if ((s).sp > (s).end - 2) { \ - erl_grow_estack(&(s), 2); \ - } \ + ESTACK_RESERVE(s, 2); \ *(s).sp++ = (x); \ *(s).sp++ = (y); \ } while(0) #define ESTACK_PUSH3(s, x, y, z) \ do { \ - if ((s).sp > (s).end - 3) { \ - erl_grow_estack(&s, 3); \ - } \ + ESTACK_RESERVE(s, 3); \ *(s).sp++ = (x); \ *(s).sp++ = (y); \ *(s).sp++ = (z); \ @@ -472,22 +473,13 @@ do { \ #define ESTACK_PUSH4(s, E1, E2, E3, E4) \ do { \ - if ((s).sp > (s).end - 4) { \ - erl_grow_estack(&s, 4); \ - } \ + ESTACK_RESERVE(s, 4); \ *(s).sp++ = (E1); \ *(s).sp++ = (E2); \ *(s).sp++ = (E3); \ *(s).sp++ = (E4); \ } while(0) -#define ESTACK_RESERVE(s, push_cnt) \ -do { \ - if ((s).sp > (s).end - (push_cnt)) { \ - erl_grow_estack(&(s), (push_cnt)); \ - } \ -} while(0) - /* Must be preceded by ESTACK_RESERVE */ #define ESTACK_FAST_PUSH(s, x) \ do { \ @@ -642,28 +634,29 @@ do { \ #define WSTACK_IS_STATIC(s) (s.wstart == 
WSTK_DEF_STACK(s)) -#define WSTACK_PUSH(s, x) \ -do { \ - if (s.wsp == s.wend) { \ - erl_grow_wstack(&s, 1); \ - } \ - *s.wsp++ = (x); \ +#define WSTACK_RESERVE(s, push_cnt) \ +do { \ + if (s.wend - s.wsp < (Sint)(push_cnt)) { \ + erl_grow_wstack(&s, (push_cnt)); \ + } \ +} while(0) + +#define WSTACK_PUSH(s, x) \ +do { \ + WSTACK_RESERVE(s, 1); \ + *s.wsp++ = (x); \ } while(0) #define WSTACK_PUSH2(s, x, y) \ do { \ - if (s.wsp > s.wend - 2) { \ - erl_grow_wstack(&s, 2); \ - } \ + WSTACK_RESERVE(s, 2); \ *s.wsp++ = (x); \ *s.wsp++ = (y); \ } while(0) #define WSTACK_PUSH3(s, x, y, z) \ do { \ - if (s.wsp > s.wend - 3) { \ - erl_grow_wstack(&s, 3); \ - } \ + WSTACK_RESERVE(s, 3); \ *s.wsp++ = (x); \ *s.wsp++ = (y); \ *s.wsp++ = (z); \ @@ -671,9 +664,7 @@ do { \ #define WSTACK_PUSH4(s, A1, A2, A3, A4) \ do { \ - if (s.wsp > s.wend - 4) { \ - erl_grow_wstack(&s, 4); \ - } \ + WSTACK_RESERVE(s, 4); \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ *s.wsp++ = (A3); \ @@ -682,9 +673,7 @@ do { \ #define WSTACK_PUSH5(s, A1, A2, A3, A4, A5) \ do { \ - if (s.wsp > s.wend - 5) { \ - erl_grow_wstack(&s, 5); \ - } \ + WSTACK_RESERVE(s, 5); \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ *s.wsp++ = (A3); \ @@ -694,9 +683,7 @@ do { \ #define WSTACK_PUSH6(s, A1, A2, A3, A4, A5, A6) \ do { \ - if (s.wsp > s.wend - 6) { \ - erl_grow_wstack(&s, 6); \ - } \ + WSTACK_RESERVE(s, 6); \ *s.wsp++ = (A1); \ *s.wsp++ = (A2); \ *s.wsp++ = (A3); \ @@ -705,13 +692,6 @@ do { \ *s.wsp++ = (A6); \ } while(0) -#define WSTACK_RESERVE(s, push_cnt) \ -do { \ - if (s.wsp > s.wend - (push_cnt)) { \ - erl_grow_wstack(&s, (push_cnt)); \ - } \ -} while(0) - /* Must be preceded by WSTACK_RESERVE */ #define WSTACK_FAST_PUSH(s, x) \ do { \ diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 477f5bd749..f13a7d437a 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -4947,6 +4947,7 @@ erts_request_io_bytes(Process *c_p) if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, + 1, 
erts_no_schedulers, reply_io_bytes, (void *) req); diff --git a/erts/emulator/beam/jit/beam_jit_common.c b/erts/emulator/beam/jit/beam_jit_common.c index a69a48beff..d2d4ad43d6 100644 --- a/erts/emulator/beam/jit/beam_jit_common.c +++ b/erts/emulator/beam/jit/beam_jit_common.c @@ -664,8 +664,7 @@ Sint beam_jit_remove_message(Process *c_p, tok_serial); } #endif - erts_msgq_unlink_msg(c_p, msgp); - erts_msgq_set_save_first(c_p); + erts_msgq_unlink_msg_set_save_first(c_p, msgp); CANCEL_TIMER(c_p); erts_save_message_in_proc(c_p, msgp); diff --git a/erts/emulator/beam/jit/x86/generators.tab b/erts/emulator/beam/jit/x86/generators.tab index ab755fde4c..e09c9b5c7b 100644 --- a/erts/emulator/beam/jit/x86/generators.tab +++ b/erts/emulator/beam/jit/x86/generators.tab @@ -451,6 +451,18 @@ gen.get_binary2(Fail, Ms, Live, Size, Unit, Flags, Dst) { return op; } +gen.skip_utf16(Fail, Ms, Flags) { + BeamOp* op; + $NewBeamOp(S, op); + + $NativeEndian(Flags); + $BeamOpNameArity(op, i_bs_skip_utf16, 3); + op->a[0] = Ms; + op->a[1] = Fail; + op->a[2] = Flags; + return op; +} + gen.combine_conses(Len, Dst, Hd) { BeamOp* cons; BeamOp* tail; diff --git a/erts/emulator/beam/jit/x86/instr_arith.cpp b/erts/emulator/beam/jit/x86/instr_arith.cpp index 5615230551..56e2fa59da 100644 --- a/erts/emulator/beam/jit/x86/instr_arith.cpp +++ b/erts/emulator/beam/jit/x86/instr_arith.cpp @@ -417,14 +417,13 @@ void BeamGlobalAssembler::emit_int_div_rem_guard_shared() { emit_leave_runtime(); + /* erts_int_div returns 0 on failure and 1 on success. */ + a.test(RETd, RETd); + /* Place the result in RAX:RDX, mirroring the `idiv` instruction. */ a.mov(x86::rax, TMP_MEM1q); a.mov(x86::rdx, TMP_MEM2q); - /* erts_int_div returns a tagged value, so we know it's non-zero and can - * clear ZF by and it with itself. 
*/ - a.test(RET, RET); - /* Fall through */ } @@ -498,7 +497,8 @@ void BeamGlobalAssembler::emit_int_div_rem_body_shared() { emit_leave_runtime(); - a.test(RET, RET); + /* erts_int_div returns 0 on failure and 1 on success. */ + a.test(RETd, RETd); /* Place the result in RAX:RDX, mirroring the `idiv` instruction. */ a.mov(x86::rax, TMP_MEM4q); diff --git a/erts/emulator/beam/jit/x86/instr_bs.cpp b/erts/emulator/beam/jit/x86/instr_bs.cpp index 7df0b84cae..3dc6894efa 100644 --- a/erts/emulator/beam/jit/x86/instr_bs.cpp +++ b/erts/emulator/beam/jit/x86/instr_bs.cpp @@ -1224,9 +1224,9 @@ void BeamModuleAssembler::emit_i_bs_utf16_size(const ArgVal &Src, mov_arg(Dst, RET); } -void BeamModuleAssembler::emit_bs_put_utf16(const ArgVal &Fail, - const ArgVal &Flags, - const ArgVal &Src) { +void BeamModuleAssembler::emit_i_bs_put_utf16(const ArgVal &Fail, + const ArgVal &Flags, + const ArgVal &Src) { Label next; if (Fail.getValue() == 0) { @@ -1407,8 +1407,8 @@ void BeamModuleAssembler::emit_bs_add(const ArgVal &Fail, a.and_(RETd, ARG1d); } } - a.and_(RETb, imm(_TAG_PRIMARY_MASK)); - a.cmp(RETb, imm(TAG_PRIMARY_IMMED1)); + a.and_(RETb, imm(_TAG_IMMED1_MASK)); + a.cmp(RETb, imm(_TAG_IMMED1_SMALL)); a.jne(fail); /* Verify that ARG2 >= 0 and multiply ARG2 by the unit. The diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index d6af3818a1..8ccfb6cfbc 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -886,8 +886,8 @@ i_bs_get_utf8 S f? d bs_skip_utf8 Fail=f Ms=xy u u => i_bs_skip_utf8 Ms Fail i_bs_skip_utf8 S f? -bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst -bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_skip_utf16 Ms Fail Flags +bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => get_utf16(Fail, Ms, Flags, Dst) +bs_skip_utf16 Fail=f Ms=xy u Flags=u => skip_utf16(Fail, Ms, Flags) i_bs_get_utf16 S f? t d i_bs_skip_utf16 S f? 
t @@ -983,6 +983,7 @@ bs_utf8_size j Src Dst=d => i_bs_utf8_size Src Dst bs_utf16_size j Src Dst=d => i_bs_utf16_size Src Dst bs_put_utf8 Fail u Src => i_bs_put_utf8 Fail Src +bs_put_utf16 Fail Flags Src => put_utf16(Fail, Flags, Src) bs_put_utf32 Fail=j Flags=u Src=s => \ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src @@ -991,7 +992,7 @@ i_bs_utf8_size s x i_bs_utf16_size s x i_bs_put_utf8 j? s -bs_put_utf16 j? t s +i_bs_put_utf16 j? t s i_bs_validate_unicode j? s |