diff options
-rw-r--r-- | erts/doc/src/erl_cmd.xml | 12 | ||||
-rw-r--r-- | erts/emulator/beam/erl_port_task.c | 30 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 32 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_check_io.c | 99 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_check_io.h | 18 | ||||
-rw-r--r-- | erts/emulator/test/scheduler_SUITE.erl | 51 | ||||
-rw-r--r-- | erts/etc/common/erlexec.c | 10 |
7 files changed, 182 insertions, 70 deletions
diff --git a/erts/doc/src/erl_cmd.xml b/erts/doc/src/erl_cmd.xml index 6eeaf27eaf..98412fcf25 100644 --- a/erts/doc/src/erl_cmd.xml +++ b/erts/doc/src/erl_cmd.xml @@ -969,6 +969,18 @@ $ <input>erl \ <c>+IOt</c> are used, <c>+IOPt</c> is ignored. </p> </item> + <tag><marker id="+IOs"/><c>+IOs true|false</c></tag> + <item> + <p>Enable or disable scheduler thread poll optimization. Default is + <c>true</c>. + </p> + <p>If enabled, file descriptors that are frequently read may be moved to + a special pollset used by scheduler threads. The objective is to + reduce the number of system calls and thereby CPU load, but it can + in some cases increase scheduling latency for individual file + descriptor input events. + </p> + </item> <tag><marker id="+JPperf"/><c>+JPperf true|false</c></tag> <item> <p>Enables or disables support for the `perf` profiler when running diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 27ecee8d09..33b6cf1c43 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -1326,22 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp) res = - 1; /* Task already aborted, executing, or executed */ else { reset_port_task_handle(pthp); - #if ERTS_POLL_USE_SCHEDULER_POLLING - switch (ptp->type) { - case ERTS_PORT_TASK_INPUT: - case ERTS_PORT_TASK_OUTPUT: - if (ptp->u.alive.td.io.is_scheduler_event) { - ASSERT(erts_atomic_read_nob( - &erts_port_task_outstanding_io_tasks) > 0); - erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); + if (erts_sched_poll_enabled()) { + switch (ptp->type) { + case ERTS_PORT_TASK_INPUT: + case ERTS_PORT_TASK_OUTPUT: + if (ptp->u.alive.td.io.is_scheduler_event) { + ASSERT(erts_atomic_read_nob( + &erts_port_task_outstanding_io_tasks) > 0); + erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); + } + break; + default: + break; } - break; - default: - break; - } + } #endif - res = 0; } } @@ -1844,16 +1844,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) } ERTS_MSACC_POP_STATE_M(); - #if ERTS_POLL_USE_SCHEDULER_POLLING - if (io_tasks_executed) { + if (erts_sched_poll_enabled() && io_tasks_executed) { ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) >= io_tasks_executed); erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks, -1*io_tasks_executed); } #endif - ASSERT(runq == erts_get_runq_port(pp)); active = finalize_exec(pp, &execq, processing_busy_q); diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index c75f67c0d7..fac9ab704b 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -3194,11 +3194,11 @@ aux_thread(void *vix) #if ERTS_POLL_USE_FALLBACK if (ix == 0) { -#if ERTS_POLL_USE_SCHEDULER_POLLING - ssi->psi = erts_create_pollset_thread(-2, tpd); -#else - ssi->psi = erts_create_pollset_thread(-1, tpd); -#endif + if (erts_sched_poll_enabled()) { + ssi->psi = erts_create_pollset_thread(-2, tpd); + } else { + ssi->psi = erts_create_pollset_thread(-1, tpd); + } } #endif @@ -3365,11 +3365,13 @@ try_set_sys_scheduling(void) static ERTS_INLINE int prepare_for_sys_schedule(void) { - while (!erts_port_task_have_outstanding_io_tasks() - && try_set_sys_scheduling()) { - if (!erts_port_task_have_outstanding_io_tasks()) - return 1; - clear_sys_scheduling(); + if (erts_sched_poll_enabled()) { + while (!erts_port_task_have_outstanding_io_tasks() + && try_set_sys_scheduling()) { + if (!erts_port_task_have_outstanding_io_tasks()) + return 1; + clear_sys_scheduling(); + } } return 0; } @@ -8620,9 +8622,9 @@ sched_thread_func(void *vesdp) erts_thr_progress_register_managed_thread(esdp, &callbacks, 0, 0); -#if ERTS_POLL_USE_SCHEDULER_POLLING - esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); -#endif + if (erts_sched_poll_enabled()) { + esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); + } #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -9522,7 +9524,9 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) esdp->virtual_reds = 0; #if ERTS_POLL_USE_SCHEDULER_POLLING - fcalls = (int) erts_atomic32_add_read_acqb(&function_calls, reds); + if (erts_sched_poll_enabled()) { + fcalls = (int)erts_atomic32_add_read_acqb(&function_calls, reds); + } #endif ASSERT(esdp && esdp == erts_get_scheduler_data()); diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 28716b313e..1b12ed3293 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -146,7 +146,7 @@ static ErtsPollThread *psiv; static ErtsPollSet *flbk_pollset; #endif #if ERTS_POLL_USE_SCHEDULER_POLLING -static ErtsPollSet *sched_pollset; +ErtsPollSet *sched_pollset; #endif typedef struct { @@ -418,7 +418,7 @@ static ERTS_INLINE ErtsPollSet * get_scheduler_pollset(ErtsSysFdType fd) { #if ERTS_POLL_USE_SCHEDULER_POLLING - return sched_pollset; + return sched_pollset ? sched_pollset : get_pollset(fd); #else return get_pollset(fd); #endif @@ -506,7 +506,7 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); if (state->events & ERTS_POLL_EV_IN) { active_events |= ERTS_POLL_EV_IN; - if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (state->count > 10 && erts_sched_poll_enabled()) { if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) op = ERTS_POLL_OP_ADD; state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; @@ -514,7 +514,7 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, DEBUG_PRINT_FD("moving to scheduler ps", state); } else new_events = active_events; - if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && erts_sched_poll_enabled()) state->count++; } break; @@ -1800,7 +1800,7 @@ erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time, int poll_only select/deselect in rapid succession. */ revents &= state->active_events | ERTS_POLL_EV_NVAL; - if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + if (psi->ps != get_scheduler_pollset(fd) || !erts_sched_poll_enabled()) { ErtsPollEvents reactive_events; state->active_events &= ~revents; @@ -2118,7 +2118,7 @@ get_arg(char* rest, char** argv, int* ip) } static void -parse_args(int *argc, char **argv, int concurrent_waiters) +parse_args(int *argc, char **argv, int concurrent_waiters, int* use_sched_poll) { int i = 0, j; int no_pollsets = 0, no_poll_threads = 0, @@ -2160,8 +2160,16 @@ parse_args(int *argc, char **argv, int concurrent_waiters) erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg); erts_usage(); } - } else { - break; + } else if (sys_strcmp(argv[i]+2, "Os") == 0) { + const char *arg = get_arg(argv[i]+4, argv, &i); + if (sys_strcmp(arg, "true") == 0) { + *use_sched_poll = 1; + } else if (sys_strcmp(arg, "false") == 0) { + *use_sched_poll = 0; + } else { + erts_fprintf(stderr,"bad +IOs boolean argument: %s\n", arg); + erts_usage(); + } } break; } @@ -2229,6 +2237,8 @@ void erts_init_check_io(int *argc, char **argv) { int j, concurrent_waiters, no_poll_threads; + int use_sched_poll = ERTS_POLL_USE_SCHEDULER_POLLING; + ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED | ERL_NIF_SELECT_STOP_SCHEDULED | ERL_NIF_SELECT_INVALID_EVENT | @@ -2240,7 +2250,7 @@ erts_init_check_io(int *argc, char **argv) erts_poll_init_flbk(NULL); #endif - parse_args(argc, argv, concurrent_waiters); + parse_args(argc, argv, concurrent_waiters, &use_sched_poll); /* Create the actual pollsets */ pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets); @@ -2252,10 +2262,16 @@ erts_init_check_io(int *argc, char **argv) j = -1; + if (use_sched_poll) { #if ERTS_POLL_USE_SCHEDULER_POLLING - sched_pollset = erts_poll_create_pollset(j--); - no_poll_threads++; + sched_pollset = erts_poll_create_pollset(j--); + ASSERT(erts_sched_poll_enabled()); + no_poll_threads++; +#else + erts_fprintf(stderr,"+IOs true: not supported by this emulator\n"); + erts_usage(); #endif + } #if ERTS_POLL_USE_FALLBACK flbk_pollset = erts_poll_create_pollset_flbk(j--); @@ -2272,13 +2288,13 @@ erts_init_check_io(int *argc, char **argv) psiv++; #endif -#if ERTS_POLL_USE_SCHEDULER_POLLING - psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; - psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, - sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); - psiv[0].ps = get_scheduler_pollset(0); - psiv++; -#endif + if (erts_sched_poll_enabled()) { + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); + psiv++; + } for (j = 0; j < erts_no_poll_threads; j++) { psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; @@ -2336,12 +2352,12 @@ erts_check_io_size(void) erts_poll_info(get_fallback_pollset(), &pi); res += pi.memory_size; #endif - #if ERTS_POLL_USE_SCHEDULER_POLLING - erts_poll_info(get_scheduler_pollset(0), &pi); - res += pi.memory_size; + if (erts_sched_poll_enabled()) { + erts_poll_info(sched_pollset, &pi); + res += pi.memory_size; + } #endif - for (i = 0; i < erts_no_pollsets; i++) { erts_poll_info(pollsetv[i], &pi); res += pi.memory_size; @@ -2370,9 +2386,9 @@ erts_check_io_info(void *proc) Uint sz, *szp, *hp, **hpp; ErtsPollInfo *piv; Sint i, j = 0, len; - int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + erts_sched_poll_enabled(); ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); - ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); + ERTS_ASSERT(erts_sched_poll_enabled() == 0 || erts_sched_poll_enabled() == 1); piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); @@ -2382,14 +2398,14 @@ erts_check_io_info(void *proc) piv[0].active_fds = 0; piv++; #endif - #if ERTS_POLL_USE_SCHEDULER_POLLING - erts_poll_info(get_scheduler_pollset(0), &piv[0]); - piv[0].poll_threads = 0; - piv[0].active_fds = 0; - piv++; + if (erts_sched_poll_enabled()) { + erts_poll_info(sched_pollset, &piv[0]); + piv[0].poll_threads = 0; + piv[0].active_fds = 0; + piv++; + } #endif - for (j = 0; j < erts_no_pollsets; j++) { erts_poll_info(pollsetv[j], &piv[j]); piv[j].active_fds = 0; @@ -2438,7 +2454,7 @@ erts_check_io_info(void *proc) sz = 0; piv -= ERTS_POLL_USE_FALLBACK; - piv -= ERTS_POLL_USE_SCHEDULER_POLLING; + piv -= erts_sched_poll_enabled(); bld_it: @@ -2919,18 +2935,19 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) } #endif #if ERTS_POLL_USE_SCHEDULER_POLLING - erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); - erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, - drv_ev_state.max_fds); - for (fd = 0; fd < len; fd++) { - if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { - if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) - counters.epep[fd] &= ~ERTS_POLL_EV_OUT; - doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + if (erts_sched_poll_enabled()) { + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(sched_pollset, counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } } #endif - erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); for (i = 0; i < erts_no_pollsets; i++) { @@ -2942,6 +2959,7 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) && get_pollset_id(fd) == i) { if (counters.epep[fd] != ERTS_POLL_EV_NONE && drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + ERTS_ASSERT(erts_sched_poll_enabled()); /* We add the in flag if it is enabled in the scheduler pollset and get_selected_events works on the platform */ counters.epep[fd] |= ERTS_POLL_EV_IN; @@ -2950,6 +2968,7 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) } } } + for (fd = len ; fd < drv_ev_state.max_fds; fd++) { null_des.fd = fd; doit_erts_check_io_debug(&null_des, &counters, dsbufp); diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index b96f4f9609..7af984fea9 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -112,6 +112,22 @@ typedef struct { } ErtsIoTask; +ERTS_GLB_INLINE int erts_sched_poll_enabled(void); + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE int erts_sched_poll_enabled(void) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + extern ErtsPollSet *sched_pollset; + return (sched_pollset != NULL); +#else + return 0; +#endif +} + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif /* ERL_CHECK_IO_H__ */ #if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__) @@ -133,6 +149,7 @@ extern int erts_no_poll_threads; #include "erl_poll.h" #include "erl_port_task.h" + typedef struct { Eterm inport; Eterm outport; @@ -152,3 +169,4 @@ typedef struct { } ErtsNifSelectDataState; #endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */ + diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index 817062c397..44000667a0 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -58,6 +58,7 @@ scheduler_suspend_basic/1, scheduler_suspend/1, dirty_scheduler_threads/1, + sched_poll/1, poll_threads/1, reader_groups/1, otp_16446/1, @@ -77,6 +78,7 @@ all() -> {group, scheduler_bind}, scheduler_threads, scheduler_suspend_basic, scheduler_suspend, dirty_scheduler_threads, + sched_poll, poll_threads, reader_groups, otp_16446, @@ -1528,6 +1530,55 @@ sst5_loop(N) -> erlang:system_flag(multi_scheduling, unblock_normal), sst5_loop(N-1). +%% Test scheduler polling: +IOs true|false +sched_poll(Config) when is_list(Config) -> + + Env = case os:getenv("ERL_AFLAGS") of + false -> + []; + AFLAGS1 -> + %% Remove any +IOs + AFLAGS2 = list_to_binary(re:replace(AFLAGS1, + "\\+IOs (true|false)", + "", [global])), + [{"ERL_AFLAGS", binary_to_list(AFLAGS2)}] + end, + + [PS | _] = get_iostate(""), + HaveSchedPoll = proplists:get_value(concurrent_updates, PS), + + 0 = get_sched_pollsets(["+IOs", "false"]), + if + HaveSchedPoll -> + 1 = get_sched_pollsets(["+IOs", "true"]), + 1 = get_sched_pollsets([], Env); + + not HaveSchedPoll -> + fail = get_sched_pollsets(["+IOs", "true"]), + 0 = get_sched_pollsets([], Env) + end, + fail = get_sched_pollsets(["+IOs", "bad"]), + ok. + +get_sched_pollsets(Cmd) -> + get_sched_pollsets(Cmd, []). + +get_sched_pollsets(Cmd, Env)-> + try + {ok, Peer, Node} = ?CT_PEER(#{connection => standard_io, args => Cmd, + env => [{"ERL_LIBS", false} | Env]}), + [IOStates] = mcall(Node,[fun () -> erlang:system_info(check_io) end]), + IO = [IOState || IOState <- IOStates, + %% We assume non-fallbacks without threads are scheduler pollsets + proplists:get_value(fallback, IOState) == false, + proplists:get_value(poll_threads, IOState) == 0], + peer:stop(Peer), + length(IO) % number of scheduler pollsets + catch + exit:{boot_failed, _} -> + fail + end. + poll_threads(Config) when is_list(Config) -> [PS | _] = get_iostate(""), Conc = proplists:get_value(concurrent_updates, PS), diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 8dc9363af6..78c4440a3a 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -875,6 +875,16 @@ int main(int argc, char **argv) i++; break; } + if (argv[i][2] == 'O' && argv[i][3] == 's') { + if (argv[i][4] != '\0') + goto the_default; + NEXT_ARG_CHECK(); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + break; + } usage(argv[i]); break; case 'J': |