diff options
author | Sridhar Samudrala <sridhar.samudrala@intel.com> | 2020-08-18 23:53:07 -0700 |
---|---|---|
committer | dormando <dormando@rydia.net> | 2020-11-02 15:00:36 -0800 |
commit | 4de258ed891c0e5048192be1626fff6fabb10438 (patch) | |
tree | 72d641c7d3cf7fe74817f229fed164342cfa41ff /thread.c | |
parent | 0b374c63ab7e63c0098983d0a68cefedfd94557a (diff) | |
download | memcached-4de258ed891c0e5048192be1626fff6fabb10438.tar.gz |
Introduce NAPI ID based worker thread selection
By default memcached assigns connections to worker threads in
a round-robin manner. This patch introduces an option to select
a worker thread based on the incoming connection's NAPI ID if
the SO_INCOMING_NAPI_ID socket option is supported by the OS.
This allows a memcached worker thread to be associated with a
NIC HW receive queue and service all the connection requests
received on a specific RX queue. This mapping between a memcached
thread and a HW NIC queue streamlines the flow of data from the
NIC to the application. In addition, an optimal path with reduced
context switches is possible if epoll-based busy polling
(sysctl -w net.core.busy_poll=<non-zero value>) is also enabled.
This feature is enabled via a new command line parameter -N <num>
or "--napi_ids=<num>", where <num> is the number of available/assigned
NIC hardware RX queues through which the connections can be received.
The number of napi_ids specified cannot be greater than the number
of worker threads specified using the -t/--threads option.
If the option is not specified, or the conditions are not met, the code
defaults to round-robin thread selection.
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Diffstat (limited to 'thread.c')
-rw-r--r-- | thread.c | 80 |
1 files changed, 76 insertions, 4 deletions
@@ -587,6 +587,77 @@ static void thread_libevent_process(evutil_socket_t fd, short which, void *arg)
 /* Which thread we assigned a connection to most recently. */
 static int last_thread = -1;
 
+/* Last thread we assigned to a connection based on napi_id */
+static int last_thread_by_napi_id = -1;
+
+static LIBEVENT_THREAD *select_thread_round_robin(void)
+{
+    int tid = (last_thread + 1) % settings.num_threads;
+
+    last_thread = tid;
+
+    return threads + tid;
+}
+
+static void reset_threads_napi_id(void)
+{
+    LIBEVENT_THREAD *thread;
+    int i;
+
+    for (i = 0; i < settings.num_threads; i++) {
+        thread = threads + i;
+        thread->napi_id = 0;
+    }
+
+    last_thread_by_napi_id = -1;
+}
+
+/* Select a worker thread based on the NAPI ID of an incoming connection
+ * request. NAPI ID is a globally unique ID that identifies a NIC RX queue
+ * on which a flow is received.
+ */
+static LIBEVENT_THREAD *select_thread_by_napi_id(int sfd)
+{
+    LIBEVENT_THREAD *thread;
+    int napi_id, err, i;
+    socklen_t len;
+    int tid = -1;
+
+    len = sizeof(socklen_t);
+    err = getsockopt(sfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
+    if ((err == -1) || (napi_id == 0)) {
+        STATS_LOCK();
+        stats.round_robin_fallback++;
+        STATS_UNLOCK();
+        return select_thread_round_robin();
+    }
+
+select:
+    for (i = 0; i < settings.num_threads; i++) {
+        thread = threads + i;
+        if (last_thread_by_napi_id < i) {
+            thread->napi_id = napi_id;
+            last_thread_by_napi_id = i;
+            tid = i;
+            break;
+        }
+        if (thread->napi_id == napi_id) {
+            tid = i;
+            break;
+        }
+    }
+
+    if (tid == -1) {
+        STATS_LOCK();
+        stats.unexpected_napi_ids++;
+        STATS_UNLOCK();
+        reset_threads_napi_id();
+        goto select;
+    }
+
+    return threads + tid;
+}
+
 /*
  * Dispatches a new connection to another thread. This is only ever called
  * from the main thread, either during initialization (for UDP) or because
@@ -603,11 +674,12 @@ void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags,
         return;
     }
 
-    int tid = (last_thread + 1) % settings.num_threads;
-
-    LIBEVENT_THREAD *thread = threads + tid;
+    LIBEVENT_THREAD *thread;
 
-    last_thread = tid;
+    if (!settings.num_napi_ids)
+        thread = select_thread_round_robin();
+    else
+        thread = select_thread_by_napi_id(sfd);
 
     item->sfd = sfd;
     item->init_state = init_state;