handlers: Fix handlers mapping.

The handler and CPU mapping in upcalls are incorrect, and this is specially noticeable systems with cpu isolation enabled. Say we have a 12 core system where only every even number CPU is enabled C0, C2, C4, C6, C8, C10 This means we will create an array of size 6 that will be sent to kernel that is populated with sockets [S0, S1, S2, S3, S4, S5] The problem is when the kernel does an upcall it checks the socket array via the index of the CPU, effectively adding additional load on some CPUs while leaving no work on other CPUs. e.g. C0 indexes to S0 C2 indexes to S2 (should be S1) C4 indexes to S4 (should be S2) Modulo of 6 (size of socket array) is applied, so we wrap back to S0 C6 indexes to S0 (should be S3) C8 indexes to S2 (should be S4) C10 indexes to S4 (should be S5) Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5 get no work assigned to them This leads to the kernel to throw the following message: "openvswitch: cpu_id mismatch with handler threads" Instead we will send the kernel a corrected array of sockets the size of all CPUs in the system, or the largest core_id on the system, which ever one is greatest. This is to take care of systems with non-continous core cpus. In the above example we would create a corrected array in a round-robin(assuming prime bias) fashion as follows: [S0, S1, S2, S3, S4, S5, S6, S0, S1, S2, S3, S4] Fixes: b1e517bd2f81 ("dpif-netlink: Introduce per-cpu upcall dispatch.") Co-authored-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Michael Santana <msantana@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
author: Michael Santana <msantana@redhat.com> 2022-08-09 03:18:15 -0400
committer: Ilya Maximets <i.maximets@ovn.org> 2022-08-15 19:44:58 +0200
commit: 8a1b734480dfe1a78a2ea3e76918b6e2964fb07e (patch)
tree: 5f31668b9f4c90dd1cf3aba4156d57ca7420359b
parent: 713072fdacd544bdd0614fe1e3163ba92988cba7 (diff)
download: openvswitch-8a1b734480dfe1a78a2ea3e76918b6e2964fb07e.tar.gz
3 files changed, 57 insertions, 7 deletions
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 4e51b6e4b..8c8063f06 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -801,14 +801,28 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
                               uint32_t n_upcall_pids)
 {
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
+    int largest_cpu_id = ovs_numa_get_largest_core_id();
     struct dpif_netlink_dp request, reply;
     struct ofpbuf *bufp;
-    int error;
-    int n_cores;
 
-    n_cores = count_cpu_cores();
-    ovs_assert(n_cores == n_upcall_pids);
-    VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
+    uint32_t *corrected;
+    int error, i, n_cores;
+
+    if (largest_cpu_id == OVS_NUMA_UNSPEC) {
+        largest_cpu_id = -1;
+    }
+
+    /* Some systems have non-continuous cpu core ids.  count_total_cores()
+     * would return an accurate number, however, this number cannot be used.
+     * e.g. If the largest core_id of a system is cpu9, but the system only
+     * has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
+     * warning.  With the MAX() in place in this example we send an array of
+     * size 10 and prevent the warning.  This has no bearing on the number of
+     * threads created.
+     */
+    n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
+    VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
+             n_cores);
 
     dpif_netlink_dp_init(&request);
     request.cmd = OVS_DP_CMD_SET;
@@ -817,7 +831,12 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
     request.user_features = dpif->user_features |
                             OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
 
-    request.upcall_pids = upcall_pids;
+    corrected = xcalloc(n_cores, sizeof *corrected);
+
+    for (i = 0; i < n_cores; i++) {
+        corrected[i] = upcall_pids[i % n_upcall_pids];
+    }
+    request.upcall_pids = corrected;
     request.n_upcall_pids = n_cores;
 
     error = dpif_netlink_dp_transact(&request, &reply, &bufp);
@@ -825,9 +844,10 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
         dpif->user_features = reply.user_features;
         ofpbuf_delete(bufp);
         if (!dpif_netlink_upcall_per_cpu(dpif)) {
-            return -EOPNOTSUPP;
+            error = -EOPNOTSUPP;
         }
     }
+    free(corrected);
     return error;
 }
 
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 9e3fa5421..6a197772c 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -387,6 +387,35 @@ ovs_numa_get_n_cores_on_numa(int numa_id)
     return OVS_CORE_UNSPEC;
 }
 
+/* Returns the largest core_id.
+ *
+ * Return OVS_CORE_UNSPEC, if core_id information is not found.
+ *
+ * Returning OVS_CORE_UNSPEC comes at a caveat.  The caller function
+ * must remember to check the return value of this callee function
+ * against OVS_CORE_UNSPEC.  OVS_CORE_UNSPEC is a positive integer
+ * INT_MAX, which the caller may interpret it as the largest
+ * core_id if it's not checking for it.
+ */
+unsigned
+ovs_numa_get_largest_core_id(void)
+{
+    struct cpu_core *core;
+    unsigned max_id = 0;
+
+    if (!found_numa_and_core) {
+        return OVS_CORE_UNSPEC;
+    }
+
+    HMAP_FOR_EACH (core, hmap_node, &all_cpu_cores) {
+        if (core->core_id > max_id) {
+            max_id = core->core_id;
+        }
+    }
+
+    return max_id;
+}
+
 static struct ovs_numa_dump *
 ovs_numa_dump_create(void)
 {
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 83bd10cca..02c9e84cf 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -56,6 +56,7 @@ int ovs_numa_get_n_numas(void);
 int ovs_numa_get_n_cores(void);
 int ovs_numa_get_numa_id(unsigned core_id);
 int ovs_numa_get_n_cores_on_numa(int numa_id);
+unsigned ovs_numa_get_largest_core_id(void);
 struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
 struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask);
 struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n);
author	Michael Santana <msantana@redhat.com>	2022-08-09 03:18:15 -0400
committer	Ilya Maximets <i.maximets@ovn.org>	2022-08-15 19:44:58 +0200
commit	8a1b734480dfe1a78a2ea3e76918b6e2964fb07e (patch)
tree	5f31668b9f4c90dd1cf3aba4156d57ca7420359b
parent	713072fdacd544bdd0614fe1e3163ba92988cba7 (diff)
download	openvswitch-8a1b734480dfe1a78a2ea3e76918b6e2964fb07e.tar.gz