author      Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2022-05-06 19:18:12 +0200
committer   Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2022-05-06 19:18:12 +0200
commit      0500f69fb0163a9ca53c7f522b8ca3397f53c11c (patch)
tree        dc8f059ef9d3455fc56a35f876fdb679c1f6e569
parent      186fe8be4089bc3bfd870e64f8621187312de77c (diff)
download    linux-rt-0500f69fb0163a9ca53c7f522b8ca3397f53c11c.tar.gz
[ANNOUNCE] v5.18-rc5-rt5
Dear RT folks!
I'm pleased to announce the v5.18-rc5-rt5 patch set.
Changes since v5.18-rc5-rt4:
- Replace the ptrace patch with v4 of Eric W. Biederman's rework plus
a smaller patch on top to get it to work.
- Drop get_cpu_light from the series (see the sketch after this list for the common replacement pattern):
- Replace the crypto/cryptd patch with an alternative version.
- Replace the nfs/sunrpc patch with an alternative version.
- Replace the vmalloc related patch with an alternative version.
- Replace the block-mq related patch with an alternative version.
- Replace the scsi/fcoe patch with two patches by Davidlohr Bueso plus
  two additional patches to tackle the two remaining issues in that
  area.
- Drop the preempt-disable -> migrate-disable replacement in KVM on
  arm64. It no longer appears to be needed.
- Drop the "delay sending signals in atomic context" patch on x86/arm64.
  It is no longer needed in order to send a SIGTRAP for a breakpoint on
  x86 (the original motivation). Let's see if something else pops up.
  This might break ARM64 in that regard and will be investigated in a
  follow-up release.
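The get_cpu_light removals above share one pattern, which the scsi/fcoe and libfc patches appended below implement: a get_cpu_var()/put_cpu_var() section either becomes a region guarded by a local_lock_t embedded in the per-CPU data (where several fields must stay consistent), or collapses into a plain this_cpu_*() operation (for an independent counter). Here is a minimal sketch of both forms; the names demo_percpu, demo_update and demo_count are made up for illustration and do not appear in the queue:

/* Illustration only, not part of the patch queue. */
#include <linux/local_lock.h>
#include <linux/percpu.h>

struct demo_percpu {
	local_lock_t lock;	/* preempt_disable() on !RT, a per-CPU spinlock on RT */
	unsigned long bytes;	/* guarded by lock */
	unsigned long frames;	/* guarded by lock */
	unsigned long errors;	/* independent counter, this_cpu_*() only */
};

static DEFINE_PER_CPU(struct demo_percpu, demo_percpu) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void demo_update(unsigned int len)
{
	struct demo_percpu *p;

	/* Before: p = &get_cpu_var(demo_percpu); ... put_cpu_var(demo_percpu); */
	local_lock(&demo_percpu.lock);
	p = this_cpu_ptr(&demo_percpu);
	p->bytes += len;	/* both fields stay consistent with each other, */
	p->frames++;		/* yet the section remains preemptible on RT */
	local_unlock(&demo_percpu.lock);
}

static void demo_count(void)
{
	/* An independent counter needs no lock at all. */
	this_cpu_inc(demo_percpu.errors);
}

On !PREEMPT_RT the local_lock()/local_unlock() pair compiles down to the old preempt_disable()/preempt_enable(), so the non-RT behaviour is unchanged; on PREEMPT_RT it takes a per-CPU spinlock, which keeps CPU locality without disabling preemption.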
Known issues
- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com
The delta patch against v5.18-rc5-rt4 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.18/incr/patch-5.18-rc5-rt4-rt5.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.18-rc5-rt5
The RT patch against v5.18-rc5 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patch-5.18-rc5-rt5.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rc5-rt5.tar.xz
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
38 files changed, 2470 insertions, 750 deletions
diff --git a/patches/0001-sched-Fix-missing-prototype-warnings.patch b/patches/0001-sched-Fix-missing-prototype-warnings.patch index 25995095d3aa..dee84b170761 100644 --- a/patches/0001-sched-Fix-missing-prototype-warnings.patch +++ b/patches/0001-sched-Fix-missing-prototype-warnings.patch @@ -23,7 +23,7 @@ Link: https://lore.kernel.org/r/20220413133024.249118058@linutronix.de --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -2364,4 +2364,6 @@ static inline void sched_core_free(struc +@@ -2362,4 +2362,6 @@ static inline void sched_core_free(struc static inline void sched_core_fork(struct task_struct *p) { } #endif diff --git a/patches/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch b/patches/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch new file mode 100644 index 000000000000..075584b5dc47 --- /dev/null +++ b/patches/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch @@ -0,0 +1,69 @@ +From: Davidlohr Bueso <dave@stgolabs.net> +Date: Fri, 6 May 2022 12:57:55 +0200 +Subject: [PATCH 1/4] scsi: fcoe: Add a local_lock to fcoe_percpu + +fcoe_get_paged_crc_eof() relies on the caller having preemption +disabled to ensure the per-CPU fcoe_percpu context remains valid +throughout the call. This is done by either holding spinlocks +(such as bnx2fc_global_lock or qedf_global_lock) or the get_cpu() +from fcoe_alloc_paged_crc_eof(). This last one breaks PREEMPT_RT +semantics as there can be memory allocation and end up sleeping +in atomic contexts. + +Introduce a local_lock_t to struct fcoe_percpu that will keep the +non-RT case the same, mapping to preempt_disable/enable, while +RT will use a per-CPU spinlock allowing the region to be preemptible +but still maintain CPU locality. The other users of fcoe_percpu +are already safe in this regard and do not require local_lock()ing. 
+ +Signed-off-by: Davidlohr Bueso <dbueso@suse.de> +Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20211117025956.79616-3-dave@stgolabs.net +Link: https://lore.kernel.org/r/20220506105758.283887-2-bigeasy@linutronix.de +--- + drivers/scsi/fcoe/fcoe.c | 6 ++++-- + include/scsi/libfcoe.h | 2 ++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/fcoe/fcoe.c ++++ b/drivers/scsi/fcoe/fcoe.c +@@ -1453,9 +1453,10 @@ static int fcoe_alloc_paged_crc_eof(stru + struct fcoe_percpu_s *fps; + int rc; + +- fps = &get_cpu_var(fcoe_percpu); ++ local_lock(&fcoe_percpu.lock); ++ fps = this_cpu_ptr(&fcoe_percpu); + rc = fcoe_get_paged_crc_eof(skb, tlen, fps); +- put_cpu_var(fcoe_percpu); ++ local_unlock(&fcoe_percpu.lock); + + return rc; + } +@@ -2488,6 +2489,7 @@ static int __init fcoe_init(void) + p = per_cpu_ptr(&fcoe_percpu, cpu); + INIT_WORK(&p->work, fcoe_receive_work); + skb_queue_head_init(&p->fcoe_rx_list); ++ local_lock_init(&p->lock); + } + + /* Setup link change notification */ +--- a/include/scsi/libfcoe.h ++++ b/include/scsi/libfcoe.h +@@ -14,6 +14,7 @@ + #include <linux/netdevice.h> + #include <linux/skbuff.h> + #include <linux/workqueue.h> ++#include <linux/local_lock.h> + #include <linux/random.h> + #include <scsi/fc/fc_fcoe.h> + #include <scsi/libfc.h> +@@ -326,6 +327,7 @@ struct fcoe_percpu_s { + struct sk_buff_head fcoe_rx_list; + struct page *crc_eof_page; + int crc_eof_offset; ++ local_lock_t lock; + }; + + /** diff --git a/patches/0001-signal-Rename-send_signal-send_signal_locked.patch b/patches/0001-signal-Rename-send_signal-send_signal_locked.patch new file mode 100644 index 000000000000..2f1ba470b873 --- /dev/null +++ b/patches/0001-signal-Rename-send_signal-send_signal_locked.patch @@ -0,0 +1,123 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:34 -0500 +Subject: [PATCH 01/12] signal: Rename send_signal send_signal_locked + +Rename send_signal and __send_signal to send_signal_locked and +__send_signal_locked to make send_signal usable outside of +signal.c. + +Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-1-ebiederm@xmission.com +--- + include/linux/signal.h | 2 ++ + kernel/signal.c | 24 ++++++++++++------------ + 2 files changed, 14 insertions(+), 12 deletions(-) + +--- a/include/linux/signal.h ++++ b/include/linux/signal.h +@@ -283,6 +283,8 @@ extern int do_send_sig_info(int sig, str + extern int group_send_sig_info(int sig, struct kernel_siginfo *info, + struct task_struct *p, enum pid_type type); + extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); ++extern int send_signal_locked(int sig, struct kernel_siginfo *info, ++ struct task_struct *p, enum pid_type type); + extern int sigprocmask(int, sigset_t *, sigset_t *); + extern void set_current_blocked(sigset_t *); + extern void __set_current_blocked(const sigset_t *); +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -1071,8 +1071,8 @@ static inline bool legacy_queue(struct s + return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); + } + +-static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, +- enum pid_type type, bool force) ++static int __send_signal_locked(int sig, struct kernel_siginfo *info, ++ struct task_struct *t, enum pid_type type, bool force) + { + struct sigpending *pending; + struct sigqueue *q; +@@ -1212,8 +1212,8 @@ static inline bool has_si_pid_and_uid(st + return ret; + } + +-static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, +- enum pid_type type) ++int send_signal_locked(int sig, struct kernel_siginfo *info, ++ struct task_struct *t, enum pid_type type) + { + /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ + bool force = false; +@@ -1245,7 +1245,7 @@ static int send_signal(int sig, struct k + force = true; + } + } +- return __send_signal(sig, info, t, type, force); ++ return __send_signal_locked(sig, info, t, type, force); + } + + static void print_fatal_signal(int signr) +@@ -1284,7 +1284,7 @@ static int __init setup_print_fatal_sign + int + __group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) + { +- return send_signal(sig, info, p, PIDTYPE_TGID); ++ return send_signal_locked(sig, info, p, PIDTYPE_TGID); + } + + int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, +@@ -1294,7 +1294,7 @@ int do_send_sig_info(int sig, struct ker + int ret = -ESRCH; + + if (lock_task_sighand(p, &flags)) { +- ret = send_signal(sig, info, p, type); ++ ret = send_signal_locked(sig, info, p, type); + unlock_task_sighand(p, &flags); + } + +@@ -1347,7 +1347,7 @@ force_sig_info_to_task(struct kernel_sig + if (action->sa.sa_handler == SIG_DFL && + (!t->ptrace || (handler == HANDLER_EXIT))) + t->signal->flags &= ~SIGNAL_UNKILLABLE; +- ret = send_signal(sig, info, t, PIDTYPE_PID); ++ ret = send_signal_locked(sig, info, t, PIDTYPE_PID); + spin_unlock_irqrestore(&t->sighand->siglock, flags); + + return ret; +@@ -1567,7 +1567,7 @@ int kill_pid_usb_asyncio(int sig, int er + + if (sig) { + if (lock_task_sighand(p, &flags)) { +- ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false); ++ ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); + unlock_task_sighand(p, &flags); + } else + ret = -ESRCH; +@@ -2103,7 +2103,7 @@ bool do_notify_parent(struct task_struct + * parent's namespaces. 
+ */
+ if (valid_signal(sig) && sig)
+- __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false);
++ __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false);
+ __wake_up_parent(tsk, tsk->parent);
+ spin_unlock_irqrestore(&psig->siglock, flags);
+
+@@ -2601,7 +2601,7 @@ static int ptrace_signal(int signr, kern
+ /* If the (new) signal is now blocked, requeue it. */
+ if (sigismember(&current->blocked, signr) ||
+ fatal_signal_pending(current)) {
+- send_signal(signr, info, current, type);
++ send_signal_locked(signr, info, current, type);
+ signr = 0;
+ }
+
+@@ -4793,7 +4793,7 @@ void kdb_send_sig(struct task_struct *t,
+ "the deadlock.\n");
+ return;
+ }
+- ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
++ ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
+ spin_unlock(&t->sighand->siglock);
+ if (ret)
+ kdb_printf("Fail to deliver Signal %d to process %d.\n",
diff --git a/patches/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch b/patches/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch
new file mode 100644
index 000000000000..a6e7c6c558cd
--- /dev/null
+++ b/patches/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch
@@ -0,0 +1,516 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 6 May 2022 12:57:56 +0200
+Subject: [PATCH 2/4] scsi: fcoe: Use per-CPU API to update per-CPU statistics.
+
+The per-CPU statistics (struct fc_stats) are updated by getting a stable
+per-CPU pointer via get_cpu() + per_cpu_ptr() and then performing the
+increment. This can be optimized by using this_cpu_*() which will do
+whatever is needed on the architecture to perform the update safely and
+efficiently.
+The read out of the individual value (fc_get_host_stats()) should be
+done by using READ_ONCE() instead of a plain-C access. The difference is
+that READ_ONCE() will always perform a single access while the plain-C
+access can be split by the compiler into two loads if it appears
+beneficial.
+The usage of u64 has the side-effect that it is also 64bit wide on 32bit
+architectures and the read is always split into two loads. This can lead
+to strange values if the read happens during an update which alters both
+32bit parts of the 64bit value. This can be circumvented by either using
+32bit variables on 32bit architectures or extending the statistics with
+a sequence counter.
+
+Use this_cpu_*() API to update the statistics and READ_ONCE() to read
+it.
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220506105758.283887-3-bigeasy@linutronix.de +--- + drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 22 ++++++---------------- + drivers/scsi/bnx2fc/bnx2fc_io.c | 13 +++++-------- + drivers/scsi/fcoe/fcoe.c | 36 ++++++++++++------------------------ + drivers/scsi/fcoe/fcoe_ctlr.c | 26 +++++++++----------------- + drivers/scsi/fcoe/fcoe_transport.c | 6 +++--- + drivers/scsi/libfc/fc_fcp.c | 29 ++++++++++------------------- + drivers/scsi/libfc/fc_lport.c | 30 +++++++++++++++--------------- + drivers/scsi/qedf/qedf_main.c | 7 ++----- + 8 files changed, 62 insertions(+), 107 deletions(-) + +--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c ++++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +@@ -273,7 +273,6 @@ static int bnx2fc_xmit(struct fc_lport * + struct fcoe_port *port; + struct fcoe_hdr *hp; + struct bnx2fc_rport *tgt; +- struct fc_stats *stats; + u8 sof, eof; + u32 crc; + unsigned int hlen, tlen, elen; +@@ -399,10 +398,8 @@ static int bnx2fc_xmit(struct fc_lport * + } + + /*update tx stats */ +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->TxFrames++; +- stats->TxWords += wlen; +- put_cpu(); ++ this_cpu_inc(lport->stats->TxFrames); ++ this_cpu_add(lport->stats->TxWords, wlen); + + /* send down to lld */ + fr_dev(fp) = lport; +@@ -512,7 +509,6 @@ static void bnx2fc_recv_frame(struct sk_ + u32 fr_len, fr_crc; + struct fc_lport *lport; + struct fcoe_rcv_info *fr; +- struct fc_stats *stats; + struct fc_frame_header *fh; + struct fcoe_crc_eof crc_eof; + struct fc_frame *fp; +@@ -543,10 +539,8 @@ static void bnx2fc_recv_frame(struct sk_ + skb_pull(skb, sizeof(struct fcoe_hdr)); + fr_len = skb->len - sizeof(struct fcoe_crc_eof); + +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->RxFrames++; +- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; +- put_cpu(); ++ this_cpu_inc(lport->stats->RxFrames); ++ this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); +@@ -633,9 +627,7 @@ static void bnx2fc_recv_frame(struct sk_ + fr_crc = le32_to_cpu(fr_crc(fp)); + + if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- crc_err = (stats->InvalidCRCCount++); +- put_cpu(); ++ crc_err = this_cpu_inc_return(lport->stats->InvalidCRCCount); + if (crc_err < 5) + printk(KERN_WARNING PFX "dropping frame with " + "CRC error\n"); +@@ -964,9 +956,7 @@ static void bnx2fc_indicate_netevent(voi + mutex_unlock(&lport->lp_mutex); + fc_host_port_type(lport->host) = + FC_PORTTYPE_UNKNOWN; +- per_cpu_ptr(lport->stats, +- get_cpu())->LinkFailureCount++; +- put_cpu(); ++ this_cpu_inc(lport->stats->LinkFailureCount); + fcoe_clean_pending_queue(lport); + wait_for_upload = 1; + } +--- a/drivers/scsi/bnx2fc/bnx2fc_io.c ++++ b/drivers/scsi/bnx2fc/bnx2fc_io.c +@@ -2032,7 +2032,6 @@ int bnx2fc_post_io_req(struct bnx2fc_rpo + struct bnx2fc_interface *interface = port->priv; + struct bnx2fc_hba *hba = interface->hba; + struct fc_lport *lport = port->lport; +- struct fc_stats *stats; + int task_idx, index; + u16 xid; + +@@ -2045,20 +2044,18 @@ int bnx2fc_post_io_req(struct bnx2fc_rpo + io_req->data_xfer_len = scsi_bufflen(sc_cmd); + bnx2fc_priv(sc_cmd)->io_req = io_req; + +- stats = per_cpu_ptr(lport->stats, get_cpu()); + if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { + io_req->io_req_flags = BNX2FC_READ; +- stats->InputRequests++; +- stats->InputBytes += io_req->data_xfer_len; ++ this_cpu_inc(lport->stats->InputRequests); ++ 
this_cpu_add(lport->stats->InputBytes, io_req->data_xfer_len); + } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + io_req->io_req_flags = BNX2FC_WRITE; +- stats->OutputRequests++; +- stats->OutputBytes += io_req->data_xfer_len; ++ this_cpu_inc(lport->stats->OutputRequests); ++ this_cpu_add(lport->stats->OutputBytes, io_req->data_xfer_len); + } else { + io_req->io_req_flags = 0; +- stats->ControlRequests++; ++ this_cpu_inc(lport->stats->ControlRequests); + } +- put_cpu(); + + xid = io_req->xid; + +--- a/drivers/scsi/fcoe/fcoe.c ++++ b/drivers/scsi/fcoe/fcoe.c +@@ -1434,8 +1434,7 @@ static int fcoe_rcv(struct sk_buff *skb, + + return NET_RX_SUCCESS; + err: +- per_cpu_ptr(lport->stats, get_cpu())->ErrorFrames++; +- put_cpu(); ++ this_cpu_inc(lport->stats->ErrorFrames); + err2: + kfree_skb(skb); + return NET_RX_DROP; +@@ -1475,7 +1474,6 @@ static int fcoe_xmit(struct fc_lport *lp + struct ethhdr *eh; + struct fcoe_crc_eof *cp; + struct sk_buff *skb; +- struct fc_stats *stats; + struct fc_frame_header *fh; + unsigned int hlen; /* header length implies the version */ + unsigned int tlen; /* trailer length */ +@@ -1586,10 +1584,8 @@ static int fcoe_xmit(struct fc_lport *lp + skb_shinfo(skb)->gso_size = 0; + } + /* update tx stats: regardless if LLD fails */ +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->TxFrames++; +- stats->TxWords += wlen; +- put_cpu(); ++ this_cpu_inc(lport->stats->TxFrames); ++ this_cpu_add(lport->stats->TxWords, wlen); + + /* send down to lld */ + fr_dev(fp) = lport; +@@ -1611,7 +1607,6 @@ static inline int fcoe_filter_frames(str + struct fcoe_interface *fcoe; + struct fc_frame_header *fh; + struct sk_buff *skb = (struct sk_buff *)fp; +- struct fc_stats *stats; + + /* + * We only check CRC if no offload is available and if it is +@@ -1641,11 +1636,8 @@ static inline int fcoe_filter_frames(str + return 0; + } + +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->InvalidCRCCount++; +- if (stats->InvalidCRCCount < 5) ++ if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < 5) + printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); +- put_cpu(); + return -EINVAL; + } + +@@ -1658,7 +1650,6 @@ static void fcoe_recv_frame(struct sk_bu + u32 fr_len; + struct fc_lport *lport; + struct fcoe_rcv_info *fr; +- struct fc_stats *stats; + struct fcoe_crc_eof crc_eof; + struct fc_frame *fp; + struct fcoe_hdr *hp; +@@ -1686,9 +1677,11 @@ static void fcoe_recv_frame(struct sk_bu + */ + hp = (struct fcoe_hdr *) skb_network_header(skb); + +- stats = per_cpu_ptr(lport->stats, get_cpu()); + if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { +- if (stats->ErrorFrames < 5) ++ struct fc_stats *stats; ++ ++ stats = per_cpu_ptr(lport->stats, raw_smp_processor_id()); ++ if (READ_ONCE(stats->ErrorFrames) < 5) + printk(KERN_WARNING "fcoe: FCoE version " + "mismatch: The frame has " + "version %x, but the " +@@ -1701,8 +1694,8 @@ static void fcoe_recv_frame(struct sk_bu + skb_pull(skb, sizeof(struct fcoe_hdr)); + fr_len = skb->len - sizeof(struct fcoe_crc_eof); + +- stats->RxFrames++; +- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; ++ this_cpu_inc(lport->stats->RxFrames); ++ this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); +@@ -1718,13 +1711,11 @@ static void fcoe_recv_frame(struct sk_bu + goto drop; + + if (!fcoe_filter_frames(lport, fp)) { +- put_cpu(); + fc_exch_recv(lport, fp); + return; + } + drop: +- stats->ErrorFrames++; +- put_cpu(); ++ this_cpu_inc(lport->stats->ErrorFrames); + 
kfree_skb(skb); + } + +@@ -1848,7 +1839,6 @@ static int fcoe_device_notification(stru + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct fcoe_ctlr *ctlr; + struct fcoe_interface *fcoe; +- struct fc_stats *stats; + u32 link_possible = 1; + u32 mfs; + int rc = NOTIFY_OK; +@@ -1922,9 +1912,7 @@ static int fcoe_device_notification(stru + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->LinkFailureCount++; +- put_cpu(); ++ this_cpu_inc(lport->stats->LinkFailureCount); + fcoe_clean_pending_queue(lport); + } + } +--- a/drivers/scsi/fcoe/fcoe_ctlr.c ++++ b/drivers/scsi/fcoe/fcoe_ctlr.c +@@ -824,22 +824,21 @@ static unsigned long fcoe_ctlr_age_fcfs( + unsigned long deadline; + unsigned long sel_time = 0; + struct list_head del_list; +- struct fc_stats *stats; + + INIT_LIST_HEAD(&del_list); + +- stats = per_cpu_ptr(fip->lp->stats, get_cpu()); +- + list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { + deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; + if (fip->sel_fcf == fcf) { + if (time_after(jiffies, deadline)) { +- stats->MissDiscAdvCount++; ++ u64 miss_cnt; ++ ++ miss_cnt = this_cpu_inc_return(fip->lp->stats->MissDiscAdvCount); + printk(KERN_INFO "libfcoe: host%d: " + "Missing Discovery Advertisement " + "for fab %16.16llx count %lld\n", + fip->lp->host->host_no, fcf->fabric_name, +- stats->MissDiscAdvCount); ++ miss_cnt); + } else if (time_after(next_timer, deadline)) + next_timer = deadline; + } +@@ -855,7 +854,7 @@ static unsigned long fcoe_ctlr_age_fcfs( + */ + list_del(&fcf->list); + list_add(&fcf->list, &del_list); +- stats->VLinkFailureCount++; ++ this_cpu_inc(fip->lp->stats->VLinkFailureCount); + } else { + if (time_after(next_timer, deadline)) + next_timer = deadline; +@@ -864,7 +863,6 @@ static unsigned long fcoe_ctlr_age_fcfs( + sel_time = fcf->time; + } + } +- put_cpu(); + + list_for_each_entry_safe(fcf, next, &del_list, list) { + /* Removes fcf from current list */ +@@ -1142,7 +1140,6 @@ static void fcoe_ctlr_recv_els(struct fc + struct fip_desc *desc; + struct fip_encaps *els; + struct fcoe_fcf *sel; +- struct fc_stats *stats; + enum fip_desc_type els_dtype = 0; + u8 els_op; + u8 sub; +@@ -1286,10 +1283,8 @@ static void fcoe_ctlr_recv_els(struct fc + fr_dev(fp) = lport; + fr_encaps(fp) = els_dtype; + +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->RxFrames++; +- stats->RxWords += skb->len / FIP_BPW; +- put_cpu(); ++ this_cpu_inc(lport->stats->RxFrames); ++ this_cpu_add(lport->stats->RxWords, skb->len / FIP_BPW); + + fc_exch_recv(lport, fp); + return; +@@ -1427,9 +1422,7 @@ static void fcoe_ctlr_recv_clr_vlink(str + ntoh24(vp->fd_fc_id)); + if (vn_port && (vn_port == lport)) { + mutex_lock(&fip->ctlr_mutex); +- per_cpu_ptr(lport->stats, +- get_cpu())->VLinkFailureCount++; +- put_cpu(); ++ this_cpu_inc(lport->stats->VLinkFailureCount); + fcoe_ctlr_reset(fip); + mutex_unlock(&fip->ctlr_mutex); + } +@@ -1457,8 +1450,7 @@ static void fcoe_ctlr_recv_clr_vlink(str + * followed by physical port + */ + mutex_lock(&fip->ctlr_mutex); +- per_cpu_ptr(lport->stats, get_cpu())->VLinkFailureCount++; +- put_cpu(); ++ this_cpu_inc(lport->stats->VLinkFailureCount); + fcoe_ctlr_reset(fip); + mutex_unlock(&fip->ctlr_mutex); + +--- a/drivers/scsi/fcoe/fcoe_transport.c ++++ b/drivers/scsi/fcoe/fcoe_transport.c +@@ -183,9 +183,9 @@ void __fcoe_get_lesb(struct fc_lport *lp + memset(lesb, 0, sizeof(*lesb)); + for_each_possible_cpu(cpu) { + stats = per_cpu_ptr(lport->stats, cpu); +- lfc += 
stats->LinkFailureCount; +- vlfc += stats->VLinkFailureCount; +- mdac += stats->MissDiscAdvCount; ++ lfc += READ_ONCE(stats->LinkFailureCount); ++ vlfc += READ_ONCE(stats->VLinkFailureCount); ++ mdac += READ_ONCE(stats->MissDiscAdvCount); + } + lesb->lesb_link_fail = htonl(lfc); + lesb->lesb_vlink_fail = htonl(vlfc); +--- a/drivers/scsi/libfc/fc_fcp.c ++++ b/drivers/scsi/libfc/fc_fcp.c +@@ -143,8 +143,7 @@ static struct fc_fcp_pkt *fc_fcp_pkt_all + INIT_LIST_HEAD(&fsp->list); + spin_lock_init(&fsp->scsi_pkt_lock); + } else { +- per_cpu_ptr(lport->stats, get_cpu())->FcpPktAllocFails++; +- put_cpu(); ++ this_cpu_inc(lport->stats->FcpPktAllocFails); + } + return fsp; + } +@@ -266,8 +265,7 @@ static int fc_fcp_send_abort(struct fc_f + if (!fsp->seq_ptr) + return -EINVAL; + +- per_cpu_ptr(fsp->lp->stats, get_cpu())->FcpPktAborts++; +- put_cpu(); ++ this_cpu_inc(fsp->lp->stats->FcpPktAborts); + + fsp->state |= FC_SRB_ABORT_PENDING; + rc = fc_seq_exch_abort(fsp->seq_ptr, 0); +@@ -436,8 +434,7 @@ static inline struct fc_frame *fc_fcp_fr + if (likely(fp)) + return fp; + +- per_cpu_ptr(lport->stats, get_cpu())->FcpFrameAllocFails++; +- put_cpu(); ++ this_cpu_inc(lport->stats->FcpFrameAllocFails); + /* error case */ + fc_fcp_can_queue_ramp_down(lport); + shost_printk(KERN_ERR, lport->host, +@@ -471,7 +468,6 @@ static void fc_fcp_recv_data(struct fc_f + { + struct scsi_cmnd *sc = fsp->cmd; + struct fc_lport *lport = fsp->lp; +- struct fc_stats *stats; + struct fc_frame_header *fh; + size_t start_offset; + size_t offset; +@@ -533,14 +529,12 @@ static void fc_fcp_recv_data(struct fc_f + + if (~crc != le32_to_cpu(fr_crc(fp))) { + crc_err: +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->ErrorFrames++; ++ this_cpu_inc(lport->stats->ErrorFrames); + /* per cpu count, not total count, but OK for limit */ +- if (stats->InvalidCRCCount++ < FC_MAX_ERROR_CNT) ++ if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < FC_MAX_ERROR_CNT) + printk(KERN_WARNING "libfc: CRC error on data " + "frame for port (%6.6x)\n", + lport->port_id); +- put_cpu(); + /* + * Assume the frame is total garbage. 
+ * We may have copied it over the good part +@@ -1861,7 +1855,6 @@ int fc_queuecommand(struct Scsi_Host *sh + struct fc_fcp_pkt *fsp; + int rval; + int rc = 0; +- struct fc_stats *stats; + + rval = fc_remote_port_chkready(rport); + if (rval) { +@@ -1913,20 +1906,18 @@ int fc_queuecommand(struct Scsi_Host *sh + /* + * setup the data direction + */ +- stats = per_cpu_ptr(lport->stats, get_cpu()); + if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { + fsp->req_flags = FC_SRB_READ; +- stats->InputRequests++; +- stats->InputBytes += fsp->data_len; ++ this_cpu_inc(lport->stats->InputRequests); ++ this_cpu_add(lport->stats->InputBytes, fsp->data_len); + } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + fsp->req_flags = FC_SRB_WRITE; +- stats->OutputRequests++; +- stats->OutputBytes += fsp->data_len; ++ this_cpu_inc(lport->stats->OutputRequests); ++ this_cpu_add(lport->stats->OutputBytes, fsp->data_len); + } else { + fsp->req_flags = 0; +- stats->ControlRequests++; ++ this_cpu_inc(lport->stats->ControlRequests); + } +- put_cpu(); + + /* + * send it to the lower layer +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -308,21 +308,21 @@ struct fc_host_statistics *fc_get_host_s + + stats = per_cpu_ptr(lport->stats, cpu); + +- fc_stats->tx_frames += stats->TxFrames; +- fc_stats->tx_words += stats->TxWords; +- fc_stats->rx_frames += stats->RxFrames; +- fc_stats->rx_words += stats->RxWords; +- fc_stats->error_frames += stats->ErrorFrames; +- fc_stats->invalid_crc_count += stats->InvalidCRCCount; +- fc_stats->fcp_input_requests += stats->InputRequests; +- fc_stats->fcp_output_requests += stats->OutputRequests; +- fc_stats->fcp_control_requests += stats->ControlRequests; +- fcp_in_bytes += stats->InputBytes; +- fcp_out_bytes += stats->OutputBytes; +- fc_stats->fcp_packet_alloc_failures += stats->FcpPktAllocFails; +- fc_stats->fcp_packet_aborts += stats->FcpPktAborts; +- fc_stats->fcp_frame_alloc_failures += stats->FcpFrameAllocFails; +- fc_stats->link_failure_count += stats->LinkFailureCount; ++ fc_stats->tx_frames += READ_ONCE(stats->TxFrames); ++ fc_stats->tx_words += READ_ONCE(stats->TxWords); ++ fc_stats->rx_frames += READ_ONCE(stats->RxFrames); ++ fc_stats->rx_words += READ_ONCE(stats->RxWords); ++ fc_stats->error_frames += READ_ONCE(stats->ErrorFrames); ++ fc_stats->invalid_crc_count += READ_ONCE(stats->InvalidCRCCount); ++ fc_stats->fcp_input_requests += READ_ONCE(stats->InputRequests); ++ fc_stats->fcp_output_requests += READ_ONCE(stats->OutputRequests); ++ fc_stats->fcp_control_requests += READ_ONCE(stats->ControlRequests); ++ fcp_in_bytes += READ_ONCE(stats->InputBytes); ++ fcp_out_bytes += READ_ONCE(stats->OutputBytes); ++ fc_stats->fcp_packet_alloc_failures += READ_ONCE(stats->FcpPktAllocFails); ++ fc_stats->fcp_packet_aborts += READ_ONCE(stats->FcpPktAborts); ++ fc_stats->fcp_frame_alloc_failures += READ_ONCE(stats->FcpFrameAllocFails); ++ fc_stats->link_failure_count += READ_ONCE(stats->LinkFailureCount); + } + fc_stats->fcp_input_megabytes = div_u64(fcp_in_bytes, 1000000); + fc_stats->fcp_output_megabytes = div_u64(fcp_out_bytes, 1000000); +--- a/drivers/scsi/qedf/qedf_main.c ++++ b/drivers/scsi/qedf/qedf_main.c +@@ -1067,7 +1067,6 @@ static int qedf_xmit(struct fc_lport *lp + u32 crc; + unsigned int hlen, tlen, elen; + int wlen; +- struct fc_stats *stats; + struct fc_lport *tmp_lport; + struct fc_lport *vn_port = NULL; + struct qedf_rport *fcport; +@@ -1215,10 +1214,8 @@ static int qedf_xmit(struct fc_lport *lp + hp->fcoe_sof = sof; + + /*update tx 
stats */ +- stats = per_cpu_ptr(lport->stats, get_cpu()); +- stats->TxFrames++; +- stats->TxWords += wlen; +- put_cpu(); ++ this_cpu_inc(lport->stats->TxFrames); ++ this_cpu_add(lport->stats->TxWords, wlen); + + /* Get VLAN ID from skb for printing purposes */ + __vlan_hwaccel_get_tag(skb, &vlan_tci); diff --git a/patches/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch b/patches/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch new file mode 100644 index 000000000000..797cefba2260 --- /dev/null +++ b/patches/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch @@ -0,0 +1,96 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:35 -0500 +Subject: [PATCH 02/12] signal: Replace __group_send_sig_info with + send_signal_locked + +The function __group_send_sig_info is just a light wrapper around +send_signal_locked with one parameter fixed to a constant value. As +the wrapper adds no real value update the code to directly call the +wrapped function. + +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-2-ebiederm@xmission.com +--- + drivers/tty/tty_jobctrl.c | 4 ++-- + include/linux/signal.h | 1 - + kernel/signal.c | 8 +------- + kernel/time/posix-cpu-timers.c | 6 +++--- + 4 files changed, 6 insertions(+), 13 deletions(-) + +--- a/drivers/tty/tty_jobctrl.c ++++ b/drivers/tty/tty_jobctrl.c +@@ -215,8 +215,8 @@ int tty_signal_session_leader(struct tty + spin_unlock_irq(&p->sighand->siglock); + continue; + } +- __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); +- __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); ++ send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); ++ send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); + put_pid(p->signal->tty_old_pgrp); /* A noop */ + spin_lock(&tty->ctrl.lock); + tty_pgrp = get_pid(tty->ctrl.pgrp); +--- a/include/linux/signal.h ++++ b/include/linux/signal.h +@@ -282,7 +282,6 @@ extern int do_send_sig_info(int sig, str + struct task_struct *p, enum pid_type type); + extern int group_send_sig_info(int sig, struct kernel_siginfo *info, + struct task_struct *p, enum pid_type type); +-extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); + extern int send_signal_locked(int sig, struct kernel_siginfo *info, + struct task_struct *p, enum pid_type type); + extern int sigprocmask(int, sigset_t *, sigset_t *); +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -1281,12 +1281,6 @@ static int __init setup_print_fatal_sign + + __setup("print-fatal-signals=", setup_print_fatal_signals); + +-int +-__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) +-{ +- return send_signal_locked(sig, info, p, PIDTYPE_TGID); +-} +- + int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, + enum pid_type type) + { +@@ -2173,7 +2167,7 @@ static void do_notify_parent_cldstop(str + spin_lock_irqsave(&sighand->siglock, flags); + if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && + !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) +- __group_send_sig_info(SIGCHLD, &info, parent); ++ send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); + /* + * Even if SIGCHLD is not generated, we must wake up wait4 calls. 
+ */ +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -870,7 +870,7 @@ static inline void check_dl_overrun(stru + { + if (tsk->dl.dl_overrun) { + tsk->dl.dl_overrun = 0; +- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); ++ send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); + } + } + +@@ -884,7 +884,7 @@ static bool check_rlimit(u64 time, u64 l + rt ? "RT" : "CPU", hard ? "hard" : "soft", + current->comm, task_pid_nr(current)); + } +- __group_send_sig_info(signo, SEND_SIG_PRIV, current); ++ send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); + return true; + } + +@@ -958,7 +958,7 @@ static void check_cpu_itimer(struct task + trace_itimer_expire(signo == SIGPROF ? + ITIMER_PROF : ITIMER_VIRTUAL, + task_tgid(tsk), cur_time); +- __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); ++ send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); + } + + if (it->expires && it->expires < *expires) diff --git a/patches/0003-ptrace-um-Replace-PT_DTRACE-with-TIF_SINGLESTEP.patch b/patches/0003-ptrace-um-Replace-PT_DTRACE-with-TIF_SINGLESTEP.patch new file mode 100644 index 000000000000..150d99b8ab84 --- /dev/null +++ b/patches/0003-ptrace-um-Replace-PT_DTRACE-with-TIF_SINGLESTEP.patch @@ -0,0 +1,133 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:36 -0500 +Subject: [PATCH 03/12] ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP + +User mode linux is the last user of the PT_DTRACE flag. Using the flag to indicate +single stepping is a little confusing and worse changing tsk->ptrace without locking +could potentionally cause problems. + +So use a thread info flag with a better name instead of flag in tsk->ptrace. + +Remove the definition PT_DTRACE as uml is the last user. + +Cc: stable@vger.kernel.org +Acked-by: Johannes Berg <johannes@sipsolutions.net> +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-3-ebiederm@xmission.com +--- + arch/um/include/asm/thread_info.h | 2 ++ + arch/um/kernel/exec.c | 2 +- + arch/um/kernel/process.c | 2 +- + arch/um/kernel/ptrace.c | 8 ++++---- + arch/um/kernel/signal.c | 4 ++-- + include/linux/ptrace.h | 1 - + 6 files changed, 10 insertions(+), 9 deletions(-) + +--- a/arch/um/include/asm/thread_info.h ++++ b/arch/um/include/asm/thread_info.h +@@ -60,6 +60,7 @@ static inline struct thread_info *curren + #define TIF_RESTORE_SIGMASK 7 + #define TIF_NOTIFY_RESUME 8 + #define TIF_SECCOMP 9 /* secure computing */ ++#define TIF_SINGLESTEP 10 /* single stepping userspace */ + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +@@ -68,5 +69,6 @@ static inline struct thread_info *curren + #define _TIF_MEMDIE (1 << TIF_MEMDIE) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) ++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) + + #endif +--- a/arch/um/kernel/exec.c ++++ b/arch/um/kernel/exec.c +@@ -43,7 +43,7 @@ void start_thread(struct pt_regs *regs, + { + PT_REGS_IP(regs) = eip; + PT_REGS_SP(regs) = esp; +- current->ptrace &= ~PT_DTRACE; ++ clear_thread_flag(TIF_SINGLESTEP); + #ifdef SUBARCH_EXECVE1 + SUBARCH_EXECVE1(regs->regs); + #endif +--- a/arch/um/kernel/process.c ++++ b/arch/um/kernel/process.c +@@ -335,7 +335,7 @@ int singlestepping(void * t) + { + struct task_struct *task = t ? 
t : current;
+
+- if (!(task->ptrace & PT_DTRACE))
++ if (!test_thread_flag(TIF_SINGLESTEP))
+ return 0;
+
+ if (task->thread.singlestep_syscall)
+--- a/arch/um/kernel/ptrace.c
++++ b/arch/um/kernel/ptrace.c
+@@ -11,7 +11,7 @@
+
+ void user_enable_single_step(struct task_struct *child)
+ {
+- child->ptrace |= PT_DTRACE;
++ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -21,7 +21,7 @@ void user_enable_single_step(struct task
+
+ void user_disable_single_step(struct task_struct *child)
+ {
+- child->ptrace &= ~PT_DTRACE;
++ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -120,7 +120,7 @@ static void send_sigtrap(struct uml_pt_r
+ }
+
+ /*
+- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
++ * XXX Check TIF_SINGLESTEP for singlestepping check and
+ * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
+ */
+ int syscall_trace_enter(struct pt_regs *regs)
+@@ -144,7 +144,7 @@ void syscall_trace_leave(struct pt_regs
+ audit_syscall_exit(regs);
+
+ /* Fake a debug trap */
+- if (ptraced & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ send_sigtrap(&regs->regs, 0);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+--- a/arch/um/kernel/signal.c
++++ b/arch/um/kernel/signal.c
+@@ -53,7 +53,7 @@ static void handle_signal(struct ksignal
+ unsigned long sp;
+ int err;
+
+- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
++ if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
+ singlestep = 1;
+
+ /* Did we come from a system call? */
+@@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs)
+ * on the host. The tracing thread will check this flag and
+ * PTRACE_SYSCALL if necessary.
+ */
+- if (current->ptrace & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ current->thread.singlestep_syscall =
+ is_syscall(PT_REGS_IP(&current->thread.regs));
+
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_
+
+ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
+ #define PT_PTRACED 0x00000001
+-#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
+
+ #define PT_OPT_FLAG_SHIFT 3
+ /* PT_TRACE_* event enable flags */
diff --git a/patches/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch b/patches/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch
new file mode 100644
index 000000000000..6cc4c9121000
--- /dev/null
+++ b/patches/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch
@@ -0,0 +1,43 @@
+From: Davidlohr Bueso <dave@stgolabs.net>
+Date: Fri, 6 May 2022 12:57:57 +0200
+Subject: [PATCH 3/4] scsi: libfc: Remove get_cpu() semantics in
+ fc_exch_em_alloc()
+
+The get_cpu() in fc_exch_em_alloc() was introduced in:
+
+ f018b73af6db ([SCSI] libfc, libfcoe, fcoe: use smp_processor_id() only when preempt disabled)
+
+for no other reason than to simply use smp_processor_id()
+without getting a warning, because everything is done with
+the pool->lock held anyway. However, get_cpu(), by disabling
+preemption, does not play well with PREEMPT_RT, particularly
+when acquiring a regular (and thus sleepable) spinlock.
+
+Therefore remove the get_cpu() and just use the unstable value
+as we will have CPU locality guarantees next by taking the lock.
+The window of migration, as noted by Sebastian, is small and
+even if it happens the result is correct.
+ +Signed-off-by: Davidlohr Bueso <dbueso@suse.de> +Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20211117025956.79616-2-dave@stgolabs.net +Link: https://lore.kernel.org/r/20220506105758.283887-4-bigeasy@linutronix.de +--- + drivers/scsi/libfc/fc_exch.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -825,10 +825,9 @@ static struct fc_exch *fc_exch_em_alloc( + } + memset(ep, 0, sizeof(*ep)); + +- cpu = get_cpu(); ++ cpu = raw_smp_processor_id(); + pool = per_cpu_ptr(mp->pool, cpu); + spin_lock_bh(&pool->lock); +- put_cpu(); + + /* peek cache of free slot */ + if (pool->left != FC_XID_UNKNOWN) { diff --git a/patches/0004-ptrace-xtensa-Replace-PT_SINGLESTEP-with-TIF_SINGLES.patch b/patches/0004-ptrace-xtensa-Replace-PT_SINGLESTEP-with-TIF_SINGLES.patch new file mode 100644 index 000000000000..2ca6f66a3805 --- /dev/null +++ b/patches/0004-ptrace-xtensa-Replace-PT_SINGLESTEP-with-TIF_SINGLES.patch @@ -0,0 +1,77 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:37 -0500 +Subject: [PATCH 04/12] ptrace/xtensa: Replace PT_SINGLESTEP with + TIF_SINGLESTEP + +xtensa is the last user of the PT_SINGLESTEP flag. Changing tsk->ptrace in +user_enable_single_step and user_disable_single_step without locking could +potentiallly cause problems. + +So use a thread info flag instead of a flag in tsk->ptrace. Use TIF_SINGLESTEP +that xtensa already had defined but unused. + +Remove the definitions of PT_SINGLESTEP and PT_BLOCKSTEP as they have no more users. + +Cc: stable@vger.kernel.org +Acked-by: Max Filippov <jcmvbkbc@gmail.com> +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-4-ebiederm@xmission.com +--- + arch/xtensa/kernel/ptrace.c | 4 ++-- + arch/xtensa/kernel/signal.c | 4 ++-- + include/linux/ptrace.h | 6 ------ + 3 files changed, 4 insertions(+), 10 deletions(-) + +--- a/arch/xtensa/kernel/ptrace.c ++++ b/arch/xtensa/kernel/ptrace.c +@@ -225,12 +225,12 @@ const struct user_regset_view *task_user + + void user_enable_single_step(struct task_struct *child) + { +- child->ptrace |= PT_SINGLESTEP; ++ set_tsk_thread_flag(child, TIF_SINGLESTEP); + } + + void user_disable_single_step(struct task_struct *child) + { +- child->ptrace &= ~PT_SINGLESTEP; ++ clear_tsk_thread_flag(child, TIF_SINGLESTEP); + } + + /* +--- a/arch/xtensa/kernel/signal.c ++++ b/arch/xtensa/kernel/signal.c +@@ -473,7 +473,7 @@ static void do_signal(struct pt_regs *re + /* Set up the stack frame */ + ret = setup_frame(&ksig, sigmask_to_save(), regs); + signal_setup_done(ret, &ksig, 0); +- if (current->ptrace & PT_SINGLESTEP) ++ if (test_thread_flag(TIF_SINGLESTEP)) + task_pt_regs(current)->icountlevel = 1; + + return; +@@ -499,7 +499,7 @@ static void do_signal(struct pt_regs *re + /* If there's no signal to deliver, we just restore the saved mask. 
*/ + restore_saved_sigmask(); + +- if (current->ptrace & PT_SINGLESTEP) ++ if (test_thread_flag(TIF_SINGLESTEP)) + task_pt_regs(current)->icountlevel = 1; + return; + } +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -46,12 +46,6 @@ extern int ptrace_access_vm(struct task_ + #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) + #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) + +-/* single stepping state bits (used on ARM and PA-RISC) */ +-#define PT_SINGLESTEP_BIT 31 +-#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) +-#define PT_BLOCKSTEP_BIT 30 +-#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) +- + extern long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data); + extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); diff --git a/patches/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch b/patches/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch new file mode 100644 index 000000000000..25e7e9f4a1e6 --- /dev/null +++ b/patches/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch @@ -0,0 +1,55 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 6 May 2022 12:57:58 +0200 +Subject: [PATCH 4/4] scsi: bnx2fc: Avoid using get_cpu() in + bnx2fc_cmd_alloc(). + +Using get_cpu() leads to disabling preemption and in this context it is +not possible to acquire the following spinlock_t on PREEMPT_RT because +it becomes a sleeping lock. + +Commit + 0ea5c27583e1c ("[SCSI] bnx2fc: common free list for cleanup commands") + +says that it is using get_cpu() as a fix in case the CPU is preempted. +While this might be true, the important part is that it is now using the +same CPU for locking and unlocking while previously it always relied on +smp_processor_id(). +The date structure itself is protected with a lock so it does not rely +on CPU-local access. + +Replace get_cpu() with raw_smp_processor_id() to obtain the current CPU +number which is used as an index for the per-CPU resource. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220506105758.283887-5-bigeasy@linutronix.de +--- + drivers/scsi/bnx2fc/bnx2fc_io.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/scsi/bnx2fc/bnx2fc_io.c ++++ b/drivers/scsi/bnx2fc/bnx2fc_io.c +@@ -472,7 +472,7 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru + u32 free_sqes; + u32 max_sqes; + u16 xid; +- int index = get_cpu(); ++ int index = raw_smp_processor_id(); + + max_sqes = BNX2FC_SCSI_MAX_SQES; + /* +@@ -485,7 +485,6 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru + (tgt->num_active_ios.counter >= max_sqes) || + (free_sqes + max_sqes <= BNX2FC_SQ_WQES_MAX)) { + spin_unlock_bh(&cmd_mgr->free_list_lock[index]); +- put_cpu(); + return NULL; + } + +@@ -498,7 +497,6 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru + atomic_inc(&tgt->num_active_ios); + atomic_dec(&tgt->free_sqes); + spin_unlock_bh(&cmd_mgr->free_list_lock[index]); +- put_cpu(); + + INIT_LIST_HEAD(&io_req->link); + diff --git a/patches/0005-ptrace-Remove-arch_ptrace_attach.patch b/patches/0005-ptrace-Remove-arch_ptrace_attach.patch new file mode 100644 index 000000000000..e9e518ef7cdf --- /dev/null +++ b/patches/0005-ptrace-Remove-arch_ptrace_attach.patch @@ -0,0 +1,157 @@ +From: "Eric W. 
Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:38 -0500 +Subject: [PATCH 05/12] ptrace: Remove arch_ptrace_attach + +The last remaining implementation of arch_ptrace_attach is ia64's +ptrace_attach_sync_user_rbs which was added at the end of 2007 in +commit aa91a2e90044 ("[IA64] Synchronize RBS on PTRACE_ATTACH"). + +Reading the comments and examining the code ptrace_attach_sync_user_rbs +has the sole purpose of saving registers to the stack when ptrace_attach +changes TASK_STOPPED to TASK_TRACED. In all other cases arch_ptrace_stop +takes care of the register saving. + +In commit d79fdd6d96f4 ("ptrace: Clean transitions between TASK_STOPPED and TRACED") +modified ptrace_attach to wake up the thread and enter ptrace_stop normally even +when the thread starts out stopped. + +This makes ptrace_attach_sync_user_rbs completely unnecessary. So just +remove it. + +Cc: linux-ia64@vger.kernel.org +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-5-ebiederm@xmission.com +--- + arch/ia64/include/asm/ptrace.h | 4 -- + arch/ia64/kernel/ptrace.c | 57 ----------------------------------------- + kernel/ptrace.c | 18 ------------ + 3 files changed, 79 deletions(-) + +--- a/arch/ia64/include/asm/ptrace.h ++++ b/arch/ia64/include/asm/ptrace.h +@@ -139,10 +139,6 @@ static inline long regs_return_value(str + #define arch_ptrace_stop_needed() \ + (!test_thread_flag(TIF_RESTORE_RSE)) + +- extern void ptrace_attach_sync_user_rbs (struct task_struct *); +- #define arch_ptrace_attach(child) \ +- ptrace_attach_sync_user_rbs(child) +- + #define arch_has_single_step() (1) + #define arch_has_block_step() (1) + +--- a/arch/ia64/kernel/ptrace.c ++++ b/arch/ia64/kernel/ptrace.c +@@ -618,63 +618,6 @@ void ia64_sync_krbs(void) + } + + /* +- * After PTRACE_ATTACH, a thread's register backing store area in user +- * space is assumed to contain correct data whenever the thread is +- * stopped. arch_ptrace_stop takes care of this on tracing stops. +- * But if the child was already stopped for job control when we attach +- * to it, then it might not ever get into ptrace_stop by the time we +- * want to examine the user memory containing the RBS. +- */ +-void +-ptrace_attach_sync_user_rbs (struct task_struct *child) +-{ +- int stopped = 0; +- struct unw_frame_info info; +- +- /* +- * If the child is in TASK_STOPPED, we need to change that to +- * TASK_TRACED momentarily while we operate on it. This ensures +- * that the child won't be woken up and return to user mode while +- * we are doing the sync. (It can only be woken up for SIGKILL.) +- */ +- +- read_lock(&tasklist_lock); +- if (child->sighand) { +- spin_lock_irq(&child->sighand->siglock); +- if (READ_ONCE(child->__state) == TASK_STOPPED && +- !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { +- set_notify_resume(child); +- +- WRITE_ONCE(child->__state, TASK_TRACED); +- stopped = 1; +- } +- spin_unlock_irq(&child->sighand->siglock); +- } +- read_unlock(&tasklist_lock); +- +- if (!stopped) +- return; +- +- unw_init_from_blocked_task(&info, child); +- do_sync_rbs(&info, ia64_sync_user_rbs); +- +- /* +- * Now move the child back into TASK_STOPPED if it should be in a +- * job control stop, so that SIGCONT can be used to wake it up. 
+- */ +- read_lock(&tasklist_lock); +- if (child->sighand) { +- spin_lock_irq(&child->sighand->siglock); +- if (READ_ONCE(child->__state) == TASK_TRACED && +- (child->signal->flags & SIGNAL_STOP_STOPPED)) { +- WRITE_ONCE(child->__state, TASK_STOPPED); +- } +- spin_unlock_irq(&child->sighand->siglock); +- } +- read_unlock(&tasklist_lock); +-} +- +-/* + * Write f32-f127 back to task->thread.fph if it has been modified. + */ + inline void +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -1285,10 +1285,6 @@ int ptrace_request(struct task_struct *c + return ret; + } + +-#ifndef arch_ptrace_attach +-#define arch_ptrace_attach(child) do { } while (0) +-#endif +- + SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, + unsigned long, data) + { +@@ -1297,8 +1293,6 @@ SYSCALL_DEFINE4(ptrace, long, request, l + + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); +- if (!ret) +- arch_ptrace_attach(current); + goto out; + } + +@@ -1310,12 +1304,6 @@ SYSCALL_DEFINE4(ptrace, long, request, l + + if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { + ret = ptrace_attach(child, request, addr, data); +- /* +- * Some architectures need to do book-keeping after +- * a ptrace attach. +- */ +- if (!ret) +- arch_ptrace_attach(child); + goto out_put_task_struct; + } + +@@ -1455,12 +1443,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_lo + + if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { + ret = ptrace_attach(child, request, addr, data); +- /* +- * Some architectures need to do book-keeping after +- * a ptrace attach. +- */ +- if (!ret) +- arch_ptrace_attach(child); + goto out_put_task_struct; + } + diff --git a/patches/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch b/patches/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch new file mode 100644 index 000000000000..1ffb6a73f3c9 --- /dev/null +++ b/patches/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch @@ -0,0 +1,39 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:39 -0500 +Subject: [PATCH 06/12] signal: Use lockdep_assert_held instead of + assert_spin_locked + +The distinction is that assert_spin_locked() checks if the lock is +held *by*anyone* whereas lockdep_assert_held() asserts the current +context holds the lock. Also, the check goes away if you build +without lockdep. + +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Link: https://lkml.kernel.org/r/Ympr/+PX4XgT/UKU@hirez.programming.kicks-ass.net +Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-6-ebiederm@xmission.com +--- + kernel/signal.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -884,7 +884,7 @@ static int check_kill_permission(int sig + static void ptrace_trap_notify(struct task_struct *t) + { + WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); +- assert_spin_locked(&t->sighand->siglock); ++ lockdep_assert_held(&t->sighand->siglock); + + task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); + ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); +@@ -1079,7 +1079,7 @@ static int __send_signal_locked(int sig, + int override_rlimit; + int ret = 0, result; + +- assert_spin_locked(&t->sighand->siglock); ++ lockdep_assert_held(&t->sighand->siglock); + + result = TRACE_SIGNAL_IGNORED; + if (!prepare_signal(sig, t, force)) diff --git a/patches/0007-ptrace-Reimplement-PTRACE_KILL-by-always-sending-SIG.patch b/patches/0007-ptrace-Reimplement-PTRACE_KILL-by-always-sending-SIG.patch new file mode 100644 index 000000000000..6525db41415f --- /dev/null +++ b/patches/0007-ptrace-Reimplement-PTRACE_KILL-by-always-sending-SIG.patch @@ -0,0 +1,65 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:40 -0500 +Subject: [PATCH 07/12] ptrace: Reimplement PTRACE_KILL by always sending + SIGKILL + +The current implementation of PTRACE_KILL is buggy and has been for +many years as it assumes it's target has stopped in ptrace_stop. At a +quick skim it looks like this assumption has existed since ptrace +support was added in linux v1.0. + +While PTRACE_KILL has been deprecated we can not remove it as +a quick search with google code search reveals many existing +programs calling it. + +When the ptracee is not stopped at ptrace_stop some fields would be +set that are ignored except in ptrace_stop. Making the userspace +visible behavior of PTRACE_KILL a noop in those case. + +As the usual rules are not obeyed it is not clear what the +consequences are of calling PTRACE_KILL on a running process. +Presumably userspace does not do this as it achieves nothing. + +Replace the implementation of PTRACE_KILL with a simple +send_sig_info(SIGKILL) followed by a return 0. This changes the +observable user space behavior only in that PTRACE_KILL on a process +not stopped in ptrace_stop will also kill it. As that has always +been the intent of the code this seems like a reasonable change. + +Cc: stable@vger.kernel.org +Reported-by: Al Viro <viro@zeniv.linux.org.uk> +Suggested-by: Al Viro <viro@zeniv.linux.org.uk> +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-7-ebiederm@xmission.com +--- + arch/x86/kernel/step.c | 3 +-- + kernel/ptrace.c | 5 ++--- + 2 files changed, 3 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/step.c ++++ b/arch/x86/kernel/step.c +@@ -180,8 +180,7 @@ void set_task_blockstep(struct task_stru + * + * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if + * task is current or it can't be running, otherwise we can race +- * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but +- * PTRACE_KILL is not safe. ++ * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). 
+ */ + local_irq_disable(); + debugctl = get_debugctlmsr(); +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -1236,9 +1236,8 @@ int ptrace_request(struct task_struct *c + return ptrace_resume(child, request, data); + + case PTRACE_KILL: +- if (child->exit_state) /* already dead */ +- return 0; +- return ptrace_resume(child, request, SIGKILL); ++ send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); ++ return 0; + + #ifdef CONFIG_HAVE_ARCH_TRACEHOOK + case PTRACE_GETREGSET: diff --git a/patches/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch b/patches/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch new file mode 100644 index 000000000000..694933e0faac --- /dev/null +++ b/patches/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch @@ -0,0 +1,65 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:41 -0500 +Subject: [PATCH 08/12] ptrace: Document that wait_task_inactive can't fail + +After ptrace_freeze_traced succeeds it is known that the the tracee +has a __state value of __TASK_TRACED and that no __ptrace_unlink will +happen because the tracer is waiting for the tracee, and the tracee is +in ptrace_stop. + +The function ptrace_freeze_traced can succeed at any point after +ptrace_stop has set TASK_TRACED and dropped siglock. The read_lock on +tasklist_lock only excludes ptrace_attach. + +This means that the !current->ptrace which executes under a read_lock +of tasklist_lock will never see a ptrace_freeze_trace as the tracer +must have gone away before the tasklist_lock was taken and +ptrace_attach can not occur until the read_lock is dropped. As +ptrace_freeze_traced depends upon ptrace_attach running before it can +run that excludes ptrace_freeze_traced until __state is set to +TASK_RUNNING. This means that task_is_traced will fail in +ptrace_freeze_attach and ptrace_freeze_attached will fail. + +On the current->ptrace branch of ptrace_stop which will be reached any +time after ptrace_freeze_traced has succeed it is known that __state +is __TASK_TRACED and schedule() will be called with that state. + +Use a WARN_ON_ONCE to document that wait_task_inactive(TASK_TRACED) +should never fail. Remove the stale comment about may_ptrace_stop. + +Strictly speaking this is not true because if PREEMPT_RT is enabled +wait_task_inactive can fail because __state can be changed. I don't +see this as a problem as the ptrace code is currently broken on +PREMPT_RT, and this is one of the issues. Failing and warning when +the assumptions of the code are broken is good. + +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-8-ebiederm@xmission.com +--- + kernel/ptrace.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -266,17 +266,9 @@ static int ptrace_check_attach(struct ta + } + read_unlock(&tasklist_lock); + +- if (!ret && !ignore_state) { +- if (!wait_task_inactive(child, __TASK_TRACED)) { +- /* +- * This can only happen if may_ptrace_stop() fails and +- * ptrace_stop() changes ->state back to TASK_RUNNING, +- * so we should not worry about leaking __TASK_TRACED. 
+-			 */
+-			WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
+-			ret = -ESRCH;
+-		}
+-	}
++	if (!ret && !ignore_state &&
++	    WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED)))
++		ret = -ESRCH;
+
+ 	return ret;
+ }
diff --git a/patches/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch b/patches/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch
new file mode 100644
index 000000000000..0d5055d2a5cb
--- /dev/null
+++ b/patches/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch
@@ -0,0 +1,197 @@
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 5 May 2022 13:26:42 -0500
+Subject: [PATCH 09/12] ptrace: Admit ptrace_stop can generate spurious
+ SIGTRAPs
+
+Long ago and far away there was a BUG_ON at the start of ptrace_stop
+that did "BUG_ON(!(current->ptrace & PT_PTRACED));" [1]. The BUG_ON
+had never triggered but examination of the code showed that the BUG_ON
+could actually trigger. To complement removing the BUG_ON, an attempt
+to better handle the race was added.
+
+The code detected the tracer had gone away and did not call
+do_notify_parent_cldstop. The code also attempted to prevent
+ptrace_report_syscall from sending spurious SIGTRAPs when the tracer
+went away.
+
+The code to detect when the tracer had gone away before sending a
+signal to the tracer was a legitimate fix and continues to work to
+this date.
+
+The code to prevent sending spurious SIGTRAPs is a failure. At the
+time and until today the code only catches it when the tracer goes
+away after siglock is dropped and before read_lock is acquired. If
+the tracer goes away after read_lock is dropped a spurious SIGTRAP can
+still be sent to the tracee. The tracer going away after read_lock
+is dropped is the far likelier case as it is the bigger window.
+
+Given that the attempt to prevent the generation of a SIGTRAP was a
+failure and continues to be a failure, remove the code that attempts
+to do that. This simplifies the code in ptrace_stop and makes
+ptrace_stop much easier to reason about.
+
+To successfully deal with the tracer going away, all of the tracer's
+instrumentation of the child would need to be removed, and reliably
+detecting when the tracer has set a signal to continue with would need
+to be implemented.
+
+[1] 66519f549ae5 ("[PATCH] fix ptracer death race yielding bogus BUG_ON")
+
+History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20220505182645.497868-9-ebiederm@xmission.com
+---
+ kernel/signal.c | 92 +++++++++++++++++++++++---------------------------------
+ 1 file changed, 38 insertions(+), 54 deletions(-)
+
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -2187,13 +2187,12 @@ static void do_notify_parent_cldstop(str
+  * with. If the code did not stop because the tracer is gone,
+  * the stop signal remains unchanged unless clear_code.
+ */ +-static int ptrace_stop(int exit_code, int why, int clear_code, +- unsigned long message, kernel_siginfo_t *info) ++static int ptrace_stop(int exit_code, int why, unsigned long message, ++ kernel_siginfo_t *info) + __releases(¤t->sighand->siglock) + __acquires(¤t->sighand->siglock) + { + bool gstop_done = false; +- bool read_code = true; + + if (arch_ptrace_stop_needed()) { + /* +@@ -2212,7 +2211,14 @@ static int ptrace_stop(int exit_code, in + /* + * schedule() will not sleep if there is a pending signal that + * can awaken the task. ++ * ++ * After this point ptrace_signal_wake_up will clear TASK_TRACED ++ * if ptrace_unlink happens. Handle previous ptrace_unlinks ++ * here to prevent ptrace_stop sleeping in schedule. + */ ++ if (!current->ptrace) ++ return exit_code; ++ + set_special_state(TASK_TRACED); + + /* +@@ -2259,54 +2265,33 @@ static int ptrace_stop(int exit_code, in + + spin_unlock_irq(¤t->sighand->siglock); + read_lock(&tasklist_lock); +- if (likely(current->ptrace)) { +- /* +- * Notify parents of the stop. +- * +- * While ptraced, there are two parents - the ptracer and +- * the real_parent of the group_leader. The ptracer should +- * know about every stop while the real parent is only +- * interested in the completion of group stop. The states +- * for the two don't interact with each other. Notify +- * separately unless they're gonna be duplicates. +- */ ++ /* ++ * Notify parents of the stop. ++ * ++ * While ptraced, there are two parents - the ptracer and ++ * the real_parent of the group_leader. The ptracer should ++ * know about every stop while the real parent is only ++ * interested in the completion of group stop. The states ++ * for the two don't interact with each other. Notify ++ * separately unless they're gonna be duplicates. ++ */ ++ if (current->ptrace) + do_notify_parent_cldstop(current, true, why); +- if (gstop_done && ptrace_reparented(current)) +- do_notify_parent_cldstop(current, false, why); +- +- /* +- * Don't want to allow preemption here, because +- * sys_ptrace() needs this task to be inactive. +- * +- * XXX: implement read_unlock_no_resched(). +- */ +- preempt_disable(); +- read_unlock(&tasklist_lock); +- cgroup_enter_frozen(); +- preempt_enable_no_resched(); +- freezable_schedule(); +- cgroup_leave_frozen(true); +- } else { +- /* +- * By the time we got the lock, our tracer went away. +- * Don't drop the lock yet, another tracer may come. +- * +- * If @gstop_done, the ptracer went away between group stop +- * completion and here. During detach, it would have set +- * JOBCTL_STOP_PENDING on us and we'll re-enter +- * TASK_STOPPED in do_signal_stop() on return, so notifying +- * the real parent of the group stop completion is enough. +- */ +- if (gstop_done) +- do_notify_parent_cldstop(current, false, why); ++ if (gstop_done && (!current->ptrace || ptrace_reparented(current))) ++ do_notify_parent_cldstop(current, false, why); + +- /* tasklist protects us from ptrace_freeze_traced() */ +- __set_current_state(TASK_RUNNING); +- read_code = false; +- if (clear_code) +- exit_code = 0; +- read_unlock(&tasklist_lock); +- } ++ /* ++ * Don't want to allow preemption here, because ++ * sys_ptrace() needs this task to be inactive. ++ * ++ * XXX: implement read_unlock_no_resched(). ++ */ ++ preempt_disable(); ++ read_unlock(&tasklist_lock); ++ cgroup_enter_frozen(); ++ preempt_enable_no_resched(); ++ freezable_schedule(); ++ cgroup_leave_frozen(true); + + /* + * We are back. 
Now reacquire the siglock before touching
+@@ -2314,8 +2299,7 @@ static int ptrace_stop(int exit_code, in
+ 	 * any signal-sending on another CPU that wants to examine it.
+ 	 */
+ 	spin_lock_irq(&current->sighand->siglock);
+-	if (read_code)
+-		exit_code = current->exit_code;
++	exit_code = current->exit_code;
+ 	current->last_siginfo = NULL;
+ 	current->ptrace_message = 0;
+ 	current->exit_code = 0;
+@@ -2343,7 +2327,7 @@ static int ptrace_do_notify(int signr, i
+ 	info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+
+ 	/* Let the debugger run. */
+-	return ptrace_stop(exit_code, why, 1, message, &info);
++	return ptrace_stop(exit_code, why, message, &info);
+ }
+
+ int ptrace_notify(int exit_code, unsigned long message)
+@@ -2515,7 +2499,7 @@ static void do_jobctl_trap(void)
+ 				CLD_STOPPED, 0);
+ 	} else {
+ 		WARN_ON_ONCE(!signr);
+-		ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL);
++		ptrace_stop(signr, CLD_STOPPED, 0, NULL);
+ 	}
+ }
+
+@@ -2568,7 +2552,7 @@ static int ptrace_signal(int signr, kern
+ 	 * comment in dequeue_signal().
+ 	 */
+ 	current->jobctl |= JOBCTL_STOP_DEQUEUED;
+-	signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info);
++	signr = ptrace_stop(signr, CLD_TRAPPED, 0, info);
+
+ 	/* We're back. Did the debugger cancel the sig? */
+ 	if (signr == 0)
diff --git a/patches/0010-ptrace-Don-t-change-__state.patch b/patches/0010-ptrace-Don-t-change-__state.patch
new file mode 100644
index 000000000000..19123078a935
--- /dev/null
+++ b/patches/0010-ptrace-Don-t-change-__state.patch
@@ -0,0 +1,182 @@
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 5 May 2022 13:26:43 -0500
+Subject: [PATCH 10/12] ptrace: Don't change __state
+
+Stop playing with tsk->__state to remove TASK_WAKEKILL while a ptrace
+command is executing.
+
+Instead remove TASK_WAKEKILL from the definition of TASK_TRACED, and
+implement a new jobctl flag TASK_PTRACE_FROZEN. This new flag is set
+in jobctl_freeze_task and cleared when ptrace_stop is awoken or in
+jobctl_unfreeze_task (when ptrace_stop remains asleep).
+
+In signal_wake_up add __TASK_TRACED to state along with TASK_WAKEKILL
+when the wake up is for a fatal signal. Skip adding __TASK_TRACED
+when TASK_PTRACE_FROZEN is not set. This has the same effect as
+changing TASK_TRACED to __TASK_TRACED as all of the wake_ups that use
+TASK_KILLABLE go through signal_wake_up.
+
+Handle a ptrace_stop being called with a pending fatal signal.
+Previously it would have been handled by schedule simply failing to
+sleep. As TASK_WAKEKILL is no longer part of TASK_TRACED, schedule
+will sleep with a fatal_signal_pending. The code in signal_wake_up
+guarantees that the task will be woken by any fatal signal that
+comes after TASK_TRACED is set.
+
+Previously the __state value of __TASK_TRACED was changed to
+TASK_RUNNING when woken up or back to TASK_TRACED when the code was
+left in ptrace_stop. Now, when woken up, ptrace_stop clears
+JOBCTL_PTRACE_FROZEN, and when left sleeping, ptrace_unfreeze_traced
+clears JOBCTL_PTRACE_FROZEN.
+
+Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-10-ebiederm@xmission.com +--- + include/linux/sched.h | 2 +- + include/linux/sched/jobctl.h | 2 ++ + include/linux/sched/signal.h | 5 +++-- + kernel/ptrace.c | 21 ++++++++------------- + kernel/sched/core.c | 5 +---- + kernel/signal.c | 14 ++++++-------- + 6 files changed, 21 insertions(+), 28 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -103,7 +103,7 @@ struct task_group; + /* Convenience macros for the sake of set_current_state: */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) + #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +-#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) ++#define TASK_TRACED __TASK_TRACED + + #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +--- a/include/linux/sched/jobctl.h ++++ b/include/linux/sched/jobctl.h +@@ -19,6 +19,7 @@ struct task_struct; + #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ + #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ ++#define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ + + #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) + #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +@@ -28,6 +29,7 @@ struct task_struct; + #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) + #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) ++#define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) + + #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) + #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -435,9 +435,10 @@ extern void calculate_sigpending(void); + + extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +-static inline void signal_wake_up(struct task_struct *t, bool resume) ++static inline void signal_wake_up(struct task_struct *t, bool fatal) + { +- signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); ++ fatal = fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN); ++ signal_wake_up_state(t, fatal ? TASK_WAKEKILL | __TASK_TRACED : 0); + } + static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) + { +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -197,7 +197,7 @@ static bool ptrace_freeze_traced(struct + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && + !__fatal_signal_pending(task)) { +- WRITE_ONCE(task->__state, __TASK_TRACED); ++ task->jobctl |= JOBCTL_PTRACE_FROZEN; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); +@@ -207,23 +207,19 @@ static bool ptrace_freeze_traced(struct + + static void ptrace_unfreeze_traced(struct task_struct *task) + { +- if (READ_ONCE(task->__state) != __TASK_TRACED) +- return; +- +- WARN_ON(!task->ptrace || task->parent != current); ++ unsigned long flags; + + /* +- * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. +- * Recheck state under the lock to close this race. ++ * The child may be awake and may have cleared ++ * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will ++ * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. 
+ */ +- spin_lock_irq(&task->sighand->siglock); +- if (READ_ONCE(task->__state) == __TASK_TRACED) { ++ if (lock_task_sighand(task, &flags)) { ++ task->jobctl &= ~JOBCTL_PTRACE_FROZEN; + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); +- else +- WRITE_ONCE(task->__state, TASK_TRACED); ++ unlock_task_sighand(task, &flags); + } +- spin_unlock_irq(&task->sighand->siglock); + } + + /** +@@ -256,7 +252,6 @@ static int ptrace_check_attach(struct ta + */ + read_lock(&tasklist_lock); + if (child->ptrace && child->parent == current) { +- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); + /* + * child->sighand can't be NULL, release_task() + * does ptrace_unlink() before __exit_signal(). +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6313,10 +6313,7 @@ static void __sched notrace __schedule(u + + /* + * We must load prev->state once (task_struct::state is volatile), such +- * that: +- * +- * - we form a control dependency vs deactivate_task() below. +- * - ptrace_{,un}freeze_traced() can change ->state underneath us. ++ * that we form a control dependency vs deactivate_task() below. + */ + prev_state = READ_ONCE(prev->__state); + if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -2209,14 +2209,12 @@ static int ptrace_stop(int exit_code, in + } + + /* +- * schedule() will not sleep if there is a pending signal that +- * can awaken the task. +- * +- * After this point ptrace_signal_wake_up will clear TASK_TRACED +- * if ptrace_unlink happens. Handle previous ptrace_unlinks +- * here to prevent ptrace_stop sleeping in schedule. ++ * After this point ptrace_signal_wake_up or signal_wake_up ++ * will clear TASK_TRACED if ptrace_unlink happens or a fatal ++ * signal comes in. Handle previous ptrace_unlinks and fatal ++ * signals here to prevent ptrace_stop sleeping in schedule. + */ +- if (!current->ptrace) ++ if (!current->ptrace || __fatal_signal_pending(current)) + return exit_code; + + set_special_state(TASK_TRACED); +@@ -2305,7 +2303,7 @@ static int ptrace_stop(int exit_code, in + current->exit_code = 0; + + /* LISTENING can be set only during STOP traps, clear it */ +- current->jobctl &= ~JOBCTL_LISTENING; ++ current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); + + /* + * Queued signals ignored us while we were stopped for tracing. diff --git a/patches/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch b/patches/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch new file mode 100644 index 000000000000..3d575e50489d --- /dev/null +++ b/patches/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch @@ -0,0 +1,46 @@ +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Thu, 5 May 2022 13:26:44 -0500 +Subject: [PATCH 11/12] ptrace: Always take siglock in ptrace_resume + +Make code analysis simpler and future changes easier by +always taking siglock in ptrace_resume. + +Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-11-ebiederm@xmission.com +--- + kernel/ptrace.c | 13 ++----------- + 1 file changed, 2 insertions(+), 11 deletions(-) + +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -837,8 +837,6 @@ static long ptrace_get_rseq_configuratio + static int ptrace_resume(struct task_struct *child, long request, + unsigned long data) + { +- bool need_siglock; +- + if (!valid_signal(data)) + return -EIO; + +@@ -874,18 +872,11 @@ static int ptrace_resume(struct task_str + * Note that we need siglock even if ->exit_code == data and/or this + * status was not reported yet, the new status must not be cleared by + * wait_task_stopped() after resume. +- * +- * If data == 0 we do not care if wait_task_stopped() reports the old +- * status and clears the code too; this can't race with the tracee, it +- * takes siglock after resume. + */ +- need_siglock = data && !thread_group_empty(current); +- if (need_siglock) +- spin_lock_irq(&child->sighand->siglock); ++ spin_lock_irq(&child->sighand->siglock); + child->exit_code = data; + wake_up_state(child, __TASK_TRACED); +- if (need_siglock) +- spin_unlock_irq(&child->sighand->siglock); ++ spin_unlock_irq(&child->sighand->siglock); + + return 0; + } diff --git a/patches/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch b/patches/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch new file mode 100644 index 000000000000..94d2a619af0f --- /dev/null +++ b/patches/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch @@ -0,0 +1,212 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 5 May 2022 13:26:45 -0500 +Subject: [PATCH 12/12] sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED + state + +Currently ptrace_stop() / do_signal_stop() rely on the special states +TASK_TRACED and TASK_STOPPED resp. to keep unique state. That is, this +state exists only in task->__state and nowhere else. + +There's two spots of bother with this: + + - PREEMPT_RT has task->saved_state which complicates matters, + meaning task_is_{traced,stopped}() needs to check an additional + variable. + + - An alternative freezer implementation that itself relies on a + special TASK state would loose TASK_TRACED/TASK_STOPPED and will + result in misbehaviour. + +As such, add additional state to task->jobctl to track this state +outside of task->__state. + +NOTE: this doesn't actually fix anything yet, just adds extra state. + +--EWB + * didn't add a unnecessary newline in signal.h + * Update t->jobctl in signal_wake_up and ptrace_signal_wake_up + instead of in signal_wake_up_state. This prevents the clearing + of TASK_STOPPED and TASK_TRACED from getting lost. + * Added warnings if JOBCTL_STOPPED or JOBCTL_TRACED are not cleared + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Link: https://lkml.kernel.org/r/20220421150654.757693825@infradead.org +Signed-off-by: Eric W. 
Biederman <ebiederm@xmission.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220505182645.497868-12-ebiederm@xmission.com +--- + include/linux/sched.h | 8 +++----- + include/linux/sched/jobctl.h | 6 ++++++ + include/linux/sched/signal.h | 19 +++++++++++++++---- + kernel/ptrace.c | 16 +++++++++++++--- + kernel/signal.c | 10 ++++++++-- + 5 files changed, 45 insertions(+), 14 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -118,11 +118,9 @@ struct task_group; + + #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) + +-#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) +- +-#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) +- +-#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) ++#define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) ++#define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) ++#define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) + + /* + * Special states are those that do not use the normal wait-loop pattern. See +--- a/include/linux/sched/jobctl.h ++++ b/include/linux/sched/jobctl.h +@@ -21,6 +21,9 @@ struct task_struct; + #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ + #define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ + ++#define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */ ++#define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */ ++ + #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) + #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) + #define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +@@ -31,6 +34,9 @@ struct task_struct; + #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) + #define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) + ++#define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT) ++#define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT) ++ + #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) + #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -294,8 +294,10 @@ static inline int kernel_dequeue_signal( + static inline void kernel_signal_stop(void) + { + spin_lock_irq(¤t->sighand->siglock); +- if (current->jobctl & JOBCTL_STOP_DEQUEUED) ++ if (current->jobctl & JOBCTL_STOP_DEQUEUED) { ++ current->jobctl |= JOBCTL_STOPPED; + set_special_state(TASK_STOPPED); ++ } + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +@@ -437,12 +439,21 @@ extern void signal_wake_up_state(struct + + static inline void signal_wake_up(struct task_struct *t, bool fatal) + { +- fatal = fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN); +- signal_wake_up_state(t, fatal ? TASK_WAKEKILL | __TASK_TRACED : 0); ++ unsigned int state = 0; ++ if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { ++ t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); ++ state = TASK_WAKEKILL | __TASK_TRACED; ++ } ++ signal_wake_up_state(t, state); + } + static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) + { +- signal_wake_up_state(t, resume ? 
__TASK_TRACED : 0);
++	unsigned int state = 0;
++	if (resume) {
++		t->jobctl &= ~JOBCTL_TRACED;
++		state = __TASK_TRACED;
++	}
++	signal_wake_up_state(t, state);
+ }
+
+ void task_join_group_stop(struct task_struct *task);
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(st
+ 	return true;
+ }
+
+-/* Ensure that nothing can wake it up, even SIGKILL */
++/*
++ * Ensure that nothing can wake it up, even SIGKILL
++ *
++ * A task is switched to this state while a ptrace operation is in progress;
++ * such that the ptrace operation is uninterruptible.
++ */
+ static bool ptrace_freeze_traced(struct task_struct *task)
+ {
+ 	bool ret = false;
+@@ -216,8 +221,10 @@ static void ptrace_unfreeze_traced(struc
+ 	 */
+ 	if (lock_task_sighand(task, &flags)) {
+ 		task->jobctl &= ~JOBCTL_PTRACE_FROZEN;
+-		if (__fatal_signal_pending(task))
++		if (__fatal_signal_pending(task)) {
++			task->jobctl &= ~JOBCTL_TRACED;
+ 			wake_up_state(task, __TASK_TRACED);
++		}
+ 		unlock_task_sighand(task, &flags);
+ 	}
+ }
+@@ -462,8 +469,10 @@ static int ptrace_attach(struct task_str
+ 	 * in and out of STOPPED are protected by siglock.
+ 	 */
+ 	if (task_is_stopped(task) &&
+-	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
++	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
++		task->jobctl &= ~JOBCTL_STOPPED;
+ 		signal_wake_up_state(task, __TASK_STOPPED);
++	}
+
+ 	spin_unlock(&task->sighand->siglock);
+
+@@ -875,6 +884,7 @@ static int ptrace_resume(struct task_str
+ 	 */
+ 	spin_lock_irq(&child->sighand->siglock);
+ 	child->exit_code = data;
++	child->jobctl &= ~JOBCTL_TRACED;
+ 	wake_up_state(child, __TASK_TRACED);
+ 	spin_unlock_irq(&child->sighand->siglock);
+
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -762,7 +762,10 @@ static int dequeue_synchronous_signal(ke
+  */
+ void signal_wake_up_state(struct task_struct *t, unsigned int state)
+ {
++	lockdep_assert_held(&t->sighand->siglock);
++
+ 	set_tsk_thread_flag(t, TIF_SIGPENDING);
++
+ 	/*
+ 	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
+ 	 * case. We don't check t->state here because there is a race with it
+@@ -930,9 +933,10 @@ static bool prepare_signal(int sig, stru
+ 	for_each_thread(p, t) {
+ 		flush_sigqueue_mask(&flush, &t->pending);
+ 		task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
+-		if (likely(!(t->ptrace & PT_SEIZED)))
++		if (likely(!(t->ptrace & PT_SEIZED))) {
++			t->jobctl &= ~JOBCTL_STOPPED;
+ 			wake_up_state(t, __TASK_STOPPED);
+-		else
++		} else
+ 			ptrace_trap_notify(t);
+ 	}
+
+@@ -2218,6 +2222,7 @@ static int ptrace_stop(int exit_code, in
+ 		return exit_code;
+
+ 	set_special_state(TASK_TRACED);
++	current->jobctl |= JOBCTL_TRACED;
+
+ 	/*
+ 	 * We're committing to trapping. 
TRACED should be visible before +@@ -2436,6 +2441,7 @@ static bool do_signal_stop(int signr) + if (task_participate_group_stop(current)) + notify = CLD_STOPPED; + ++ current->jobctl |= JOBCTL_STOPPED; + set_special_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch index 41fc0b58e69e..f2d35e0c0528 100644 --- a/patches/Add_localversion_for_-RT_release.patch +++ b/patches/Add_localversion_for_-RT_release.patch @@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt4 ++-rt5 diff --git a/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch b/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch deleted file mode 100644 index 846c95920b28..000000000000 --- a/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch +++ /dev/null @@ -1,56 +0,0 @@ -Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable() -From: Josh Cartwright <joshc@ni.com> -Date: Thu Feb 11 11:54:01 2016 -0600 - -From: Josh Cartwright <joshc@ni.com> - -kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating -the vgic and timer states to prevent the calling task from migrating to -another CPU. It does so to prevent the task from writing to the -incorrect per-CPU GIC distributor registers. - -On -rt kernels, it's possible to maintain the same guarantee with the -use of migrate_{disable,enable}(), with the added benefit that the -migrate-disabled region is preemptible. Update -kvm_arch_vcpu_ioctl_run() to do so. - -Cc: Christoffer Dall <christoffer.dall@linaro.org> -Reported-by: Manish Jaggi <Manish.Jaggi@caviumnetworks.com> -Signed-off-by: Josh Cartwright <joshc@ni.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - arch/arm64/kvm/arm.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) ---- ---- a/arch/arm64/kvm/arm.c -+++ b/arch/arm64/kvm/arm.c -@@ -798,7 +798,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - * involves poking the GIC, which must be done in a - * non-preemptible context. - */ -- preempt_disable(); -+ migrate_disable(); - - /* - * The VMID allocator only tracks active VMIDs per -@@ -831,7 +831,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - kvm_timer_sync_user(vcpu); - kvm_vgic_sync_hwstate(vcpu); - local_irq_enable(); -- preempt_enable(); -+ migrate_enable(); - continue; - } - -@@ -903,7 +903,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); - -- preempt_enable(); -+ migrate_enable(); - - /* - * The ARMv8 architecture doesn't give the hypervisor diff --git a/patches/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch b/patches/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch new file mode 100644 index 000000000000..32e08a35e22e --- /dev/null +++ b/patches/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch @@ -0,0 +1,55 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 4 May 2022 19:24:10 +0200 +Subject: [PATCH] SUNRPC: Don't disable preemption while calling + svc_pool_for_cpu(). + +svc_xprt_enqueue() disables preemption via get_cpu() and then asks for a +pool of a specific CPU (current) via svc_pool_for_cpu(). 
+With disabled preemption it acquires svc_pool::sp_lock, a spinlock_t,
+which is a sleeping lock on PREEMPT_RT and can't be acquired with
+disabled preemption.
+
+Disabling preemption is not required here. The pool is protected with a
+lock so the following list access is safe even cross-CPU. The following
+iteration through svc_pool::sp_all_threads is under the RCU read lock,
+and the remaining operations within the loop are atomic and do not rely
+on disabled preemption.
+
+Use raw_smp_processor_id() as the argument for the requested CPU in
+svc_pool_for_cpu().
+
+Reported-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/YnK2ujabd2+oCrT/@linutronix.de
+---
+ net/sunrpc/svc_xprt.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -448,7 +448,6 @@ void svc_xprt_enqueue(struct svc_xprt *x
+ {
+ 	struct svc_pool *pool;
+ 	struct svc_rqst	*rqstp = NULL;
+-	int cpu;
+
+ 	if (!svc_xprt_ready(xprt))
+ 		return;
+@@ -461,8 +460,7 @@ void svc_xprt_enqueue(struct svc_xprt *x
+ 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+ 		return;
+
+-	cpu = get_cpu();
+-	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
++	pool = svc_pool_for_cpu(xprt->xpt_server, raw_smp_processor_id());
+
+ 	atomic_long_inc(&pool->sp_stats.packets);
+
+@@ -485,7 +483,6 @@ void svc_xprt_enqueue(struct svc_xprt *x
+ 	rqstp = NULL;
+ out_unlock:
+ 	rcu_read_unlock();
+-	put_cpu();
+ 	trace_svc_xprt_enqueue(xprt, rqstp);
+ }
+ EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
diff --git a/patches/arch_arm64__Add_lazy_preempt_support.patch b/patches/arch_arm64__Add_lazy_preempt_support.patch
index 5cb73f4f6bc0..3339620a018a 100644
--- a/patches/arch_arm64__Add_lazy_preempt_support.patch
+++ b/patches/arch_arm64__Add_lazy_preempt_support.patch
@@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/arch/arm64/Kconfig
 +++ b/arch/arm64/Kconfig
-@@ -196,6 +196,7 @@ config ARM64
+@@ -194,6 +194,7 @@ config ARM64
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/patches/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch b/patches/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch
new file mode 100644
index 000000000000..0c19a3d95631
--- /dev/null
+++ b/patches/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch
@@ -0,0 +1,47 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 5 May 2022 19:21:47 +0200
+Subject: [PATCH] blk-mq: Don't disable preemption around
+ __blk_mq_run_hw_queue().
+
+__blk_mq_delay_run_hw_queue() disables preemption to get a stable
+current CPU number and then invokes __blk_mq_run_hw_queue() if the CPU
+number is part of the mask.
+
+__blk_mq_run_hw_queue() acquires a spinlock_t which is a sleeping lock
+on PREEMPT_RT and can't be acquired with disabled preemption.
+
+If it is important that the current CPU matches the requested CPU mask
+and that the context does not migrate to another CPU while
+__blk_mq_run_hw_queue() is invoked, then it is possible to achieve this
+by disabling migration and keeping the context preemptible.
+
+Disable only migration while testing the CPU mask and invoking
+__blk_mq_run_hw_queue().
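+
+For illustration, the resulting pattern looks roughly like the sketch
+below. This is a simplified sketch, not the actual blk-mq code: the
+helper my_run_hw_queue() is a made-up stand-in for
+__blk_mq_run_hw_queue(), and the fallback path is omitted.
+
+	static void my_run_hw_queue(struct blk_mq_hw_ctx *hctx);
+
+	static void my_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+	{
+		/*
+		 * migrate_disable() pins the task to this CPU but keeps
+		 * it preemptible, so the spinlock_t (a sleeping lock on
+		 * PREEMPT_RT) taken inside my_run_hw_queue() is fine.
+		 */
+		migrate_disable();
+		if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask))
+			my_run_hw_queue(hctx);
+		migrate_enable();
+	}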
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/YnQHqx/5+54jd+U+@linutronix.de +--- + block/blk-mq.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2046,14 +2046,14 @@ static void __blk_mq_delay_run_hw_queue( + return; + + if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { +- int cpu = get_cpu(); +- if (cpumask_test_cpu(cpu, hctx->cpumask)) { ++ migrate_disable(); ++ if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { + __blk_mq_run_hw_queue(hctx); +- put_cpu(); ++ migrate_enable(); + return; + } + +- put_cpu(); ++ migrate_enable(); + } + + kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, diff --git a/patches/block_mq__do_not_invoke_preempt_disable.patch b/patches/block_mq__do_not_invoke_preempt_disable.patch deleted file mode 100644 index dae42f07f446..000000000000 --- a/patches/block_mq__do_not_invoke_preempt_disable.patch +++ /dev/null @@ -1,38 +0,0 @@ -Subject: block/mq: do not invoke preempt_disable() -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue Jul 14 14:26:34 2015 +0200 - -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -preempt_disable() and get_cpu() don't play well together with the sleeping -locks it tries to allocate later. -It seems to be enough to replace it with get_cpu_light() and migrate_disable(). - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - block/blk-mq.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) ---- ---- a/block/blk-mq.c -+++ b/block/blk-mq.c -@@ -2053,14 +2053,14 @@ static void __blk_mq_delay_run_hw_queue( - return; - - if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { -- int cpu = get_cpu(); -+ int cpu = get_cpu_light(); - if (cpumask_test_cpu(cpu, hctx->cpumask)) { - __blk_mq_run_hw_queue(hctx); -- put_cpu(); -+ put_cpu_light(); - return; - } - -- put_cpu(); -+ put_cpu_light(); - } - - kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, diff --git a/patches/crypto-cryptd-Protect-per-CPU-resource-by-disabling-.patch b/patches/crypto-cryptd-Protect-per-CPU-resource-by-disabling-.patch new file mode 100644 index 000000000000..cb8e8d588157 --- /dev/null +++ b/patches/crypto-cryptd-Protect-per-CPU-resource-by-disabling-.patch @@ -0,0 +1,89 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 3 May 2022 08:30:01 +0200 +Subject: [PATCH] crypto: cryptd - Protect per-CPU resource by disabling BH. + +The access to cryptd_queue::cpu_queue is synchronized by disabling +preemption in cryptd_enqueue_request() and disabling BH in +cryptd_queue_worker(). This implies that access is allowed from BH. + +If cryptd_enqueue_request() is invoked from preemptible context _and_ +soft interrupt then this can lead to list corruption since +cryptd_enqueue_request() is not protected against access from +soft interrupt. + +Replace get_cpu() in cryptd_enqueue_request() with local_bh_disable() +to ensure BH is always disabled. +Remove preempt_disable() from cryptd_queue_worker() since it is not +needed because local_bh_disable() ensures synchronisation. 
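+
+For illustration, with the hunks below applied cryptd_enqueue_request()
+ends up with roughly this shape (a simplified sketch; the refcount
+handling is omitted):
+
+	static int cryptd_enqueue_request(struct cryptd_queue *queue,
+					  struct crypto_async_request *request)
+	{
+		struct cryptd_cpu_queue *cpu_queue;
+		int err;
+
+		/*
+		 * Disabling BH excludes the softirq enqueue path on this
+		 * CPU and keeps the task on this CPU, so
+		 * smp_processor_id() is stable until local_bh_enable().
+		 */
+		local_bh_disable();
+		cpu_queue = this_cpu_ptr(queue->cpu_queue);
+		err = crypto_enqueue_request(&cpu_queue->queue, request);
+		if (err != -ENOSPC)
+			queue_work_on(smp_processor_id(), cryptd_wq,
+				      &cpu_queue->work);
+		local_bh_enable();
+		return err;
+	}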
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/YnKWuLQZdPwSdRTh@linutronix.de +--- + crypto/cryptd.c | 23 +++++++++++------------ + 1 file changed, 11 insertions(+), 12 deletions(-) + +--- a/crypto/cryptd.c ++++ b/crypto/cryptd.c +@@ -39,6 +39,10 @@ struct cryptd_cpu_queue { + }; + + struct cryptd_queue { ++ /* ++ * Protected by disabling BH to allow enqueueing from softinterrupt and ++ * dequeuing from kworker (cryptd_queue_worker()). ++ */ + struct cryptd_cpu_queue __percpu *cpu_queue; + }; + +@@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cry + static int cryptd_enqueue_request(struct cryptd_queue *queue, + struct crypto_async_request *request) + { +- int cpu, err; ++ int err; + struct cryptd_cpu_queue *cpu_queue; + refcount_t *refcnt; + +- cpu = get_cpu(); ++ local_bh_disable(); + cpu_queue = this_cpu_ptr(queue->cpu_queue); + err = crypto_enqueue_request(&cpu_queue->queue, request); + + refcnt = crypto_tfm_ctx(request->tfm); + + if (err == -ENOSPC) +- goto out_put_cpu; ++ goto out; + +- queue_work_on(cpu, cryptd_wq, &cpu_queue->work); ++ queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); + + if (!refcount_read(refcnt)) +- goto out_put_cpu; ++ goto out; + + refcount_inc(refcnt); + +-out_put_cpu: +- put_cpu(); ++out: ++ local_bh_enable(); + + return err; + } +@@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct w + cpu_queue = container_of(work, struct cryptd_cpu_queue, work); + /* + * Only handle one request at a time to avoid hogging crypto workqueue. +- * preempt_disable/enable is used to prevent being preempted by +- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent +- * cryptd_enqueue_request() being accessed from software interrupts. + */ + local_bh_disable(); +- preempt_disable(); + backlog = crypto_get_backlog(&cpu_queue->queue); + req = crypto_dequeue_request(&cpu_queue->queue); +- preempt_enable(); + local_bh_enable(); + + if (!req) diff --git a/patches/crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch b/patches/crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch deleted file mode 100644 index a54667381b6d..000000000000 --- a/patches/crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch +++ /dev/null @@ -1,82 +0,0 @@ -Subject: crypto: cryptd - add a lock instead preempt_disable/local_bh_disable -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu Jul 26 18:52:00 2018 +0200 - -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -cryptd has a per-CPU lock which protected with local_bh_disable() and -preempt_disable(). -Add an explicit spin_lock to make the locking context more obvious and -visible to lockdep. Since it is a per-CPU lock, there should be no lock -contention on the actual spinlock. -There is a small race-window where we could be migrated to another CPU -after the cpu_queue has been obtain. This is not a problem because the -actual ressource is protected by the spinlock. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - crypto/cryptd.c | 19 +++++++++---------- - 1 file changed, 9 insertions(+), 10 deletions(-) ---- ---- a/crypto/cryptd.c -+++ b/crypto/cryptd.c -@@ -36,6 +36,7 @@ static struct workqueue_struct *cryptd_w - struct cryptd_cpu_queue { - struct crypto_queue queue; - struct work_struct work; -+ spinlock_t qlock; - }; - - struct cryptd_queue { -@@ -105,6 +106,7 @@ static int cryptd_init_queue(struct cryp - cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); - crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); - INIT_WORK(&cpu_queue->work, cryptd_queue_worker); -+ spin_lock_init(&cpu_queue->qlock); - } - pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen); - return 0; -@@ -129,8 +131,10 @@ static int cryptd_enqueue_request(struct - struct cryptd_cpu_queue *cpu_queue; - refcount_t *refcnt; - -- cpu = get_cpu(); -- cpu_queue = this_cpu_ptr(queue->cpu_queue); -+ cpu_queue = raw_cpu_ptr(queue->cpu_queue); -+ spin_lock_bh(&cpu_queue->qlock); -+ cpu = smp_processor_id(); -+ - err = crypto_enqueue_request(&cpu_queue->queue, request); - - refcnt = crypto_tfm_ctx(request->tfm); -@@ -146,7 +150,7 @@ static int cryptd_enqueue_request(struct - refcount_inc(refcnt); - - out_put_cpu: -- put_cpu(); -+ spin_unlock_bh(&cpu_queue->qlock); - - return err; - } -@@ -162,16 +166,11 @@ static void cryptd_queue_worker(struct w - cpu_queue = container_of(work, struct cryptd_cpu_queue, work); - /* - * Only handle one request at a time to avoid hogging crypto workqueue. -- * preempt_disable/enable is used to prevent being preempted by -- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent -- * cryptd_enqueue_request() being accessed from software interrupts. 
- */ -- local_bh_disable(); -- preempt_disable(); -+ spin_lock_bh(&cpu_queue->qlock); - backlog = crypto_get_backlog(&cpu_queue->queue); - req = crypto_dequeue_request(&cpu_queue->queue); -- preempt_enable(); -- local_bh_enable(); -+ spin_unlock_bh(&cpu_queue->qlock); - - if (!req) - return; diff --git a/patches/kernel_sched__add_putget_cpu_light.patch b/patches/kernel_sched__add_putget_cpu_light.patch deleted file mode 100644 index 32d3717dfdec..000000000000 --- a/patches/kernel_sched__add_putget_cpu_light.patch +++ /dev/null @@ -1,26 +0,0 @@ -Subject: kernel/sched: add {put|get}_cpu_light() -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat May 27 19:02:06 2017 +0200 - -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - include/linux/smp.h | 3 +++ - 1 file changed, 3 insertions(+) ---- ---- a/include/linux/smp.h -+++ b/include/linux/smp.h -@@ -267,6 +267,9 @@ static inline int get_boot_cpu_id(void) - #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) - #define put_cpu() preempt_enable() - -+#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); }) -+#define put_cpu_light() migrate_enable() -+ - /* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: diff --git a/patches/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch b/patches/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch new file mode 100644 index 000000000000..66616f1b758b --- /dev/null +++ b/patches/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch @@ -0,0 +1,58 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 4 May 2022 19:03:25 +0200 +Subject: [PATCH] mm/vmalloc: Use raw_cpu_ptr() for vmap_block_queue access. + +The per-CPU resource vmap_block_queue is accessed via the +get_cpu_var(). That macro disables preemption and then loads the pointer +from the current CPU. +This doesn't work on PREEMPT_RT because a spinlock_t is later accessed +within the preempt-disable section. + +There is no need to disable preemption while accessing the per-CPU +struct vmap_block_queue because the list is protected with a spinlock_t. +The per-CPU struct is also accessed cross-CPU in +purge_fragmented_blocks(). +It is possible that by using raw_cpu_ptr() the code migrates to another +CPU and uses struct from another CPU. This is fine because the list +locked and the locked section is very short. + +Use raw_cpu_ptr() to access vmap_block_queue. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/YnKx3duAB53P7ojN@linutronix.de +--- + mm/vmalloc.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -1938,11 +1938,10 @@ static void *new_vmap_block(unsigned int + return ERR_PTR(err); + } + +- vbq = &get_cpu_var(vmap_block_queue); ++ vbq = raw_cpu_ptr(&vmap_block_queue); + spin_lock(&vbq->lock); + list_add_tail_rcu(&vb->free_list, &vbq->free); + spin_unlock(&vbq->lock); +- put_cpu_var(vmap_block_queue); + + return vaddr; + } +@@ -2021,7 +2020,7 @@ static void *vb_alloc(unsigned long size + order = get_order(size); + + rcu_read_lock(); +- vbq = &get_cpu_var(vmap_block_queue); ++ vbq = raw_cpu_ptr(&vmap_block_queue); + list_for_each_entry_rcu(vb, &vbq->free, free_list) { + unsigned long pages_off; + +@@ -2044,7 +2043,6 @@ static void *vb_alloc(unsigned long size + break; + } + +- put_cpu_var(vmap_block_queue); + rcu_read_unlock(); + + /* Allocate new block if nothing was found */ diff --git a/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch b/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch deleted file mode 100644 index 27f66c0a898d..000000000000 --- a/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch +++ /dev/null @@ -1,50 +0,0 @@ -Subject: mm/vmalloc: Another preempt disable region which sucks -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue Jul 12 11:39:36 2011 +0200 - -From: Thomas Gleixner <tglx@linutronix.de> - -Avoid the preempt disable version of get_cpu_var(). The inner-lock should -provide enough serialisation. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - mm/vmalloc.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) ---- ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -1938,11 +1938,12 @@ static void *new_vmap_block(unsigned int - return ERR_PTR(err); - } - -- vbq = &get_cpu_var(vmap_block_queue); -+ get_cpu_light(); -+ vbq = this_cpu_ptr(&vmap_block_queue); - spin_lock(&vbq->lock); - list_add_tail_rcu(&vb->free_list, &vbq->free); - spin_unlock(&vbq->lock); -- put_cpu_var(vmap_block_queue); -+ put_cpu_light(); - - return vaddr; - } -@@ -2021,7 +2022,8 @@ static void *vb_alloc(unsigned long size - order = get_order(size); - - rcu_read_lock(); -- vbq = &get_cpu_var(vmap_block_queue); -+ get_cpu_light(); -+ vbq = this_cpu_ptr(&vmap_block_queue); - list_for_each_entry_rcu(vb, &vbq->free, free_list) { - unsigned long pages_off; - -@@ -2044,7 +2046,7 @@ static void *vb_alloc(unsigned long size - break; - } - -- put_cpu_var(vmap_block_queue); -+ put_cpu_light(); - rcu_read_unlock(); - - /* Allocate new block if nothing was found */ diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch deleted file mode 100644 index 16f6dbca472b..000000000000 --- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch +++ /dev/null @@ -1,277 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 2 Mar 2022 22:04:23 +0100 -Subject: [PATCH] ptrace: fix ptrace vs tasklist_lock race on PREEMPT_RT. - -As explained by Alexander Fyodorov <halcy@yandex.ru>: - -|read_lock(&tasklist_lock) in ptrace_stop() is converted to sleeping -|lock on a PREEMPT_RT kernel, and it can remove __TASK_TRACED from -|task->__state (by moving it to task->saved_state). 
If parent does -|wait() on child followed by a sys_ptrace call, the following race can -|happen: -| -|- child sets __TASK_TRACED in ptrace_stop() -|- parent does wait() which eventually calls wait_task_stopped() and returns -| child's pid -|- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves -| __TASK_TRACED flag to saved_state -|- parent calls sys_ptrace, which calls ptrace_check_attach() and -| wait_task_inactive() - -The patch is based on his initial patch where an additional check is -added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is -acquired to have stable view on ->__state and ->saved_state. - -wait_task_inactive() needs to check both task states while waiting for the -expected task state. Should the expected task state be in ->saved_state then -the task is blocked on a sleeping lock. In this case wait_task_inactive() needs -to wait until the lock situtation has been resolved (the expected state is in -->__state). This ensures that the task is idle and does not wakeup as part of -lock resolving and races for instance with __switch_to_xtra() while the -debugger clears TIF_BLOCKSTEP() (noted by Oleg Nesterov). - -[ Fix for ptrace_unfreeze_traced() by Oleg Nesterov ] - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YkW55u6u2fo5QmV7@linutronix.de ---- - include/linux/sched.h | 128 ++++++++++++++++++++++++++++++++++++++++++++++++-- - kernel/ptrace.c | 25 +++++---- - kernel/sched/core.c | 11 +++- - 3 files changed, 146 insertions(+), 18 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -118,12 +118,8 @@ struct task_group; - - #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) - --#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) -- - #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) - --#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) -- - /* - * Special states are those that do not use the normal wait-loop pattern. See - * the comment with set_special_state(). 
-@@ -2027,6 +2023,130 @@ static inline int test_tsk_need_resched( - return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); - } - -+#ifdef CONFIG_PREEMPT_RT -+ -+static inline bool task_state_match_and(struct task_struct *tsk, long state) -+{ -+ unsigned long flags; -+ bool match = false; -+ -+ raw_spin_lock_irqsave(&tsk->pi_lock, flags); -+ if (READ_ONCE(tsk->__state) & state) -+ match = true; -+ else if (tsk->saved_state & state) -+ match = true; -+ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); -+ return match; -+} -+ -+static inline int __task_state_match_eq(struct task_struct *tsk, long state) -+{ -+ int match = 0; -+ -+ if (READ_ONCE(tsk->__state) == state) -+ match = 1; -+ else if (tsk->saved_state == state) -+ match = -1; -+ -+ return match; -+} -+ -+static inline int task_state_match_eq(struct task_struct *tsk, long state) -+{ -+ unsigned long flags; -+ int match; -+ -+ raw_spin_lock_irqsave(&tsk->pi_lock, flags); -+ match = __task_state_match_eq(tsk, state); -+ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); -+ return match; -+} -+ -+static inline bool task_state_match_and_set(struct task_struct *tsk, long state, -+ long new_state) -+{ -+ unsigned long flags; -+ bool match = false; -+ -+ raw_spin_lock_irqsave(&tsk->pi_lock, flags); -+ if (READ_ONCE(tsk->__state) & state) { -+ WRITE_ONCE(tsk->__state, new_state); -+ match = true; -+ } else if (tsk->saved_state & state) { -+ tsk->saved_state = new_state; -+ match = true; -+ } -+ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); -+ return match; -+} -+ -+static inline bool task_state_match_eq_set(struct task_struct *tsk, long state, -+ long new_state) -+{ -+ unsigned long flags; -+ bool match = false; -+ -+ raw_spin_lock_irqsave(&tsk->pi_lock, flags); -+ if (READ_ONCE(tsk->__state) == state) { -+ WRITE_ONCE(tsk->__state, new_state); -+ match = true; -+ } else if (tsk->saved_state == state) { -+ tsk->saved_state = new_state; -+ match = true; -+ } -+ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); -+ return match; -+} -+ -+#else -+ -+static inline bool task_state_match_and(struct task_struct *tsk, long state) -+{ -+ return READ_ONCE(tsk->__state) & state; -+} -+ -+static inline int __task_state_match_eq(struct task_struct *tsk, long state) -+{ -+ return READ_ONCE(tsk->__state) == state; -+} -+ -+static inline int task_state_match_eq(struct task_struct *tsk, long state) -+{ -+ return __task_state_match_eq(tsk, state); -+} -+ -+static inline bool task_state_match_and_set(struct task_struct *tsk, long state, -+ long new_state) -+{ -+ if (READ_ONCE(tsk->__state) & state) { -+ WRITE_ONCE(tsk->__state, new_state); -+ return true; -+ } -+ return false; -+} -+ -+static inline bool task_state_match_eq_set(struct task_struct *tsk, long state, -+ long new_state) -+{ -+ if (READ_ONCE(tsk->__state) == state) { -+ WRITE_ONCE(tsk->__state, new_state); -+ return true; -+ } -+ return false; -+} -+ -+#endif -+ -+static inline bool task_is_traced(struct task_struct *tsk) -+{ -+ return task_state_match_and(tsk, __TASK_TRACED); -+} -+ -+static inline bool task_is_stopped_or_traced(struct task_struct *tsk) -+{ -+ return task_state_match_and(tsk, __TASK_STOPPED | __TASK_TRACED); -+} -+ - /* - * cond_resched() and cond_resched_lock(): latency reduction via - * explicit rescheduling in places that are safe. 
The return ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -195,10 +195,10 @@ static bool ptrace_freeze_traced(struct - return ret; - - spin_lock_irq(&task->sighand->siglock); -- if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && -- !__fatal_signal_pending(task)) { -- WRITE_ONCE(task->__state, __TASK_TRACED); -- ret = true; -+ if (!looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { -+ -+ ret = task_state_match_and_set(task, __TASK_TRACED, -+ __TASK_TRACED); - } - spin_unlock_irq(&task->sighand->siglock); - -@@ -207,7 +207,10 @@ static bool ptrace_freeze_traced(struct - - static void ptrace_unfreeze_traced(struct task_struct *task) - { -- if (READ_ONCE(task->__state) != __TASK_TRACED) -+ bool frozen; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && -+ READ_ONCE(task->__state) != __TASK_TRACED) - return; - - WARN_ON(!task->ptrace || task->parent != current); -@@ -217,12 +220,12 @@ static void ptrace_unfreeze_traced(struc - * Recheck state under the lock to close this race. - */ - spin_lock_irq(&task->sighand->siglock); -- if (READ_ONCE(task->__state) == __TASK_TRACED) { -- if (__fatal_signal_pending(task)) -- wake_up_state(task, __TASK_TRACED); -- else -- WRITE_ONCE(task->__state, TASK_TRACED); -- } -+ -+ frozen = task_state_match_eq_set(task, __TASK_TRACED, TASK_TRACED); -+ -+ if (frozen && __fatal_signal_pending(task)) -+ wake_up_state(task, __TASK_TRACED); -+ - spin_unlock_irq(&task->sighand->siglock); - } - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3287,6 +3287,8 @@ unsigned long wait_task_inactive(struct - struct rq *rq; - - for (;;) { -+ int match_type = 0; -+ - /* - * We do the initial early heuristics without holding - * any task-queue locks at all. We'll only try to get -@@ -3307,7 +3309,8 @@ unsigned long wait_task_inactive(struct - * is actually now running somewhere else! - */ - while (task_running(rq, p)) { -- if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) -+ if (match_state && -+ unlikely(!task_state_match_eq(p, match_state))) - return 0; - cpu_relax(); - } -@@ -3322,7 +3325,9 @@ unsigned long wait_task_inactive(struct - running = task_running(rq, p); - queued = task_on_rq_queued(p); - ncsw = 0; -- if (!match_state || READ_ONCE(p->__state) == match_state) -+ if (match_state) -+ match_type = __task_state_match_eq(p, match_state); -+ if (!match_state || match_type) - ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ - task_rq_unlock(rq, p, &rf); - -@@ -3352,7 +3357,7 @@ unsigned long wait_task_inactive(struct - * running right now), it's preempted, and we should - * yield - it could be a while. - */ -- if (unlikely(queued)) { -+ if (unlikely(queued || match_type < 0)) { - ktime_t to = NSEC_PER_SEC / HZ; - - set_current_state(TASK_UNINTERRUPTIBLE); diff --git a/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch b/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch new file mode 100644 index 000000000000..03b5fc03d962 --- /dev/null +++ b/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch @@ -0,0 +1,85 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 2 May 2022 13:58:03 +0200 +Subject: [PATCH] sched: Consider task_struct::saved_state in + wait_task_inactive(). + +Ptrace is using wait_task_inactive() to wait for the tracee to reach a +certain task state. On PREEMPT_RT that state may be stored in +task_struct::saved_state while the tracee blocks on a sleeping lock. 
+
+In that case wait_task_inactive() should wait until the requested state
+is in task_struct::__state and the task is idle.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 38 ++++++++++++++++++++++++++++++++++----
+ 1 file changed, 34 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3285,6 +3285,8 @@ unsigned long wait_task_inactive(struct
+ 	struct rq_flags rf;
+ 	unsigned long ncsw;
+ 	struct rq *rq;
++	bool saved_state_match;
++	bool update_ncsw;
+
+ 	for (;;) {
+ 		/*
+@@ -3307,8 +3309,22 @@ unsigned long wait_task_inactive(struct
+ 		 * is actually now running somewhere else!
+ 		 */
+ 		while (task_running(rq, p)) {
+-			if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+-				return 0;
++			if (match_state) {
++				unsigned long flags;
++				bool mismatch = false;
++
++				raw_spin_lock_irqsave(&p->pi_lock, flags);
++#ifdef CONFIG_PREEMPT_RT
++				if ((READ_ONCE(p->__state) != match_state) &&
++				    (READ_ONCE(p->saved_state) != match_state))
++#else
++				if (READ_ONCE(p->__state) != match_state)
++#endif
++					mismatch = true;
++				raw_spin_unlock_irqrestore(&p->pi_lock, flags);
++				if (mismatch)
++					return 0;
++			}
+ 			cpu_relax();
+ 		}
+
+@@ -3322,7 +3338,21 @@ unsigned long wait_task_inactive(struct
+ 		running = task_running(rq, p);
+ 		queued = task_on_rq_queued(p);
+ 		ncsw = 0;
+-		if (!match_state || READ_ONCE(p->__state) == match_state)
++		update_ncsw = false;
++		saved_state_match = false;
++
++		if (!match_state) {
++			update_ncsw = true;
++		} else if (READ_ONCE(p->__state) == match_state) {
++			update_ncsw = true;
++#ifdef CONFIG_PREEMPT_RT
++		} else if (READ_ONCE(p->saved_state) == match_state) {
++			update_ncsw = true;
++			saved_state_match = true;
++#endif
++		}
++
++		if (update_ncsw)
+ 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ 		task_rq_unlock(rq, p, &rf);
+
+@@ -3352,7 +3382,7 @@ unsigned long wait_task_inactive(struct
+ 		 * running right now), it's preempted, and we should
+ 		 * yield - it could be a while.
+ 		 */
+-		if (unlikely(queued)) {
++		if (unlikely(queued) || saved_state_match) {
+ 			ktime_t to = NSEC_PER_SEC / HZ;
+
+ 			set_current_state(TASK_UNINTERRUPTIBLE);
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index 4ae70ed48047..9cd86c40c4a2 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -218,9 +218,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 +
 +#endif
 +
- #ifdef CONFIG_PREEMPT_RT
- 
- static inline bool task_state_match_and(struct task_struct *tsk, long state)
+ /*
+  * cond_resched() and cond_resched_lock(): latency reduction via
+  * explicit rescheduling in places that are safe. 
 --- a/include/linux/thread_info.h
 +++ b/include/linux/thread_info.h
 @@ -177,7 +177,17 @@ static __always_inline unsigned long rea
@@ -357,7 +357,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(migrate_enable);
-@@ -4519,6 +4561,9 @@ int sched_fork(unsigned long clone_flags
+@@ -4544,6 +4586,9 @@ int sched_fork(unsigned long clone_flags
 	p->on_cpu = 0;
 #endif
 	init_task_preempt_count(p);
@@ -367,7 +367,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
-@@ -6359,6 +6404,7 @@ static void __sched notrace __schedule(u
+@@ -6381,6 +6426,7 @@ static void __sched notrace __schedule(u
 	next = pick_next_task(rq, prev, &rf);
 	clear_tsk_need_resched(prev);
@@ -375,7 +375,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 	clear_preempt_need_resched();
 #ifdef CONFIG_SCHED_DEBUG
 	rq->last_seen_need_resched_ns = 0;
-@@ -6569,6 +6615,30 @@ static void __sched notrace preempt_sche
+@@ -6591,6 +6637,30 @@ static void __sched notrace preempt_sche
 	} while (need_resched());
 }
@@ -406,7 +406,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 #ifdef CONFIG_PREEMPTION
 /*
  * This is the entry point to schedule() from in-kernel preemption
-@@ -6582,6 +6652,8 @@ asmlinkage __visible void __sched notrac
+@@ -6604,6 +6674,8 @@ asmlinkage __visible void __sched notrac
 	 */
 	if (likely(!preemptible()))
 		return;
@@ -415,7 +415,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 	preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
-@@ -6629,6 +6701,9 @@ asmlinkage __visible void __sched notrac
+@@ -6651,6 +6723,9 @@ asmlinkage __visible void __sched notrac
 	if (likely(!preemptible()))
 		return;
@@ -425,7 +425,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 	do {
 		/*
 		 * Because the function tracer can trace preempt_count_sub()
-@@ -8845,7 +8920,9 @@ void __init init_idle(struct task_struct
+@@ -8867,7 +8942,9 @@ void __init init_idle(struct task_struct
 
 	/* Set the preempt count _outside_ the spinlocks! */
 	init_idle_preempt_count(idle, cpu);
diff --git a/patches/scsi_fcoe__Make_RT_aware..patch b/patches/scsi_fcoe__Make_RT_aware..patch
deleted file mode 100644
index 3d310be4aa97..000000000000
--- a/patches/scsi_fcoe__Make_RT_aware..patch
+++ /dev/null
@@ -1,109 +0,0 @@
-Subject: scsi/fcoe: Make RT aware.
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sat Nov 12 14:00:48 2011 +0100
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-Do not disable preemption while taking sleeping locks. All users look safe
-for migrate_disable() only.
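For comparison, the replacement series keeps the CPU-local section but takes
a local_lock instead of disabling preemption outright. A condensed sketch of
the pattern from the new 0001-scsi-fcoe patch follows; the struct layout is
abbreviated from memory and the lock initialisation is omitted, so treat it
as an outline rather than the complete change:

/* Sketch: local_lock keeps the !RT behaviour (preempt_disable/enable)
 * and becomes a per-CPU lock on RT, so the section may sleep yet stays
 * CPU-local. Initialisation via INIT_LOCAL_LOCK() is omitted here. */
struct fcoe_percpu_s {
	struct sk_buff_head fcoe_rx_list;
	struct page *crc_eof_page;
	int crc_eof_offset;
	local_lock_t lock;		/* added by the new patch */
};

static DEFINE_PER_CPU(struct fcoe_percpu_s, fcoe_percpu);

static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
{
	struct fcoe_percpu_s *fps;
	int rc;

	local_lock(&fcoe_percpu.lock);
	fps = this_cpu_ptr(&fcoe_percpu);
	rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
	local_unlock(&fcoe_percpu.lock);

	return rc;
}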
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- drivers/scsi/fcoe/fcoe.c      | 16 ++++++++--------
- drivers/scsi/fcoe/fcoe_ctlr.c |  4 ++--
- drivers/scsi/libfc/fc_exch.c  |  4 ++--
- 3 files changed, 12 insertions(+), 12 deletions(-)
----
---- a/drivers/scsi/fcoe/fcoe.c
-+++ b/drivers/scsi/fcoe/fcoe.c
-@@ -1451,11 +1451,11 @@ static int fcoe_rcv(struct sk_buff *skb,
- static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
- {
- 	struct fcoe_percpu_s *fps;
--	int rc;
-+	int rc, cpu = get_cpu_light();
- 
--	fps = &get_cpu_var(fcoe_percpu);
-+	fps = &per_cpu(fcoe_percpu, cpu);
- 	rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
--	put_cpu_var(fcoe_percpu);
-+	put_cpu_light();
- 
- 	return rc;
- }
-@@ -1640,11 +1640,11 @@ static inline int fcoe_filter_frames(str
- 		return 0;
- 	}
- 
--	stats = per_cpu_ptr(lport->stats, get_cpu());
-+	stats = per_cpu_ptr(lport->stats, get_cpu_light());
- 	stats->InvalidCRCCount++;
- 	if (stats->InvalidCRCCount < 5)
- 		printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
--	put_cpu();
-+	put_cpu_light();
- 	return -EINVAL;
- }
- 
-@@ -1685,7 +1685,7 @@ static void fcoe_recv_frame(struct sk_bu
- 	 */
- 	hp = (struct fcoe_hdr *) skb_network_header(skb);
- 
--	stats = per_cpu_ptr(lport->stats, get_cpu());
-+	stats = per_cpu_ptr(lport->stats, get_cpu_light());
- 	if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
- 		if (stats->ErrorFrames < 5)
- 			printk(KERN_WARNING "fcoe: FCoE version "
-@@ -1717,13 +1717,13 @@ static void fcoe_recv_frame(struct sk_bu
- 		goto drop;
- 
- 	if (!fcoe_filter_frames(lport, fp)) {
--		put_cpu();
-+		put_cpu_light();
- 		fc_exch_recv(lport, fp);
- 		return;
- 	}
- drop:
- 	stats->ErrorFrames++;
--	put_cpu();
-+	put_cpu_light();
- 	kfree_skb(skb);
- }
- 
---- a/drivers/scsi/fcoe/fcoe_ctlr.c
-+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
-@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(
- 
- 	INIT_LIST_HEAD(&del_list);
- 
--	stats = per_cpu_ptr(fip->lp->stats, get_cpu());
-+	stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
- 
- 	list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
- 		deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
-@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(
- 			sel_time = fcf->time;
- 		}
- 	}
--	put_cpu();
-+	put_cpu_light();
- 
- 	list_for_each_entry_safe(fcf, next, &del_list, list) {
- 		/* Removes fcf from current list */
---- a/drivers/scsi/libfc/fc_exch.c
-+++ b/drivers/scsi/libfc/fc_exch.c
-@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(
- 	}
- 	memset(ep, 0, sizeof(*ep));
- 
--	cpu = get_cpu();
-+	cpu = get_cpu_light();
- 	pool = per_cpu_ptr(mp->pool, cpu);
- 	spin_lock_bh(&pool->lock);
--	put_cpu();
-+	put_cpu_light();
- 
- 	/* peek cache of free slot */
- 	if (pool->left != FC_XID_UNKNOWN) {
diff --git a/patches/series b/patches/series
index a7c08e46afec..db6e53b41582 100644
--- a/patches/series
+++ b/patches/series
@@ -28,7 +28,7 @@ # in -tip or traveling to -tip.
 lib-irq_poll-Prevent-softirq-pending-leak-in-irq_pol.patch
-signal_x86__Delay_calling_signals_in_atomic.patch
+# signal_x86__Delay_calling_signals_in_atomic.patch
 
 # v5, fixed 3/3 of smp: Make softirq handling RT friendly
 0001-sched-Fix-missing-prototype-warnings.patch
@@ -38,36 +38,43 @@ signal_x86__Delay_calling_signals_in_atomic.patch
 ###########################################################################
 # Posted
 ###########################################################################
-ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch
 rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch
 rcu-tasks-Use-schedule_hrtimeout_range-while-waiting.patch
+blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch
+mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch
+SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch
+0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch
+0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch
+0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch
+0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch
+
+# Eric's ptrace, v4
+0001-signal-Rename-send_signal-send_signal_locked.patch
+0002-signal-Replace-__group_send_sig_info-with-send_signa.patch
+0003-ptrace-um-Replace-PT_DTRACE-with-TIF_SINGLESTEP.patch
+0004-ptrace-xtensa-Replace-PT_SINGLESTEP-with-TIF_SINGLES.patch
+0005-ptrace-Remove-arch_ptrace_attach.patch
+0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch
+0007-ptrace-Reimplement-PTRACE_KILL-by-always-sending-SIG.patch
+0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch
+0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch
+0010-ptrace-Don-t-change-__state.patch
+0011-ptrace-Always-take-siglock-in-ptrace_resume.patch
+0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch
+# Hacks to get it to work.
+signal__Revert_ptrace_preempt_magic.patch
+sched-Consider-task_struct-saved_state-in-wait_task_.patch
 
 ###########################################################################
 # Post
 ###########################################################################
 
 ###########################################################################
-# sched:
-###########################################################################
-# cpu-light
-kernel_sched__add_putget_cpu_light.patch
-block_mq__do_not_invoke_preempt_disable.patch
-scsi_fcoe__Make_RT_aware..patch
-mm_vmalloc__Another_preempt_disable_region_which_sucks.patch
-sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch
-crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch
-
-###########################################################################
 # softirq:
 ###########################################################################
 softirq__Check_preemption_after_reenabling_interrupts.patch
 
 ###########################################################################
-# ptrace: Revisit
-###########################################################################
-signal__Revert_ptrace_preempt_magic.patch
-
-###########################################################################
 # fs: The namespace part needs a proper fix
 ###########################################################################
 fs_dcache__use_swait_queue_instead_of_waitqueue.patch
@@ -87,6 +94,7 @@ tpm_tis__fix_stall_after_iowrites.patch
 drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch
 generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
 softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
+crypto-cryptd-Protect-per-CPU-resource-by-disabling-.patch
 
 ###########################################################################
 # DRM:
@@ -117,11 +125,10 @@ arch_arm64__Add_lazy_preempt_support.patch
 ###########################################################################
 jump-label__disable_if_stop_machine_is_used.patch
 ARM__enable_irq_in_translation_section_permission_fault_handlers.patch
-KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch
 arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch
 arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch
 arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch
-arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch
+# arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch
 tty_serial_omap__Make_the_locking_RT_aware.patch
 tty_serial_pl011__Make_the_locking_work_on_RT.patch
 ARM__Allow_to_enable_RT.patch
diff --git a/patches/signal__Revert_ptrace_preempt_magic.patch b/patches/signal__Revert_ptrace_preempt_magic.patch
index 071a4dded2bc..8d19e928f270 100644
--- a/patches/signal__Revert_ptrace_preempt_magic.patch
+++ b/patches/signal__Revert_ptrace_preempt_magic.patch
@@ -17,20 +17,20 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/kernel/signal.c
 +++ b/kernel/signal.c
-@@ -2320,16 +2320,8 @@ static int ptrace_stop(int exit_code, in
-	if (gstop_done && ptrace_reparented(current))
-		do_notify_parent_cldstop(current, false, why);
+@@ -2283,16 +2283,8 @@ static int ptrace_stop(int exit_code, in
+	if (gstop_done && (!current->ptrace || ptrace_reparented(current)))
+		do_notify_parent_cldstop(current, false, why);
+
+-	/*
+-	 * Don't want to allow preemption here, because
+-	 * sys_ptrace() needs this task to be inactive.
+-	 *
+-	 * XXX: implement read_unlock_no_resched().
+-	 */
+-	preempt_disable();
+	read_unlock(&tasklist_lock);
+	cgroup_enter_frozen();
+-	preempt_enable_no_resched();
+	freezable_schedule();
+	cgroup_leave_frozen(true);
--	/*
--	 * Don't want to allow preemption here, because
--	 * sys_ptrace() needs this task to be inactive.
--	 *
--	 * XXX: implement read_unlock_no_resched().
--	 */
--	preempt_disable();
-	read_unlock(&tasklist_lock);
-	cgroup_enter_frozen();
--	preempt_enable_no_resched();
-	freezable_schedule();
-	cgroup_leave_frozen(true);
-	} else {
diff --git a/patches/sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch b/patches/sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch
deleted file mode 100644
index ecb0053bb318..000000000000
--- a/patches/sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light()
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
-Date: Wed Feb 18 16:05:28 2015 +0100
-
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
-
-|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915
-|in_atomic(): 1, irqs_disabled(): 0, pid: 3194, name: rpc.nfsd
-|Preemption disabled at:[<ffffffffa06bf0bb>] svc_xprt_received+0x4b/0xc0 [sunrpc]
-|CPU: 6 PID: 3194 Comm: rpc.nfsd Not tainted 3.18.7-rt1 #9
-|Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.404 11/06/2014
-| ffff880409630000 ffff8800d9a33c78 ffffffff815bdeb5 0000000000000002
-| 0000000000000000 ffff8800d9a33c98 ffffffff81073c86 ffff880408dd6008
-| ffff880408dd6000 ffff8800d9a33cb8 ffffffff815c3d84 ffff88040b3ac000
-|Call Trace:
-| [<ffffffff815bdeb5>] dump_stack+0x4f/0x9e
-| [<ffffffff81073c86>] __might_sleep+0xe6/0x150
-| [<ffffffff815c3d84>] rt_spin_lock+0x24/0x50
-| [<ffffffffa06beec0>] svc_xprt_do_enqueue+0x80/0x230 [sunrpc]
-| [<ffffffffa06bf0bb>] svc_xprt_received+0x4b/0xc0 [sunrpc]
-| [<ffffffffa06c03ed>] svc_add_new_perm_xprt+0x6d/0x80 [sunrpc]
-| [<ffffffffa06b2693>] svc_addsock+0x143/0x200 [sunrpc]
-| [<ffffffffa072e69c>] write_ports+0x28c/0x340 [nfsd]
-| [<ffffffffa072d2ac>] nfsctl_transaction_write+0x4c/0x80 [nfsd]
-| [<ffffffff8117ee83>] vfs_write+0xb3/0x1d0
-| [<ffffffff8117f889>] SyS_write+0x49/0xb0
-| [<ffffffff815c4556>] system_call_fastpath+0x16/0x1b
-
-
-Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- net/sunrpc/svc_xprt.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
----
---- a/net/sunrpc/svc_xprt.c
-+++ b/net/sunrpc/svc_xprt.c
-@@ -461,7 +461,7 @@ void svc_xprt_enqueue(struct svc_xprt *x
- 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
- 		return;
- 
--	cpu = get_cpu();
-+	cpu = get_cpu_light();
- 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
- 
- 	atomic_long_inc(&pool->sp_stats.packets);
-@@ -485,7 +485,7 @@ void svc_xprt_enqueue(struct svc_xprt *x
- 	rqstp = NULL;
- out_unlock:
- 	rcu_read_unlock();
--	put_cpu();
-+	put_cpu_light();
- 	trace_svc_xprt_enqueue(xprt, rqstp);
- }
- EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
diff --git a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch
index d989e30aba19..f3fbad78d715 100644
--- a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch
+++ b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch
@@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/drivers/tty/serial/amba-pl011.c
 +++ b/drivers/tty/serial/amba-pl011.c
-@@ -2270,18 +2270,24 @@
+@@ -2275,18 +2275,24 @@ pl011_console_write(struct console *co,
 {
 	struct uart_amba_port *uap = amba_ports[co->index];
 	unsigned int old_cr = 0, new_cr;
@@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 	/*
 	 * First save the CR then disable the interrupts
-@@ -2307,8 +2313,7 @@ pl011_console_write(struct console *co,
+@@ -2312,8 +2318,7 @@ pl011_console_write(struct console *co,
 	pl011_write(old_cr, uap, REG_CR);
 
 	if (locked)
diff --git a/patches/x86__Support_for_lazy_preemption.patch b/patches/x86__Support_for_lazy_preemption.patch
index 49ee7bea7da5..88944aef4263 100644
--- a/patches/x86__Support_for_lazy_preemption.patch
+++ b/patches/x86__Support_for_lazy_preemption.patch
@@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/arch/x86/Kconfig
 +++ b/arch/x86/Kconfig
-@@ -241,6 +241,7 @@ config X86
+@@ -240,6 +240,7 @@ config X86
 	select HAVE_PCI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
@@ -136,7 +136,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 /**
 --- a/kernel/entry/common.c
 +++ b/kernel/entry/common.c
-@@ -165,7 +165,7 @@ static unsigned long exit_to_user_mode_l
+@@ -153,7 +153,7 @@ static unsigned long exit_to_user_mode_l
 
 		local_irq_enable_exit_to_user(ti_work);
 
@@ -144,4 +144,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 +		if (ti_work & _TIF_NEED_RESCHED_MASK)
 			schedule();
 
-		raise_delayed_signal();
+		if (ti_work & _TIF_UPROBE)
diff --git a/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
index c0f85a307273..d2068a6f3bd0 100644
--- a/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
+++ b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
@@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/kernel/entry/common.c
 +++ b/kernel/entry/common.c
-@@ -395,7 +395,7 @@ void raw_irqentry_exit_cond_resched(void
+@@ -381,7 +381,7 @@ void raw_irqentry_exit_cond_resched(void
 	rcu_irq_exit_check_preempt();
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		WARN_ON_ONCE(!on_thread_stack());
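Since two of the hunks above test _TIF_NEED_RESCHED_MASK: that mask is where
lazy preemption hooks into the exit-to-user path. Roughly, with illustrative
bit numbers (the real values come from the per-architecture thread_info.h
changes carried by the lazy preemption patch):

/* Sketch: fold the lazy bit into the reschedule check on the way out
 * to user space. Bit positions here are illustrative only. */
#define TIF_NEED_RESCHED	3
#define TIF_NEED_RESCHED_LAZY	9

#define _TIF_NEED_RESCHED	(1UL << TIF_NEED_RESCHED)
#define _TIF_NEED_RESCHED_LAZY	(1UL << TIF_NEED_RESCHED_LAZY)

#ifdef CONFIG_PREEMPT_LAZY
# define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#else
# define _TIF_NEED_RESCHED_MASK	_TIF_NEED_RESCHED
#endif

Either bit forces the schedule() in exit_to_user_mode_loop(), while only the
non-lazy bit preempts kernel code immediately; that is what keeps the lazy
variant cheap for SCHED_OTHER tasks without hurting RT latencies.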