diff options
author | Ingo Molnar <mingo@kernel.org> | 2021-03-30 14:50:13 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2021-03-30 14:50:13 +0200 |
commit | cf28dfd89ba01639accfdc2b509849c4656198cf (patch) | |
tree | 180690eb164838a7ff951354a9e1a5406754961a | |
parent | dd85f61b3ad167364f402df62748b4befb27ef7c (diff) | |
parent | 883ccef355b910398b99dfaf96d40557479a7e9b (diff) | |
download | linux-next-cf28dfd89ba01639accfdc2b509849c4656198cf.tar.gz |
Merge branch 'irq/core'
45 files changed, 428 insertions, 166 deletions
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index b574cce98dc3..422753d52244 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc,struct sk_buff *skb) } submitted++; ATM_SKB(skb)->vcc = vcc; - tasklet_disable(&ENI_DEV(vcc->dev)->task); + tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task); res = do_tx(skb); tasklet_enable(&ENI_DEV(vcc->dev)->task); if (res == enq_ok) return 0; diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9811c40956e5..17c9d825188b 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; - tasklet_disable(&ctx->tasklet); + tasklet_disable_in_atomic(&ctx->tasklet); if (packet->ack != 0) goto out; @@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(struct fw_iso_context *base) struct iso_context *ctx = container_of(base, struct iso_context, base); int ret = 0; - tasklet_disable(&ctx->context.tasklet); + tasklet_disable_in_atomic(&ctx->context.tasklet); if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) { context_tasklet((unsigned long)&ctx->context); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index e622aee6e4be..440c35f1abc9 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -105,7 +105,7 @@ static inline bool tasklet_is_locked(const struct tasklet_struct *t) static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) - tasklet_unlock_wait(t); + tasklet_unlock_spin_wait(t); } static inline bool __tasklet_is_enabled(const struct tasklet_struct *t) diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c index 6567ed782f82..58717cd44f99 100644 --- a/drivers/irqchip/irq-aspeed-vic.c +++ b/drivers/irqchip/irq-aspeed-vic.c @@ -71,7 +71,7 @@ static void vic_init_hw(struct aspeed_vic *vic) writel(0, vic->base + AVIC_INT_SELECT); writel(0, vic->base + AVIC_INT_SELECT + 4); - /* Some interrupts have a programable high/low level trigger + /* Some interrupts have a programmable high/low level trigger * (4 GPIO direct inputs), for now we assume this was configured * by firmware. We read which ones are edge now. */ @@ -203,7 +203,7 @@ static int __init avic_of_init(struct device_node *node, } vic->base = regs; - /* Initialize soures, all masked */ + /* Initialize sources, all masked */ vic_init_hw(vic); /* Ready to receive interrupts */ diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index c7c9e976acbb..ad59656ccc28 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -309,7 +309,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, if (data->can_wake) { /* This IRQ chip can wake the system, set all - * relevant child interupts in wake_enabled mask + * relevant child interrupts in wake_enabled mask */ gc->wake_enabled = 0xffffffff; gc->wake_enabled &= ~gc->unused; diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c index 5a2ec43b7ddd..ab91afa86755 100644 --- a/drivers/irqchip/irq-csky-apb-intc.c +++ b/drivers/irqchip/irq-csky-apb-intc.c @@ -176,7 +176,7 @@ gx_intc_init(struct device_node *node, struct device_node *parent) writel(0x0, reg_base + GX_INTC_NEN63_32); /* - * Initial mask reg with all unmasked, because we only use enalbe reg + * Initial mask reg with all unmasked, because we only use enable reg */ writel(0x0, reg_base + GX_INTC_NMASK31_00); writel(0x0, reg_base + GX_INTC_NMASK63_32); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index fbec07d634ad..4116b48e60af 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -371,7 +371,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, * the MSI data is the absolute value within the range from * spi_start to (spi_start + num_spis). * - * Broadom NS2 GICv2m implementation has an erratum where the MSI data + * Broadcom NS2 GICv2m implementation has an erratum where the MSI data * is 'spi_number - 32' * * Reading that register fails on the Graviton implementation diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ed46e6057e33..c3485b230d70 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1492,7 +1492,7 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable) * * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI * value or to 1023, depending on the enable bit. But that - * would be issueing a mapping for an /existing/ DevID+EventID + * would be issuing a mapping for an /existing/ DevID+EventID * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI * to the /same/ vPE, using this opportunity to adjust the * doorbell. Mouahahahaha. We loves it, Precious. @@ -3122,7 +3122,7 @@ static void its_cpu_init_lpis(void) /* * It's possible for CPU to receive VLPIs before it is - * sheduled as a vPE, especially for the first CPU, and the + * scheduled as a vPE, especially for the first CPU, and the * VLPI with INTID larger than 2^(IDbits+1) will be considered * as out of range and dropped by GIC. * So we initialize IDbits to known value to avoid VLPI drop. @@ -3616,7 +3616,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, /* * If all interrupts have been freed, start mopping the - * floor. This is conditionned on the device not being shared. + * floor. This is conditioned on the device not being shared. */ if (!its_dev->shared && bitmap_empty(its_dev->event_map.lpi_map, @@ -4194,7 +4194,7 @@ static int its_sgi_set_affinity(struct irq_data *d, { /* * There is no notion of affinity for virtual SGIs, at least - * not on the host (since they can only be targetting a vPE). + * not on the host (since they can only be targeting a vPE). * Tell the kernel we've done whatever it asked for. */ irq_data_update_effective_affinity(d, mask_val); @@ -4239,7 +4239,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, /* * Locking galore! We can race against two different events: * - * - Concurent vPE affinity change: we must make sure it cannot + * - Concurrent vPE affinity change: we must make sure it cannot * happen, or we'll talk to the wrong redistributor. This is * identical to what happens with vLPIs. * diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eb0ee356a629..94b89258d045 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1379,7 +1379,7 @@ static int gic_irq_domain_translate(struct irq_domain *d, /* * Make it clear that broken DTs are... broken. - * Partitionned PPIs are an unfortunate exception. + * Partitioned PPIs are an unfortunate exception. */ WARN_ON(*type == IRQ_TYPE_NONE && fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index 9bf6b9a5f734..f790ca6d78aa 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -163,7 +163,7 @@ static void pch_pic_reset(struct pch_pic *priv) int i; for (i = 0; i < PIC_COUNT; i++) { - /* Write vectore ID */ + /* Write vectored ID */ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i)); /* Hardcode route to HT0 Lo */ writeb(1, priv->base + PCH_INT_ROUTE(i)); diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index bc7aebcc96e9..e50676ce2ec8 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -227,7 +227,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* * Get the hwirq number assigned to this channel through - * a pointer the channel_irq table. The added benifit of this + * a pointer the channel_irq table. The added benefit of this * method is that we can also retrieve the channel index with * it, using the table base. */ diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c index 69ba8ce3c178..9bca0918078e 100644 --- a/drivers/irqchip/irq-mtk-cirq.c +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -217,7 +217,7 @@ static void mtk_cirq_resume(void) { u32 value; - /* flush recored interrupts, will send signals to parent controller */ + /* flush recorded interrupts, will send signals to parent controller */ value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index a671938fd97f..d1f5740cd575 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -58,7 +58,7 @@ struct icoll_priv { static struct icoll_priv icoll_priv; static struct irq_domain *icoll_domain; -/* calculate bit offset depending on number of intterupt per register */ +/* calculate bit offset depending on number of interrupt per register */ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) { /* @@ -68,7 +68,7 @@ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) return bit << ((d->hwirq & 3) << 3); } -/* calculate mem offset depending on number of intterupt per register */ +/* calculate mem offset depending on number of interrupt per register */ static void __iomem *icoll_intr_reg(struct irq_data *d) { /* offset = hwirq / intr_per_reg * 0x10 */ diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index fb78d6623556..9ea94456b178 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -189,7 +189,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) * 3) spurious irq * So if we immediately get a reading of 0, check the irq-pending reg * to differentiate between 2 and 3. We only do this once to avoid - * the extra check in the common case of 1 hapening after having + * the extra check in the common case of 1 happening after having * read the vector-reg once. */ hwirq = readl(irq_ic_data->irq_base + SUN4I_IRQ_VECTOR_REG) >> 2; diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 532d0ae172d9..ca1f593f4d13 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -78,7 +78,7 @@ struct ti_sci_inta_vint_desc { * struct ti_sci_inta_irq_domain - Structure representing a TISCI based * Interrupt Aggregator IRQ domain. * @sci: Pointer to TISCI handle - * @vint: TISCI resource pointer representing IA inerrupts. + * @vint: TISCI resource pointer representing IA interrupts. * @global_event: TISCI resource pointer representing global events. * @vint_list: List of the vints active in the system * @vint_mutex: Mutex to protect vint_list diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index e46036374227..62f3d29f9042 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -163,7 +163,7 @@ static struct syscore_ops vic_syscore_ops = { }; /** - * vic_pm_init - initicall to register VIC pm + * vic_pm_init - initcall to register VIC pm * * This is called via late_initcall() to register * the resources for the VICs due to the early @@ -397,7 +397,7 @@ static void __init vic_clear_interrupts(void __iomem *base) /* * The PL190 cell from ARM has been modified by ST to handle 64 interrupts. * The original cell has 32 interrupts, while the modified one has 64, - * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case + * replicating two blocks 0x00..0x1f in 0x20..0x3f. In that case * the probe function is called twice, with base set to offset 000 * and 020 within the page. We call this "second block". */ diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 1d3d273309bd..8cd1bfc73057 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -210,7 +210,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, /* * Disable all external interrupts until they are - * explicity requested. + * explicitly requested. */ xintc_write(irqc, IER, 0); diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index e3a8858915b3..df0eab479d51 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -963,7 +963,7 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) unsigned long flag; netif_stop_queue(dev); - tasklet_disable(&np->tx_tasklet); + tasklet_disable_in_atomic(&np->tx_tasklet); iowrite16(0, ioaddr + IntrEnable); printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " "TxFrameId %2.2x," diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index e9efe074edc1..f1b9284e0bea 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme) jwrite32f(jme, JME_APMC, apmc); } -static void jme_link_change_tasklet(struct tasklet_struct *t) +static void jme_link_change_work(struct work_struct *work) { - struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); + struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); struct net_device *netdev = jme->dev; int rc; @@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u32 intrstat) * all other events are ignored */ jwrite32(jme, JME_IEVE, intrstat); - tasklet_schedule(&jme->linkch_task); + schedule_work(&jme->linkch_task); goto out_reenable; } @@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); - tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); @@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) JME_NAPI_DISABLE(jme); - tasklet_kill(&jme->linkch_task); + cancel_work_sync(&jme->linkch_task); tasklet_kill(&jme->txclean_task); tasklet_kill(&jme->rxclean_task); tasklet_kill(&jme->rxempty_task); @@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, atomic_set(&jme->rx_empty, 1); tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); + INIT_WORK(&jme->linkch_task, jme_link_change_work); jme->dpi.cur = PCC_P1; jme->reg_ghc = 0; diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h index a2c3b00d939d..2af76329b4a2 100644 --- a/drivers/net/ethernet/jme.h +++ b/drivers/net/ethernet/jme.h @@ -411,7 +411,7 @@ struct jme_adapter { struct tasklet_struct rxempty_task; struct tasklet_struct rxclean_task; struct tasklet_struct txclean_task; - struct tasklet_struct linkch_task; + struct work_struct linkch_task; struct tasklet_struct pcc_task; unsigned long flags; u32 reg_txcs; diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 71e2ada86793..72e2e71aac0e 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(struct ath_softc *sc) int first_slot = ATH_BCBUF; int slot; - tasklet_disable(&sc->bcon_tasklet); + tasklet_disable_in_atomic(&sc->bcon_tasklet); /* Find first taken slot. */ for (slot = 0; slot < ATH_BCBUF; slot++) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 27a17a1e4a7c..a313708bcf75 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * Prevents hv_pci_onchannelcallback() from running concurrently * in the tasklet. */ - tasklet_disable(&channel->callback_event); + tasklet_disable_in_atomic(&channel->callback_event); /* * Since this function is called with IRQ locks held, can't diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index a19519f4241d..eed86eb0a1de 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -4,7 +4,7 @@ #include <linux/preempt.h> -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) @@ -32,4 +32,10 @@ static inline void local_bh_enable(void) __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } +#ifdef CONFIG_PREEMPT_RT +extern bool local_bh_blocked(void); +#else +static inline bool local_bh_blocked(void) { return false; } +#endif + #endif /* _LINUX_BH_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7c9d6a2d7e90..69bc86ea382c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -6,6 +6,7 @@ #include <linux/preempt.h> #include <linux/lockdep.h> #include <linux/ftrace_irq.h> +#include <linux/sched.h> #include <linux/vtime.h> #include <asm/hardirq.h> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 967e25767153..4777850a6dc7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -61,6 +61,9 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it. + * Users will enable it explicitly by enable_irq() or enable_nmi() + * later. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_NO_AUTOEN 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -654,26 +658,21 @@ enum TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock(struct tasklet_struct *t) -{ - smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); -} +void tasklet_unlock(struct tasklet_struct *t); +void tasklet_unlock_wait(struct tasklet_struct *t); +void tasklet_unlock_spin_wait(struct tasklet_struct *t); -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -} #else -#define tasklet_trylock(t) 1 -#define tasklet_unlock_wait(t) do { } while (0) -#define tasklet_unlock(t) do { } while (0) +static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } +static inline void tasklet_unlock(struct tasklet_struct *t) { } +static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } +static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); @@ -698,6 +697,17 @@ static inline void tasklet_disable_nosync(struct tasklet_struct *t) smp_mb__after_atomic(); } +/* + * Do not use in new code. Disabling tasklets from atomic contexts is + * error prone and should be avoided. + */ +static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) +{ + tasklet_disable_nosync(t); + tasklet_unlock_spin_wait(t); + smp_mb(); +} + static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); @@ -712,7 +722,6 @@ static inline void tasklet_enable(struct tasklet_struct *t) } extern void tasklet_kill(struct tasklet_struct *t); -extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, diff --git a/include/linux/irq.h b/include/linux/irq.h index 2efde6a79b7e..bee82809107c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -116,7 +116,7 @@ enum { * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping - * all descendent irqchips. + * all descendant irqchips. */ enum { IRQ_SET_MASK_OK = 0, @@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d) /* * Must only be called of irqchip.irq_set_affinity() or low level - * hieararchy domain allocation functions. + * hierarchy domain allocation functions. */ static inline void irqd_set_single_target(struct irq_data *d) { diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 891b323266df..df4651250785 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -32,7 +32,7 @@ struct pt_regs; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers - * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers + * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 69cc8b64aa3a..9881eac0698f 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -79,7 +79,11 @@ #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#ifdef CONFIG_PREEMPT_RT +# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +#else +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#endif #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bd04f722714f..6d855ef091ba 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -334,7 +334,8 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 05572e2140ad..0c56cc99e3da 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1043,6 +1043,9 @@ struct task_struct { int softirq_context; int irq_config; #endif +#ifdef CONFIG_PREEMPT_RT + int softirq_disable_cnt; +#endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6d89e33fe3aa..8cc8e5713287 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -761,7 +761,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq * - * Interrupt occures on the falling and/or rising edge of a hardware + * Interrupt occurs on the falling and/or rising edge of a hardware * signal. The occurrence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one @@ -808,7 +808,7 @@ void handle_edge_irq(struct irq_desc *desc) /* * When another irq arrived while we were handling * one, we could have masked the irq. - * Renable it, if it was not disabled in meantime. + * Reenable it, if it was not disabled in meantime. */ if (unlikely(desc->istate & IRQS_PENDING)) { if (!irqd_irq_disabled(&desc->irq_data) && @@ -1419,7 +1419,7 @@ EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); * @dest: The affinity mask to set * @force: Flag to enforce setting (disable online checks) * - * Conditinal, as the underlying parent chip might not implement it. + * Conditional, as the underlying parent chip might not implement it. */ int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force) @@ -1531,7 +1531,7 @@ EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** - * irq_chip_compose_msi_msg - Componse msi message for a irq chip + * irq_chip_compose_msi_msg - Compose msi message for a irq chip * @data: Pointer to interrupt specific data * @msg: Pointer to the MSI message * diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 0b0cdf206dc4..7fe6cffe7d0d 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -13,7 +13,7 @@ /* * What should we do if we get a hw irq event on an illegal vector? - * Each architecture has to answer this themself. + * Each architecture has to answer this themselves. */ static void ack_bad(struct irq_data *data) { diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 43e3d1be622c..52f11c791bf8 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -107,7 +107,7 @@ free_descs: * @irq: linux irq number to be destroyed * @dest: cpumask of cpus which should have the IPI removed * - * The IPIs allocated with irq_reserve_ipi() are retuerned to the system + * The IPIs allocated with irq_reserve_ipi() are returned to the system * destroying all virqs associated with them. * * Return 0 on success or error code on failure. diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 40880c350b95..0cd02efa3a74 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -24,10 +24,6 @@ struct irq_sim_irq_ctx { struct irq_sim_work_ctx *work_ctx; }; -struct irq_sim_devres { - struct irq_domain *domain; -}; - static void irq_sim_irqmask(struct irq_data *data) { struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); @@ -216,11 +212,11 @@ void irq_domain_remove_sim(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_domain_remove_sim); -static void devm_irq_domain_release_sim(struct device *dev, void *res) +static void devm_irq_domain_remove_sim(void *data) { - struct irq_sim_devres *this = res; + struct irq_domain *domain = data; - irq_domain_remove_sim(this->domain); + irq_domain_remove_sim(domain); } /** @@ -238,20 +234,17 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev, struct fwnode_handle *fwnode, unsigned int num_irqs) { - struct irq_sim_devres *dr; + struct irq_domain *domain; + int ret; - dr = devres_alloc(devm_irq_domain_release_sim, - sizeof(*dr), GFP_KERNEL); - if (!dr) - return ERR_PTR(-ENOMEM); + domain = irq_domain_create_sim(fwnode, num_irqs); + if (IS_ERR(domain)) + return domain; - dr->domain = irq_domain_create_sim(fwnode, num_irqs); - if (IS_ERR(dr->domain)) { - devres_free(dr); - return dr->domain; - } + ret = devm_add_action_or_reset(dev, devm_irq_domain_remove_sim, domain); + if (ret) + return ERR_PTR(ret); - devres_add(dev, dr); - return dr->domain; + return domain; } EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cc1a09406c6e..4a617d7312a4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str) cpulist_parse(str, irq_default_affinity); /* * Set at least the boot cpu. We don't want to end up with - * bugreports caused by random comandline masks + * bugreports caused by random commandline masks */ cpumask_set_cpu(smp_processor_id(), irq_default_affinity); return 1; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d10ab1d689d5..1d5d57d1ad0b 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); * @name: Optional user provided domain name * @pa: Optional user-provided physical address * - * Allocate a struct irqchip_fwid, and return a poiner to the embedded + * Allocate a struct irqchip_fwid, and return a pointer to the embedded * fwnode_handle (or NULL on failure). * * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are @@ -665,7 +665,7 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { @@ -906,7 +906,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, { struct irq_data *data; - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) @@ -1436,7 +1436,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, * The whole process to setup an IRQ has been split into two steps. * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ * descriptor and required hardware resources. The second step, - * irq_domain_activate_irq(), is to program hardwares with preallocated + * irq_domain_activate_irq(), is to program the hardware with preallocated * resources. In this way, it's easier to rollback when failing to * allocate resources. */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 21ea370fccda..4c14356543d9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -179,7 +179,7 @@ bool irq_can_set_affinity_usr(unsigned int irq) /** * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affitnity changed + * @desc: irq descriptor which has affinity changed * * We just set IRQTF_AFFINITY and delegate the affinity setting * to the interrupt thread itself. We can not call @@ -326,7 +326,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a - * useable state so startup works. + * usable state so startup works. */ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data) || !irqd_affinity_on_activate(data)) @@ -1054,7 +1054,7 @@ again: * to IRQS_INPROGRESS and the irq line is masked forever. * * This also serializes the state of shared oneshot handlers - * versus "desc->threads_onehsot |= action->thread_mask;" in + * versus "desc->threads_oneshot |= action->thread_mask;" in * irq_wake_thread(). See the comment there which explains the * serialization. */ @@ -1157,7 +1157,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) /* * Interrupts explicitly requested as threaded interrupts want to be - * preemtible - many of them need to sleep and wait for slow busses to + * preemptible - many of them need to sleep and wait for slow busses to * complete. */ static irqreturn_t irq_thread_fn(struct irq_desc *desc, @@ -1697,7 +1697,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - if (irq_settings_can_autoenable(desc)) { + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); } else { /* @@ -1912,7 +1913,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) /* Last action releases resources */ if (!desc->action) { /* - * Reaquire bus lock as irq_release_resources() might + * Reacquire bus lock as irq_release_resources() might * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); @@ -2090,10 +2091,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * which interrupt is which (messes up the interrupt freeing * logic etc). * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and * it cannot be set along with IRQF_NO_SUSPEND. */ if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; @@ -2249,7 +2255,8 @@ int request_nmi(unsigned int irq, irq_handler_t handler, desc = irq_to_desc(irq); - if (!desc || irq_settings_can_autoenable(desc) || + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || !irq_settings_can_request(desc) || WARN_ON(irq_settings_is_per_cpu_devid(desc)) || !irq_supports_nmi(desc)) @@ -2746,7 +2753,7 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM * @which: One of IRQCHIP_STATE_* the caller wants to know about - * @state: a pointer to a boolean where the state is to be storeed + * @state: a pointer to a boolean where the state is to be stored * * This call snapshots the internal irqchip state of an * interrupt, returning into @state the bit corresponding to diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 651a4ad6d711..578596e41cb6 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -337,15 +337,14 @@ void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) * irq_matrix_reserve - Reserve interrupts * @m: Matrix pointer * - * This is merily a book keeping call. It increments the number of globally + * This is merely a book keeping call. It increments the number of globally * reserved interrupt bits w/o actually allocating them. This allows to * setup interrupt descriptors w/o assigning low level resources to it. * The actual allocation happens when the interrupt gets activated. */ void irq_matrix_reserve(struct irq_matrix *m) { - if (m->global_reserved <= m->global_available && - m->global_reserved + 1 > m->global_available) + if (m->global_reserved == m->global_available) pr_warn("Interrupt reservation exceeds available resources\n"); m->global_reserved++; @@ -356,7 +355,7 @@ void irq_matrix_reserve(struct irq_matrix *m) * irq_matrix_remove_reserved - Remove interrupt reservation * @m: Matrix pointer * - * This is merily a book keeping call. It decrements the number of globally + * This is merely a book keeping call. It decrements the number of globally * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the * interrupt was never in use and a real vector allocated, which undid the * reservation. @@ -423,7 +422,9 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; - clear_bit(bit, cm->alloc_map); + if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) + return; + cm->allocated--; if(managed) cm->managed_allocated--; diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index def48589ea48..61ca924ef4b4 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -7,7 +7,7 @@ /** * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU - * @desc: Interrupt descpriptor to clean up + * @desc: Interrupt descriptor to clean up * @force_clear: If set clear the move pending bit unconditionally. * If not set, clear it only when the dying CPU is the * last one in the pending mask. diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index b338d622f26e..c41965e348b5 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -5,7 +5,7 @@ * * This file is licensed under GPLv2. * - * This file contains common code to support Message Signalled Interrupt for + * This file contains common code to support Message Signaled Interrupts for * PCI compatible and non PCI compatible devices. */ #include <linux/types.h> diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index bd1d85c610aa..0c46e9fe3a89 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -128,7 +128,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject) if (!try_retrigger(desc)) err = irq_sw_resend(desc); - /* If the retrigger was successfull, mark it with the REPLAY bit */ + /* If the retrigger was successful, mark it with the REPLAY bit */ if (!err) desc->istate |= IRQS_REPLAY; return err; diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 773b6105c4ae..d309d6fbf5bd 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -84,7 +84,7 @@ void irq_timings_disable(void) * 2. Log interval * * We saw the irq timings allow to compute the interval of the - * occurrences for a specific interrupt. We can reasonibly assume the + * occurrences for a specific interrupt. We can reasonably assume the * longer is the interval, the higher is the error for the next event * and we can consider storing those interval values into an array * where each slot in the array correspond to an interval at the power @@ -416,7 +416,7 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) * Copy the content of the circular buffer into another buffer * in order to linearize the buffer instead of dealing with * wrapping indexes and shifted array which will be prone to - * error and extremelly difficult to debug. + * error and extremely difficult to debug. */ for (i = 0; i < count; i++) { int index = (start + i) & IRQ_TIMINGS_MASK; @@ -485,7 +485,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) /* * The interrupt triggered more than one second apart, that - * ends the sequence as predictible for our purpose. In this + * ends the sequence as predictable for our purpose. In this * case, assume we have the beginning of a sequence and the * timestamp is the first value. As it is impossible to * predict anything at this point, return. @@ -514,7 +514,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) * If more than the array size interrupts happened during the * last busy/idle cycle, the index wrapped up and we have to * begin with the next element in the array which is the last one - * in the sequence, otherwise it is a the index 0. + * in the sequence, otherwise it is at the index 0. * * - have an indication of the interrupts activity on this CPU * (eg. irq/sec) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3b36644fa80a..872e481d5098 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset) cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; - pc = preempt_count() - offset; + pc = irq_count() - offset; /* * We do not account for softirq time from ksoftirqd here. @@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struct *prev) void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { - unsigned int pc = preempt_count() - offset; + unsigned int pc = irq_count() - offset; if (pc & HARDIRQ_OFFSET) { vtime_account_hardirq(tsk); diff --git a/kernel/softirq.c b/kernel/softirq.c index 9908ec4a9bfe..5a99696da86a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -13,6 +13,7 @@ #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/local_lock.h> #include <linux/mm.h> #include <linux/notifier.h> #include <linux/percpu.h> @@ -25,6 +26,7 @@ #include <linux/smpboot.h> #include <linux/tick.h> #include <linux/irq.h> +#include <linux/wait_bit.h> #include <asm/softirq_stack.h> @@ -102,20 +104,204 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); #endif /* - * preempt_count and SOFTIRQ_OFFSET usage: - * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving - * softirq processing. - * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) + * SOFTIRQ_OFFSET usage: + * + * On !RT kernels 'count' is the preempt counter, on RT kernels this applies + * to a per CPU counter and to task::softirqs_disabled_cnt. + * + * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq + * processing. + * + * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) * on local_bh_disable or local_bh_enable. + * * This lets us distinguish between whether we are currently processing * softirq and whether we just have bh disabled. */ +#ifdef CONFIG_PREEMPT_RT + +/* + * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and + * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a + * softirq disabled section to be preempted. + * + * The per task counter is used for softirq_count(), in_softirq() and + * in_serving_softirqs() because these counts are only valid when the task + * holding softirq_ctrl::lock is running. + * + * The per CPU counter prevents pointless wakeups of ksoftirqd in case that + * the task which is in a softirq disabled section is preempted or blocks. + */ +struct softirq_ctrl { + local_lock_t lock; + int cnt; +}; + +static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { + .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), +}; + +/** + * local_bh_blocked() - Check for idle whether BH processing is blocked + * + * Returns false if the per CPU softirq::cnt is 0 otherwise true. + * + * This is invoked from the idle task to guard against false positive + * softirq pending warnings, which would happen when the task which holds + * softirq_ctrl::lock was the only running task on the CPU and blocks on + * some other lock. + */ +bool local_bh_blocked(void) +{ + return __this_cpu_read(softirq_ctrl.cnt) != 0; +} + +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +{ + unsigned long flags; + int newcnt; + + WARN_ON_ONCE(in_hardirq()); + + /* First entry of a task into a BH disabled section? */ + if (!current->softirq_disable_cnt) { + if (preemptible()) { + local_lock(&softirq_ctrl.lock); + /* Required to meet the RCU bottomhalf requirements. */ + rcu_read_lock(); + } else { + DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); + } + } + + /* + * Track the per CPU softirq disabled state. On RT this is per CPU + * state to allow preemption of bottom half disabled sections. + */ + newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. + */ + current->softirq_disable_cnt = newcnt; + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__local_bh_disable_ip); + +static void __local_bh_enable(unsigned int cnt, bool unlock) +{ + unsigned long flags; + int newcnt; + + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_on(_RET_IP_); + raw_local_irq_restore(flags); + } + + newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; + + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } +} + +void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) +{ + bool preempt_on = preemptible(); + unsigned long flags; + u32 pending; + int curcnt; + + WARN_ON_ONCE(in_irq()); + lockdep_assert_irqs_enabled(); + + local_irq_save(flags); + curcnt = __this_cpu_read(softirq_ctrl.cnt); + + /* + * If this is not reenabling soft interrupts, no point in trying to + * run pending ones. + */ + if (curcnt != cnt) + goto out; + + pending = local_softirq_pending(); + if (!pending || ksoftirqd_running(pending)) + goto out; + + /* + * If this was called from non preemptible context, wake up the + * softirq daemon. + */ + if (!preempt_on) { + wakeup_softirqd(); + goto out; + } + + /* + * Adjust softirq count to SOFTIRQ_OFFSET which makes + * in_serving_softirq() become true. + */ + cnt = SOFTIRQ_OFFSET; + __local_bh_enable(cnt, false); + __do_softirq(); + +out: + __local_bh_enable(cnt, preempt_on); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__local_bh_enable_ip); + +/* + * Invoked from ksoftirqd_run() outside of the interrupt disabled section + * to acquire the per CPU local lock for reentrancy protection. + */ +static inline void ksoftirqd_run_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + local_irq_disable(); +} + +/* Counterpart to ksoftirqd_run_begin() */ +static inline void ksoftirqd_run_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET, true); + WARN_ON_ONCE(in_interrupt()); + local_irq_enable(); +} + +static inline void softirq_handle_begin(void) { } +static inline void softirq_handle_end(void) { } + +static inline bool should_wake_ksoftirqd(void) +{ + return !this_cpu_read(softirq_ctrl.cnt); +} + +static inline void invoke_softirq(void) +{ + if (should_wake_ksoftirqd()) + wakeup_softirqd(); +} + +#else /* CONFIG_PREEMPT_RT */ -#ifdef CONFIG_TRACE_IRQFLAGS /* - * This is for softirq.c-internal use, where hardirqs are disabled + * This one is for softirq.c-internal use, where hardirqs are disabled * legitimately: */ +#ifdef CONFIG_TRACE_IRQFLAGS void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; @@ -206,6 +392,32 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) } EXPORT_SYMBOL(__local_bh_enable_ip); +static inline void softirq_handle_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); +} + +static inline void softirq_handle_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET); + WARN_ON_ONCE(in_interrupt()); +} + +static inline void ksoftirqd_run_begin(void) +{ + local_irq_disable(); +} + +static inline void ksoftirqd_run_end(void) +{ + local_irq_enable(); +} + +static inline bool should_wake_ksoftirqd(void) +{ + return true; +} + static inline void invoke_softirq(void) { if (ksoftirqd_running(local_softirq_pending())) @@ -250,6 +462,8 @@ asmlinkage __visible void do_softirq(void) local_irq_restore(flags); } +#endif /* !CONFIG_PREEMPT_RT */ + /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, * but break the loop if need_resched() is set or after 2 ms. @@ -318,7 +532,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending = local_softirq_pending(); - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + softirq_handle_begin(); in_hardirq = lockdep_softirq_start(); account_softirq_enter(current); @@ -354,8 +568,10 @@ restart: pending >>= softirq_bit; } - if (__this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && + __this_cpu_read(ksoftirqd) == current) rcu_softirq_qs(); + local_irq_disable(); pending = local_softirq_pending(); @@ -369,8 +585,7 @@ restart: account_softirq_exit(current); lockdep_softirq_end(in_hardirq); - __local_bh_enable(SOFTIRQ_OFFSET); - WARN_ON_ONCE(in_interrupt()); + softirq_handle_end(); current_restore_flags(old_flags, PF_MEMALLOC); } @@ -465,7 +680,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * Otherwise we wake up ksoftirqd to make sure we * schedule the softirq soon. */ - if (!in_interrupt()) + if (!in_interrupt() && should_wake_ksoftirqd()) wakeup_softirqd(); } @@ -531,6 +746,20 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); +static bool tasklet_clear_sched(struct tasklet_struct *t) +{ + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { + wake_up_var(&t->state); + return true; + } + + WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n", + t->use_callback ? "callback" : "func", + t->use_callback ? (void *)t->callback : (void *)t->func); + + return false; +} + static void tasklet_action_common(struct softirq_action *a, struct tasklet_head *tl_head, unsigned int softirq_nr) @@ -550,13 +779,12 @@ static void tasklet_action_common(struct softirq_action *a, if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, - &t->state)) - BUG(); - if (t->use_callback) - t->callback(t); - else - t->func(t->data); + if (tasklet_clear_sched(t)) { + if (t->use_callback) + t->callback(t); + else + t->func(t->data); + } tasklet_unlock(t); continue; } @@ -606,21 +834,62 @@ void tasklet_init(struct tasklet_struct *t, } EXPORT_SYMBOL(tasklet_init); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +/* + * Do not use in new code. Waiting for tasklets from atomic contexts is + * error prone and should be avoided. + */ +void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Prevent a live lock when current preempted soft + * interrupt processing or prevents ksoftirqd from + * running. If the tasklet runs on a different CPU + * then this has no effect other than doing the BH + * disable/enable dance for nothing. + */ + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } + } +} +EXPORT_SYMBOL(tasklet_unlock_spin_wait); +#endif + void tasklet_kill(struct tasklet_struct *t) { if (in_interrupt()) pr_notice("Attempt to kill tasklet from interrupt\n"); - while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do { - yield(); - } while (test_bit(TASKLET_STATE_SCHED, &t->state)); - } + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); + tasklet_unlock_wait(t); - clear_bit(TASKLET_STATE_SCHED, &t->state); + tasklet_clear_sched(t); } EXPORT_SYMBOL(tasklet_kill); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +void tasklet_unlock(struct tasklet_struct *t) +{ + smp_mb__before_atomic(); + clear_bit(TASKLET_STATE_RUN, &t->state); + smp_mb__after_atomic(); + wake_up_var(&t->state); +} +EXPORT_SYMBOL_GPL(tasklet_unlock); + +void tasklet_unlock_wait(struct tasklet_struct *t) +{ + wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); +} +EXPORT_SYMBOL_GPL(tasklet_unlock_wait); +#endif + void __init softirq_init(void) { int cpu; @@ -643,53 +912,21 @@ static int ksoftirqd_should_run(unsigned int cpu) static void run_ksoftirqd(unsigned int cpu) { - local_irq_disable(); + ksoftirqd_run_begin(); if (local_softirq_pending()) { /* * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ __do_softirq(); - local_irq_enable(); + ksoftirqd_run_end(); cond_resched(); return; } - local_irq_enable(); + ksoftirqd_run_end(); } #ifdef CONFIG_HOTPLUG_CPU -/* - * tasklet_kill_immediate is called to remove a tasklet which can already be - * scheduled for execution on @cpu. - * - * Unlike tasklet_kill, this function removes the tasklet - * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state. - * - * When this function is called, @cpu must be in the CPU_DEAD state. - */ -void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) -{ - struct tasklet_struct **i; - - BUG_ON(cpu_online(cpu)); - BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state)); - - if (!test_bit(TASKLET_STATE_SCHED, &t->state)) - return; - - /* CPU is dead, so no lock needed. */ - for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { - if (*i == t) { - *i = t->next; - /* If this was the tail element, move the tail ptr */ - if (*i == NULL) - per_cpu(tasklet_vec, cpu).tail = i; - return; - } - } - BUG(); -} - static int takeover_tasklets(unsigned int cpu) { /* CPU is dead, so no lock needed. */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 128735e3e77e..d34894f3862a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending())) { static int ratelimit; - if (ratelimit < 10 && + if (ratelimit < 10 && !local_bh_blocked() && (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", (unsigned int) local_softirq_pending()); |