diff options
Diffstat (limited to 'arch')
68 files changed, 592 insertions, 227 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 659bdd079277..099fc0f5155e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE + depends on !PREEMPT_RT_FULL select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help @@ -52,6 +53,7 @@ config KPROBES config JUMP_LABEL bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) help This option enables a transparent branch optimization that makes certain almost-always-true or almost-always-false branch diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b5d529fdffab..5715844e83e3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,7 +36,7 @@ config ARM select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) @@ -75,6 +75,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index e53638c8ed8a..6095a1649865 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -22,6 +22,8 @@ #endif #ifndef __ASSEMBLY__ +#include <linux/cpumask.h> + struct irqaction; struct pt_regs; extern void migrate_irqs(void); diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index 12ebfcc1d539..c962084605bc 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -3,6 +3,13 @@ #include <linux/thread_info.h> +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * For v7 SMP cores running a preemptible kernel we may be pre-empted * during a TLB maintenance operation, so execute an inner-shareable dsb @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ + switch_kmaps(prev, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 776757d1604a..1f36a4eccc72 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -49,6 +49,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ +#define TIF_NEED_RESCHED_LAZY 7 #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, * Change these and you break ASM code in entry-common.S */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED_LAZY) #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 608008229c7d..3866da3f7bb7 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -65,6 +65,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 9f157e7c51e7..468e224d76aa 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -220,11 +220,18 @@ __irq_svc: #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + bne 1f @ return from exeption + ldr r0, [tsk, #TI_FLAGS] @ get flags + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set + blne svc_preempt @ preempt! + + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r8, #0 @ if preempt lazy count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED_LAZY blne svc_preempt +1: #endif svc_exit r5, irq = 1 @ return from exception @@ -239,8 +246,14 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + bne 1b + tst r0, #_TIF_NEED_RESCHED_LAZY reteq r8 @ go again - b 1b + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r0, #0 @ if preempt lazy count != 0 + beq 1b + ret r8 @ go again + #endif __und_fault: diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 10c3283d6c19..8872937862cc 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -36,7 +36,9 @@ ret_fast_syscall: UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) + bne fast_work_pending + tst r1, #_TIF_SECCOMP bne fast_work_pending /* perform architecture specific actions before user return */ @@ -62,8 +64,11 @@ ret_fast_syscall: str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) + bne do_slower_path + tst r1, #_TIF_SECCOMP beq no_work_pending +do_slower_path: UNWIND(.fnend ) ENDPROC(ret_fast_syscall) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index 69bda1a5707e..1f665acaa6a9 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -15,7 +15,7 @@ struct patch { unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 91d2d5b01414..750550098b59 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +/* + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not + * initialized by pgtable_page_ctor() then a coredump of the vector page will + * fail. + */ +static int __init vectors_user_mapping_init_page(void) +{ + struct page *page; + unsigned long addr = 0xffff0000; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + page = pmd_page(*(pmd)); + + pgtable_page_ctor(page); + + return 0; +} +late_initcall(vectors_user_mapping_init_page); + #ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 7b8f2141427b..96541e00b74a 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) */ trace_hardirqs_off(); do { - if (likely(thread_flags & _TIF_NEED_RESCHED)) { + if (likely(thread_flags & (_TIF_NEED_RESCHED | + _TIF_NEED_RESCHED_LAZY))) { schedule(); } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8395e6..4cd7e3d98035 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -234,8 +234,6 @@ int __cpu_disable(void) flush_cache_louis(); local_flush_tlb_all(); - clear_tasks_mm_cpumask(cpu); - return 0; } @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } + + clear_tasks_mm_cpumask(cpu); + pr_notice("CPU%u: shutdown\n", cpu); /* diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 0bee233fef9a..314cfb232a63 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_SPINLOCK(unwind_lock); +static DEFINE_RAW_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) /* module unwind tables */ struct unwind_table *table; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) break; } } - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) if (!tab) return; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 19b5f5c1c0ff..82aa639e6737 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * involves poking the GIC, which must be done in a * non-preemptible context. */ - preempt_disable(); + migrate_disable(); kvm_pmu_flush_hwstate(vcpu); kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); @@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); + migrate_enable(); continue; } @@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); + migrate_enable(); ret = handle_exit(vcpu, run, ret); } diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 98ffe1e62ad5..df9769ddece5 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)(S5P_VA_SCU); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void exynos_secondary_init(unsigned int cpu) { @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * calibrations, then wait for it to finish */ fail: - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? ret : 0; } diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 4b653a8cb75c..b03d5a922cb1 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -61,7 +61,7 @@ static void __iomem *sysctrl, *fabric; static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static u32 fabric_phys_addr; /* * [0]: bootwrapper physical address @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) return -EINVAL; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); if (hip04_cpu_table[cluster][cpu]) goto out; @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) out: hip04_cpu_table[cluster][cpu]++; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { /* A power_up request went ahead of us. */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return; } else if (hip04_cpu_table[cluster][cpu] > 1) { pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) } last_man = hip04_cluster_is_down(cluster); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); if (last_man) { /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { if (hip04_cpu_table[cluster][cpu]) goto err; @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) break; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); /* Wait for clean L2 when the whole cluster is down. */ msleep(POLL_MSEC); - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); } if (tries >= count) goto err; @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) goto err; if (hip04_cluster_is_down(cluster)) hip04_set_snoop_filter(cluster, 0); - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 1; err: - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } #endif diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index b4de3da6dffa..b52893319d75 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { .startup_addr = omap5_secondary_startup, }; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return 0; } diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 0875b99add18..18b6d98d2581 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -22,7 +22,7 @@ static void __iomem *clk_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sirfsoc_secondary_init(unsigned int cpu) { @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static const struct of_device_id clk_ids[] = { @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) /* make sure write buffer is drained */ mb(); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index 5494c9e0c909..e8ce157d3548 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -46,7 +46,7 @@ extern void secondary_startup_arm(void); -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); #ifdef CONFIG_HOTPLUG_CPU static void qcom_cpu_die(unsigned int cpu) @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int scss_release_secondary(unsigned int cpu) @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Send the secondary CPU a soft interrupt, thereby causing @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return ret; } diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 8d1e2d551786..7fa56cc78118 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -32,7 +32,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index ea5a2277ee46..b988e081ac79 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -35,7 +35,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sti_secondary_init(unsigned int cpu) { @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index f7861dc83182..ce47dfe25fb0 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -433,6 +433,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (interrupts_enabled(regs)) + local_irq_enable(); + if (user_mode(regs)) goto bad_area; @@ -500,6 +503,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (interrupts_enabled(regs)) + local_irq_enable(); + do_bad_area(addr, fsr, regs); return 0; } diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index d02f8187b1cc..542692dbd40a 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) return *ptep; } +static unsigned int fixmap_idx(int type) +{ + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); +} + void *kmap(struct page *page) { might_sleep(); @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); void *kmap_atomic(struct page *page) { + pte_t pte = mk_pte(page, kmap_prot); unsigned int idx; unsigned long vaddr; void *kmap; int type; - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) * in place, so the contained TLB flush ensures the TLB is updated * with the new mapping. */ - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(idx)); - set_fixmap_pte(idx, __pte(0)); #else (void) idx; /* to kill a warning */ #endif + set_fixmap_pte(idx, __pte(0)); kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); - preempt_enable(); + preempt_enable_nort(); } EXPORT_SYMBOL(__kunmap_atomic); void *kmap_atomic_pfn(unsigned long pfn) { + pte_t pte = pfn_pte(pfn, kmap_prot); unsigned long vaddr; int idx, type; struct page *page = pfn_to_page(pfn); - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); type = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } +#if defined CONFIG_PREEMPT_RT_FULL +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = fixmap_idx(i); + + set_fixmap_pte(idx, __pte(0)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = fixmap_idx(i); + + if (!pte_none(next_p->kmap_pte[i])) + set_fixmap_pte(idx, next_p->kmap_pte[i]); + } +} +#endif diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index c2366510187a..6b60f582b738 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -32,7 +32,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void versatile_secondary_init(unsigned int cpu) { @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cf57a7799a0f..78d1b49fbed5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -91,6 +91,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS @@ -704,7 +705,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" - depends on ARM64 && OF + depends on ARM64 && OF && !PREEMPT_RT_FULL select SWIOTLB_XEN select PARAVIRT help diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e9ea5a6bd449..6c500ad63c6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -49,6 +49,7 @@ struct thread_info { mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ }; @@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_NEED_RESCHED_LAZY 4 #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) @@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_NEED_RESCHED_LAZY) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c58ddf8c4062..a8f2f7c1fe12 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -38,6 +38,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b4c7db434654..433d846f4f51 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -430,11 +430,16 @@ el1_irq: #ifdef CONFIG_PREEMPT ldr w24, [tsk, #TI_PREEMPT] // get preempt count - cbnz w24, 1f // preempt count != 0 + cbnz w24, 2f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? - bl el1_preempt + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? + + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count + cbnz w24, 2f // preempt lazy count != 0 + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? 1: + bl el1_preempt +2: #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on @@ -448,6 +453,7 @@ el1_preempt: 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? ret x24 #endif diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 404dd67080b9..639dc6d12e72 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, */ trace_hardirqs_off(); do { - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { schedule(); } else { local_irq_enable(); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5e844f68e847..dc613cc10f54 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2516,7 +2516,7 @@ config MIPS_ASID_BITS_VARIABLE # config HIGHMEM bool "High Memory Support" - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8f01f21e78f1..619485b777d2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool + default y if PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM bool - default y + default y if !PREEMPT_RT_FULL config GENERIC_LOCKBREAK bool @@ -134,6 +135,7 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_PREEMPT_LAZY select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS @@ -321,7 +323,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" - depends on PPC32 + depends on PPC32 && !PREEMPT_RT_FULL source kernel/Kconfig.hz source kernel/Kconfig.preempt diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 87e4b2d8dcd4..981e501a4359 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,6 +43,8 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! */ #endif +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_RESTORE_TM) + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c833d88c423d..96e9fbc3f684 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -156,6 +156,7 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3841d749a430..6dbaeff192b9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -835,7 +835,14 @@ resume_kernel: cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED + bne+ 1f + lwz r0,TI_PREEMPT_LAZY(r9) + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ + bne restore + lwz r0,TI_FLAGS(r9) + andi. r0,r0,_TIF_NEED_RESCHED_LAZY beq+ restore +1: lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ @@ -846,11 +853,11 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq +2: bl preempt_schedule_irq CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + andi. r0,r3,_TIF_NEED_RESCHED_MASK + bne- 2b #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -1171,7 +1178,7 @@ global_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -1192,7 +1199,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK bne- do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index caa659671599..891080c4a41e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite) bl restore_math b restore #endif -1: andi. r0,r4,_TIF_NEED_RESCHED +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK beq 2f bl restore_interrupts SCHEDULE_USER @@ -718,10 +718,18 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ + lwz r8,TI_PREEMPT(r9) + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ + bne restore andi. r0,r4,_TIF_NEED_RESCHED + bne+ check_count + + andi. r0,r4,_TIF_NEED_RESCHED_LAZY beq+ restore + lwz r8,TI_PREEMPT_LAZY(r9) + /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) +check_count: cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 @@ -738,7 +746,7 @@ resume_kernel: /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED + andi. r0,r4,_TIF_NEED_RESCHED_MASK bne 1b /* diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 028a22bfa90c..a75e2dd3e71f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -651,6 +651,7 @@ void irq_ctx_init(void) } } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; @@ -668,6 +669,7 @@ void do_softirq_own_stack(void) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } +#endif irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 030d72df5dd5..b471a709e100 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -41,6 +41,7 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq) stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr +#endif /* * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4cefe6888b18..cb2ee4be999a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -31,6 +31,7 @@ .text +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq) ld r0,16(r1) mtlr r0 blr +#endif _GLOBAL(call_do_irq) mflr r0 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 029be26b5a17..9528089ea142 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -175,6 +175,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 + depends on !PREEMPT_RT_FULL select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e48462447ff0..2670cee66064 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, } pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); - res = wait_event_interruptible(dev->done.wait, + res = swait_event_interruptible(dev->done.wait, dev->done.done || kthread_should_stop()); if (kthread_should_stop()) res = -EINTR; diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 6c0378c0b8b5..abd58b4dff97 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curctx; @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" ); } +#endif #else static inline void handle_one_irq(unsigned int irq) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b27e48e25841..2965bf76d070 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -194,12 +194,10 @@ config NR_CPUS source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK - bool - default y if SPARC32 + def_bool PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM - bool - default y if SPARC64 + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_HWEIGHT bool diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 5cbf03c14981..6067d9379e5b 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) set_irq_regs(old_regs); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { void *orig_sp, *sp = softirq_stack[smp_processor_id()]; @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); } +#endif #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..f8a995c90c01 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select HAVE_PREEMPT_LAZY select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ANON_INODES @@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API +config RWSEM_GENERIC_SPINLOCK + def_bool PREEMPT_RT_FULL + config RWSEM_XCHGADD_ALGORITHM - def_bool y + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_CALIBRATE_DELAY def_bool y @@ -897,7 +901,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index aa8b0672f87a..2429414bfc71 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + kernel_fpu_begin(); aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { + kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); + kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } - kernel_fpu_end(); return err; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 8648158f3916..d7699130ee36 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, bool enc) { - bool fpu_enabled = false; + bool fpu_enabled; struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -227,7 +226,7 @@ done: static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __cbc_decrypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -311,7 +309,7 @@ done: static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __ctr_crypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - cast5_fpu_end(fpu_enabled); - if (walk.nbytes) { ctr_crypt_final(desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 6a85598931b5..3a506ce7ed93 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; + bool fpu_enabled; int err; err = blkcipher_walk_virt(desc, walk); @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, } done: + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); - if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, + desc, false, nbytes < bsize ? bsize : nbytes); - /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); + glue_fpu_end(fpu_enabled); while (nbytes) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, false, nbytes); nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); nbytes = walk.nbytes; } - - glue_fpu_end(fpu_enabled); - return err; } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdd9cc59d20f..56d01a339ba4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs) #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { @@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* We have work to do. */ local_irq_enable(); - if (cached_flags & _TIF_NEED_RESCHED) + if (cached_flags & _TIF_NEED_RESCHED_MASK) schedule(); +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND + if (unlikely(current->forced_info.si_signo)) { + struct task_struct *t = current; + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); + t->forced_info.si_signo = 0; + } +#endif if (cached_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index edba8606b99a..4a3389535fc6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -308,8 +308,25 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) need_resched: + # preempt count == 0 + NEED_RS set? cmpl $0, PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz restore_all +#else + jz test_int_off + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jne restore_all + + movl PER_CPU_VAR(current_task), %ebp + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? + jnz restore_all + + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) + jz restore_all +test_int_off: +#endif testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7b0e7ff4c58..65916d49dbc9 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -546,7 +546,23 @@ retint_kernel: bt $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz 1f +#else + jz do_preempt_schedule_irq + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jnz 1f + + movq PER_CPU_VAR(current_task), %rcx + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) + jnz 1f + + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) + jnc 1f +do_preempt_schedule_irq: +#endif call preempt_schedule_irq jmp 0b 1: @@ -894,6 +910,7 @@ bad_gs: jmp 2b .previous +#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) pushq %rbp @@ -906,6 +923,7 @@ ENTRY(do_softirq_own_stack) decl PER_CPU_VAR(irq_count) ret END(do_softirq_own_stack) +#endif #ifdef CONFIG_XEN idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 17f218645701..11bd1b7ee6eb 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) * a decrement which hits zero means we have no preempt_count and should * reschedule. */ -static __always_inline bool __preempt_count_dec_and_test(void) +static __always_inline bool ____preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } +static __always_inline bool __preempt_count_dec_and_test(void) +{ + if (____preempt_count_dec_and_test()) + return true; +#ifdef CONFIG_PREEMPT_LAZY + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else + return false; +#endif +} + /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(int preempt_offset) { +#ifdef CONFIG_PREEMPT_LAZY + u32 tmp; + + tmp = raw_cpu_read_4(__preempt_count); + if (tmp == preempt_offset) + return true; + + /* preempt count == 0 ? */ + tmp &= ~PREEMPT_NEED_RESCHED; + if (tmp) + return false; + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); +#endif } #ifdef CONFIG_PREEMPT diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 8af22be0fe61..d1328789b759 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -27,6 +27,19 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u +/* + * Because some traps use the IST stack, we must keep preemption + * disabled while calling do_trap(), but do_trap() may call + * force_sig_info() which will grab the signal spin_locks for the + * task, which in PREEMPT_RT_FULL are mutexes. By defining + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the + * trap. + */ +#if defined(CONFIG_PREEMPT_RT_FULL) +#define ARCH_RT_DELAYS_SIGNAL_SEND +#endif + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 58505f01962f..02fa39652cd6 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -59,7 +59,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; + u64 uninitialized_var(canary); u64 tsc; #ifdef CONFIG_X86_64 @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. + * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary + * uninitialized and use the TSC based randomness on top of it. */ +#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); +#endif tsc = rdtsc(); canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..5ceb3a1c2b1a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -54,11 +54,14 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + int preempt_lazy_count; /* 0 => lazy preemptable + <0 => BUG */ }; #define INIT_THREAD_INFO(tsk) \ { \ .flags = 0, \ + .preempt_lazy_count = 0, \ } #define init_stack (init_thread_union.stack) @@ -67,6 +70,10 @@ struct thread_info { #include <asm/asm-offsets.h> +#define GET_THREAD_INFO(reg) \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ + _ASM_SUB $(THREAD_SIZE),reg ; + #endif /* @@ -85,6 +92,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -108,6 +116,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -143,6 +152,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #define STACK_WARN (THREAD_SIZE/8) /* diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 57ab86d94d64..35d25e27180f 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -624,9 +624,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; + raw_spinlock_t uvhub_lock; + raw_spinlock_t queue_lock; + raw_spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v) * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) { - spin_lock(lock); + raw_spin_lock(lock); if (atomic_read(v) >= u) { - spin_unlock(lock); + raw_spin_unlock(lock); return 0; } atomic_inc(v); - spin_unlock(lock); + raw_spin_unlock(lock); return 1; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b89bef95f63b..c3c1ad2fce5c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; * ->ioapic_mutex * ->ioapic_lock */ +#ifdef CONFIG_X86_IO_APIC static DEFINE_MUTEX(acpi_ioapic_lock); +#endif /* -------------------------------------------------------------------------- Boot-time Configuration diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cf89928dbd46..18b5ec2a71df 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { mask_ioapic_irq(data); return true; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index c62e015b126c..0cc71257fca6 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -36,6 +36,7 @@ void common(void) { BLANK(); OFFSET(TASK_TI_flags, task_struct, thread_info.flags); + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); @@ -91,4 +92,5 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ca5f8ad008e..edcbd18b3189 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -41,6 +41,8 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jiffies.h> +#include <linux/swork.h> #include <linux/jump_label.h> #include <asm/processor.h> @@ -1306,7 +1308,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static DEFINE_PER_CPU(struct hrtimer, mce_timer); static unsigned long mce_adjust_timer_default(unsigned long interval) { @@ -1315,32 +1317,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void __restart_timer(struct timer_list *t, unsigned long interval) +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) { - unsigned long when = jiffies + interval; - unsigned long flags; - - local_irq_save(flags); - - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer(t, when); - } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); - } - - local_irq_restore(flags); + if (!interval) + return HRTIMER_NORESTART; + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); + return HRTIMER_RESTART; } -static void mce_timer_fn(unsigned long data) +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) { - struct timer_list *t = this_cpu_ptr(&mce_timer); - int cpu = smp_processor_id(); unsigned long iv; - WARN_ON(cpu != data); - iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { @@ -1363,7 +1351,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); - __restart_timer(t, iv); + return __restart_timer(timer, iv); } /* @@ -1371,7 +1359,7 @@ done: */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = this_cpu_ptr(&mce_timer); + struct hrtimer *t = this_cpu_ptr(&mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); __restart_timer(t, interval); @@ -1386,7 +1374,7 @@ static void mce_timer_delete_all(void) int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1396,6 +1384,56 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); +static void __mce_notify_work(struct swork_event *event) +{ + /* Not more than two messages every minute */ + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); + + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&mce_chrdev_wait); + + /* + * There is no risk of missing notifications because + * work_pending is always cleared before the function is + * executed. + */ + if (mce_helper[0] && !work_pending(&mce_trigger_work)) + schedule_work(&mce_trigger_work); + + if (__ratelimit(&ratelimit)) + pr_info(HW_ERR "Machine check events logged\n"); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static bool notify_work_ready __read_mostly; +static struct swork_event notify_work; + +static int mce_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(¬ify_work, __mce_notify_work); + notify_work_ready = true; + return 0; +} + +static void mce_notify_work(void) +{ + if (notify_work_ready) + swork_queue(¬ify_work); +} +#else +static void mce_notify_work(void) +{ + __mce_notify_work(NULL); +} +static inline int mce_notify_work_init(void) { return 0; } +#endif + /* * Notify the user(s) about new machine check events. * Can be called from interrupt context, but not from machine check/NMI @@ -1403,19 +1441,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); */ int mce_notify_irq(void) { - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - if (test_and_clear_bit(0, &mce_need_notify)) { - /* wake processes polling /dev/mcelog */ - wake_up_interruptible(&mce_chrdev_wait); - - if (mce_helper[0]) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - + mce_notify_work(); return 1; } return 0; @@ -1721,7 +1748,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) { unsigned long iv = check_interval * HZ; @@ -1730,16 +1757,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) per_cpu(mce_next_interval, cpu) = iv; - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, cpu); + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = this_cpu_ptr(&mce_timer); + struct hrtimer *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_timer_fn; mce_start_timer(cpu, t); } @@ -2464,6 +2492,8 @@ static void mce_disable_cpu(void *h) if (!mce_available(raw_cpu_ptr(&cpu_info))) return; + hrtimer_cancel(this_cpu_ptr(&mce_timer)); + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); @@ -2486,6 +2516,7 @@ static void mce_reenable_cpu(void *h) if (b->init) wrmsrl(msr_ops.ctl(i), b->ctl); } + __mcheck_cpu_init_timer(); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -2493,7 +2524,6 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: @@ -2513,11 +2543,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - del_timer_sync(t); break; case CPU_DOWN_FAILED: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - mce_start_timer(cpu, t); break; } @@ -2556,6 +2584,10 @@ static __init int mcheck_init_device(void) goto err_out; } + err = mce_notify_work_init(); + if (err) + goto err_out; + if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { err = -ENOMEM; goto err_out; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..053bf3b2ef39 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct irq_stack *irqstk; @@ -143,6 +144,7 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } +#endif bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd7be8efdc4c..b3b0a7f7b1ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/ldt.h> @@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +#ifdef CONFIG_PREEMPT_RT_FULL +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + pte_t *ptep = kmap_pte - idx; + + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + if (!pte_none(next_p->kmap_pte[i])) + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); + } +} +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * switch_to(x,y) should switch tasks from x to y. @@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); + switch_kmaps(prev_p, next_p); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3f05c044720b..fe68afd37162 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1939,6 +1939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; + apic->lapic_timer.timer.irqsafe = 1; /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 595f8149c0d9..31b15149f412 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5961,6 +5961,13 @@ int kvm_arch_init(void *opaque) goto out; } +#ifdef CONFIG_PREEMPT_RT_FULL + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); + return -EOPNOTSUPP; + } +#endif + r = kvm_mmu_module_init(); if (r) goto out_free_percpu; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 6d18b70ed5a9..f752724c22e8 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); */ void *kmap_atomic_prot(struct page *page, pgprot_t prot) { + pte_t pte = mk_pte(page, prot); unsigned long vaddr; int idx, type; - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte-idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); arch_flush_lazy_mmu_mode(); @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) #endif pagefault_enable(); - preempt_enable(); + preempt_enable_nort(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index ada98b39b8ad..585f6829653b 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { + pte_t pte = pfn_pte(pfn, prot); unsigned long vaddr; int idx, type; @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + WARN_ON(!pte_none(*(kmap_pte - idx))); + +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte - idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e3353c97d086..01664968555c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -214,7 +214,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; +#ifdef CONFIG_PREEMPT + /* + * Avoid wbinvd() because it causes latencies on all CPUs, + * regardless of any CPU isolation that may be in effect. + */ + unsigned long do_wbinvd = 0; +#else unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ +#endif BUG_ON(irqs_disabled()); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 9e42842e924a..5398f97172f9 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) cycles_t tm1; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (!bcp->baudisabled) { stat->s_bau_disabled++; tm1 = get_cycles(); @@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) } } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); } static void count_max_concurr(int stat, struct bau_control *bcp, @@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, */ static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) { - spinlock_t *lock = &hmaster->uvhub_lock; + raw_spinlock_t *lock = &hmaster->uvhub_lock; atomic_t *v; v = &hmaster->active_descriptor_count; @@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) struct bau_control *hmaster; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { stat->s_bau_reenabled++; for_each_present_cpu(tcpu) { @@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) tbcp->period_giveups = 0; } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return 0; } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return -1; } @@ -1940,9 +1940,9 @@ static void __init init_per_cpu_tunables(void) bcp->cong_reps = congested_reps; bcp->disabled_period = sec_2_cycles(disabled_period); bcp->giveup_limit = giveup_limit; - spin_lock_init(&bcp->queue_lock); - spin_lock_init(&bcp->uvhub_lock); - spin_lock_init(&bcp->disable_lock); + raw_spin_lock_init(&bcp->queue_lock); + raw_spin_lock_init(&bcp->uvhub_lock); + raw_spin_lock_init(&bcp->disable_lock); } } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index b333fc45f9ec..8b85916e6986 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); /* There is one of these allocated per node */ struct uv_rtc_timer_head { - spinlock_t lock; + raw_spinlock_t lock; /* next cpu waiting for timer, local node relative: */ int next_cpu; /* number of cpus on this node: */ @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) uv_rtc_deallocate_timers(); return -ENOMEM; } - spin_lock_init(&head->lock); + raw_spin_lock_init(&head->lock); head->ncpus = uv_blade_nr_possible_cpus(bid); head->next_cpu = -1; blade_info[bid] = head; @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) unsigned long flags; int next_cpu; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); next_cpu = head->next_cpu; *t = expires; @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) if (uv_setup_intr(cpu, expires)) { *t = ULLONG_MAX; uv_rtc_find_next_timer(head, pnode); - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return -ETIME; } } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return 0; } @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) unsigned long flags; int rc = 0; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) rc = 1; @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) uv_rtc_find_next_timer(head, pnode); } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return rc; } @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) static cycle_t uv_read_rtc(struct clocksource *cs) { unsigned long offset; + cycle_t cycles; + preempt_disable(); if (uv_get_min_hub_revision_id() == 1) offset = 0; else offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + preempt_enable(); + + return cycles; } /* |