diff options
32 files changed, 74 insertions, 803 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index cfbb164acd20..4e62a0e67df9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6104,12 +6104,6 @@ S: Maintained F: Documentation/cdrom/ide-cd F: drivers/ide/ide-cd* -IDLE-I7300 -M: Andy Henroid <andrew.d.henroid@intel.com> -L: linux-pm@vger.kernel.org -S: Supported -F: drivers/idle/i7300_idle.c - IEEE 802.15.4 SUBSYSTEM M: Alexander Aring <aar@pengutronix.de> M: Stefan Schmidt <stefan@osg.samsung.com> diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5391b0ae7cc3..395b69551fce 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; - else if (amd_e400_c1e_detected) + else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) return 1; else return max_cstate; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a5a0bcfde76a..0c5fbc68e82d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -11,7 +11,6 @@ #include <asm/fixmap.h> #include <asm/mpspec.h> #include <asm/msr.h> -#include <asm/idle.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -640,7 +639,6 @@ extern void irq_exit(void); static inline void entering_irq(void) { irq_enter(); - exit_idle(); } static inline void entering_ack_irq(void) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d2b69fc0ceb..d59c15c3defd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -204,6 +204,7 @@ static __always_inline __pure bool _static_cpu_has(u16 bit) #define static_cpu_has_bug(bit) static_cpu_has((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) #define cpu_have_feature boot_cpu_has diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e83f972b0a14..59ac427960d4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -314,4 +314,6 @@ #define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h deleted file mode 100644 index c5d1785373ed..000000000000 --- a/arch/x86/include/asm/idle.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _ASM_X86_IDLE_H -#define _ASM_X86_IDLE_H - -#define IDLE_START 1 -#define IDLE_END 2 - -struct notifier_block; -void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); - -#ifdef CONFIG_X86_64 -void enter_idle(void); -void exit_idle(void); -#else /* !CONFIG_X86_64 */ -static inline void enter_idle(void) { } -static inline void exit_idle(void) { } -static inline void __exit_idle(void) { } -#endif /* CONFIG_X86_64 */ - -void amd_e400_remove_cpu(int cpu); - -#endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 84f58de08c2b..9fa03604b2b3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -507,17 +507,6 @@ do { \ #endif -/* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define x86_test_and_clear_bit_percpu(bit, var) \ -({ \ - bool old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ - CC_SET(c) \ - : CC_OUT(c) (old__), "+m" (var) \ - : "dIr" (bit)); \ - old__; \ -}) - static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1f6a92903b09..6aa741fbe1df 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -633,10 +633,9 @@ static inline void sync_core(void) } extern void select_idle_routine(const struct cpuinfo_x86 *c); -extern void init_amd_e400_c1e_mask(void); +extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; -extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2686894350a4..bb47e5eacd44 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -48,7 +48,6 @@ #include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> -#include <asm/idle.h> #include <asm/mtrr.h> #include <asm/time.h> #include <asm/smp.h> @@ -894,11 +893,13 @@ void __init setup_boot_APIC_clock(void) /* Setup the lapic or request the broadcast */ setup_APIC_timer(); + amd_e400_c1e_apic_setup(); } void setup_secondary_APIC_clock(void) { setup_APIC_timer(); + amd_e400_c1e_apic_setup(); } /* diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 48e6d84f173e..945e512a112a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -48,7 +48,6 @@ #include <linux/bootmem.h> #include <asm/irqdomain.h> -#include <asm/idle.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/cpu.h> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4daad1e39352..71cae73a5076 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,6 +20,10 @@ #include "cpu.h" +static const int amd_erratum_383[]; +static const int amd_erratum_400[]; +static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX @@ -592,11 +596,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -} -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* + * Check whether the machine is affected by erratum 400. This is + * used to select the proper idle routine and to enable the check + * whether the machine is affected in arch_post_acpi_init(), which + * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. + */ + if (cpu_has_amd_erratum(c, amd_erratum_400)) + set_cpu_bug(c, X86_BUG_AMD_E400); +} static void init_amd_k8(struct cpuinfo_x86 *c) { @@ -777,9 +786,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (cpu_has_amd_erratum(c, amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* 3DNow or LM implies PREFETCHW */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9886cf490479..87f563946e6b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1172,7 +1172,6 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); - init_amd_e400_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1a0b0a0e5e2b..51ad0086b0bb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -24,7 +24,6 @@ #include <asm/amd_nb.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/msr.h> #include <asm/trace/irq_vectors.h> diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6b9dc4d18ccc..2f5c3418b18b 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -26,7 +26,6 @@ #include <asm/processor.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/msr.h> #include <asm/trace/irq_vectors.h> diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index fcf9ae9384f4..9beb092d68a5 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -6,7 +6,6 @@ #include <asm/irq_vectors.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/trace/irq_vectors.h> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8f44c5a50ab8..6c044543545e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -25,7 +25,6 @@ #include <asm/hyperv.h> #include <asm/mshyperv.h> #include <asm/desc.h> -#include <asm/idle.h> #include <asm/irq_regs.h> #include <asm/i8259.h> #include <asm/apic.h> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9f669fdd2010..7c6e9ffe4424 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -14,7 +14,6 @@ #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/irq.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/hw_irq.h> #include <asm/desc.h> diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 9ebd0b0e73d9..6b0678a541e2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -16,7 +16,6 @@ #include <linux/uaccess.h> #include <linux/smp.h> #include <asm/io_apic.h> -#include <asm/idle.h> #include <asm/apic.h> int sysctl_panic_on_stackoverflow; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index fb5afc6356b2..36bc66416021 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -42,7 +42,6 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/tlbflush.h> -#include <asm/idle.h> #include <asm/apic.h> #include <asm/apicdef.h> #include <asm/hypervisor.h> @@ -267,13 +266,11 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); - exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); rcu_irq_exit(); break; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0888a879120f..43c36d8a6ae2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -23,7 +23,6 @@ #include <asm/cpu.h> #include <asm/apic.h> #include <asm/syscalls.h> -#include <asm/idle.h> #include <asm/uaccess.h> #include <asm/mwait.h> #include <asm/fpu/internal.h> @@ -65,23 +64,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); -#ifdef CONFIG_X86_64 -static DEFINE_PER_CPU(unsigned char, is_idle); -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); -#endif - /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. @@ -251,39 +233,9 @@ static inline void play_dead(void) } #endif -#ifdef CONFIG_X86_64 -void enter_idle(void) -{ - this_cpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); -} - -static void __exit_idle(void) -{ - if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) - return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); -} - -/* Called from interrupts to signify idle end */ -void exit_idle(void) -{ - /* idle loop has pid 0 */ - if (current->pid) - return; - __exit_idle(); -} -#endif - void arch_cpu_idle_enter(void) { local_touch_nmi(); - enter_idle(); -} - -void arch_cpu_idle_exit(void) -{ - __exit_idle(); } void arch_cpu_idle_dead(void) @@ -336,59 +288,33 @@ void stop_this_cpu(void *dummy) halt(); } -bool amd_e400_c1e_detected; -EXPORT_SYMBOL(amd_e400_c1e_detected); - -static cpumask_var_t amd_e400_c1e_mask; - -void amd_e400_remove_cpu(int cpu) -{ - if (amd_e400_c1e_mask != NULL) - cpumask_clear_cpu(cpu, amd_e400_c1e_mask); -} - /* - * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt - * pending message MSR. If we detect C1E, then we handle it the same - * way as C3 power states (local apic timer and TSC stop) + * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power + * states (local apic timer and TSC stop). */ static void amd_e400_idle(void) { - if (!amd_e400_c1e_detected) { - u32 lo, hi; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - amd_e400_c1e_detected = true; - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halt in AMD C1E"); - pr_info("System has AMD C1E enabled\n"); - } + /* + * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E + * gets set after static_cpu_has() places have been converted via + * alternatives. + */ + if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { + default_idle(); + return; } - if (amd_e400_c1e_detected) { - int cpu = smp_processor_id(); + tick_broadcast_enter(); - if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { - cpumask_set_cpu(cpu, amd_e400_c1e_mask); - /* Force broadcast so ACPI can not interfere. */ - tick_broadcast_force(); - pr_info("Switch to broadcast mode on CPU%d\n", cpu); - } - tick_broadcast_enter(); - - default_idle(); + default_idle(); - /* - * The switch back from broadcast mode needs to be - * called with interrupts disabled. - */ - local_irq_disable(); - tick_broadcast_exit(); - local_irq_enable(); - } else - default_idle(); + /* + * The switch back from broadcast mode needs to be called with + * interrupts disabled. + */ + local_irq_disable(); + tick_broadcast_exit(); + local_irq_enable(); } /* @@ -448,8 +374,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (x86_idle || boot_option_idle_override == IDLE_POLL) return; - if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) { - /* E400: APIC timer interrupt does not wake up CPU from C1e */ + if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { pr_info("using AMD E400 aware idle routine\n"); x86_idle = amd_e400_idle; } else if (prefer_mwait_c1_over_halt(c)) { @@ -459,11 +384,37 @@ void select_idle_routine(const struct cpuinfo_x86 *c) x86_idle = default_idle; } -void __init init_amd_e400_c1e_mask(void) +void amd_e400_c1e_apic_setup(void) +{ + if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { + pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id()); + local_irq_disable(); + tick_broadcast_force(); + local_irq_enable(); + } +} + +void __init arch_post_acpi_subsys_init(void) { - /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ - if (x86_idle == amd_e400_idle) - zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); + u32 lo, hi; + + if (!boot_cpu_has_bug(X86_BUG_AMD_E400)) + return; + + /* + * AMD E400 detection needs to happen after ACPI has been enabled. If + * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in + * MSR_K8_INT_PENDING_MSG. + */ + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (!(lo & K8_INTP_C1E_ACTIVE_MASK)) + return; + + boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E); + + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + pr_info("System has AMD C1E enabled\n"); } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f854404be1c6..d0d744108594 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -49,7 +49,6 @@ #include <asm/tlbflush.h> #include <asm/cpu.h> -#include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c1b43eab80c..a76b65e3e615 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -44,7 +44,6 @@ #include <asm/desc.h> #include <asm/proto.h> #include <asm/ia32.h> -#include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a501abe5000..0c37d4fd01b2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -58,7 +58,6 @@ #include <asm/desc.h> #include <asm/nmi.h> #include <asm/irq.h> -#include <asm/idle.h> #include <asm/realmode.h> #include <asm/cpu.h> #include <asm/numa.h> @@ -1596,7 +1595,6 @@ void play_dead_common(void) { idle_task_exit(); reset_lazy_tlbstate(); - amd_e400_remove_cpu(raw_smp_processor_id()); /* Ack it */ (void)cpu_report_death(); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 9e42842e924a..766d4d3529a1 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -19,7 +19,6 @@ #include <asm/uv/uv_hub.h> #include <asm/uv/uv_bau.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/tsc.h> #include <asm/irq_vectors.h> #include <asm/timer.h> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 2237d3f24f0e..5c8aa9cf62d7 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -141,7 +141,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) return; - if (amd_e400_c1e_detected) + if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) type = ACPI_STATE_C1; /* diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 48fa4cf9f64a..2f3bbc88ff2a 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -106,8 +106,6 @@ #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ #define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ -/* IOAT1 define left for i7300_idle driver to not fail compiling */ -#define IOAT1_CHANSTS_OFFSET 0x04 #define IOAT_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) #define IOAT_CHANSTS_SOFT_ERR 0x10ULL diff --git a/drivers/idle/Kconfig b/drivers/idle/Kconfig index 4732dfc15447..55bcf803841e 100644 --- a/drivers/idle/Kconfig +++ b/drivers/idle/Kconfig @@ -8,20 +8,3 @@ config INTEL_IDLE native Intel hardware idle features. The acpi_idle driver can be configured at the same time, in order to handle processors intel_idle does not support. - -menu "Memory power savings" -depends on X86_64 - -config I7300_IDLE_IOAT_CHANNEL - bool - -config I7300_IDLE - tristate "Intel chipset idle memory power saving driver" - select I7300_IDLE_IOAT_CHANNEL - help - Enable memory power savings when idle with certain Intel server - chipsets. The chipset must have I/O AT support, such as the - Intel 7300. The power savings depends on the type and quantity of - DRAM devices. - -endmenu diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile index 23d295cf10f2..0007111d73e9 100644 --- a/drivers/idle/Makefile +++ b/drivers/idle/Makefile @@ -1,3 +1,2 @@ -obj-$(CONFIG_I7300_IDLE) += i7300_idle.o obj-$(CONFIG_INTEL_IDLE) += intel_idle.o diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c deleted file mode 100644 index ffeebc7e9f1c..000000000000 --- a/drivers/idle/i7300_idle.c +++ /dev/null @@ -1,612 +0,0 @@ -/* - * (C) Copyright 2008 Intel Corporation - * Authors: - * Andy Henroid <andrew.d.henroid@intel.com> - * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - */ - -/* - * Save DIMM power on Intel 7300-based platforms when all CPUs/cores - * are idle, using the DIMM thermal throttling capability. - * - * This driver depends on the Intel integrated DMA controller (I/O AT). - * If the driver for I/O AT (drivers/dma/ioatdma*) is also enabled, - * this driver should work cooperatively. - */ - -/* #define DEBUG */ - -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/sched.h> -#include <linux/notifier.h> -#include <linux/cpumask.h> -#include <linux/ktime.h> -#include <linux/delay.h> -#include <linux/debugfs.h> -#include <linux/stop_machine.h> -#include <linux/i7300_idle.h> - -#include <asm/idle.h> - -#include "../dma/ioat/hw.h" -#include "../dma/ioat/registers.h" - -#define I7300_IDLE_DRIVER_VERSION "1.55" -#define I7300_PRINT "i7300_idle:" - -#define MAX_STOP_RETRIES 10 - -static int debug; -module_param_named(debug, debug, uint, 0644); -MODULE_PARM_DESC(debug, "Enable debug printks in this driver"); - -static int forceload; -module_param_named(forceload, forceload, uint, 0644); -MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000"); - -#define dprintk(fmt, arg...) \ - do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0) - -/* - * Value to set THRTLOW to when initiating throttling - * 0 = No throttling - * 1 = Throttle when > 4 activations per eval window (Maximum throttling) - * 2 = Throttle when > 8 activations - * 168 = Throttle when > 672 activations (Minimum throttling) - */ -#define MAX_THROTTLE_LOW_LIMIT 168 -static uint throttle_low_limit = 1; -module_param_named(throttle_low_limit, throttle_low_limit, uint, 0644); -MODULE_PARM_DESC(throttle_low_limit, - "Value for THRTLOWLM activation field " - "(0 = disable throttle, 1 = Max throttle, 168 = Min throttle)"); - -/* - * simple invocation and duration statistics - */ -static unsigned long total_starts; -static unsigned long total_us; - -#ifdef DEBUG -static unsigned long past_skip; -#endif - -static struct pci_dev *fbd_dev; - -static raw_spinlock_t i7300_idle_lock; -static int i7300_idle_active; - -static u8 i7300_idle_thrtctl_saved; -static u8 i7300_idle_thrtlow_saved; -static u32 i7300_idle_mc_saved; - -static cpumask_var_t idle_cpumask; -static ktime_t start_ktime; -static unsigned long avg_idle_us; - -static struct dentry *debugfs_dir; - -/* Begin: I/O AT Helper routines */ - -#define IOAT_CHANBASE(ioat_ctl, chan) (ioat_ctl + 0x80 + 0x80 * chan) -/* Snoop control (disable snoops when coherency is not important) */ -#define IOAT_DESC_SADDR_SNP_CTL (1UL << 1) -#define IOAT_DESC_DADDR_SNP_CTL (1UL << 2) - -static struct pci_dev *ioat_dev; -static struct ioat_dma_descriptor *ioat_desc; /* I/O AT desc & data (1 page) */ -static unsigned long ioat_desc_phys; -static u8 *ioat_iomap; /* I/O AT memory-mapped control regs (aka CB_BAR) */ -static u8 *ioat_chanbase; - -/* Start I/O AT memory copy */ -static int i7300_idle_ioat_start(void) -{ - u32 err; - /* Clear error (due to circular descriptor pointer) */ - err = readl(ioat_chanbase + IOAT_CHANERR_OFFSET); - if (err) - writel(err, ioat_chanbase + IOAT_CHANERR_OFFSET); - - writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET); - return 0; -} - -/* Stop I/O AT memory copy */ -static void i7300_idle_ioat_stop(void) -{ - int i; - u64 sts; - - for (i = 0; i < MAX_STOP_RETRIES; i++) { - writeb(IOAT_CHANCMD_RESET, - ioat_chanbase + IOAT1_CHANCMD_OFFSET); - - udelay(10); - - sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_STATUS; - - if (sts != IOAT_CHANSTS_ACTIVE) - break; - - } - - if (i == MAX_STOP_RETRIES) { - dprintk("failed to stop I/O AT after %d retries\n", - MAX_STOP_RETRIES); - } -} - -/* Test I/O AT by copying 1024 byte from 2k to 1k */ -static int __init i7300_idle_ioat_selftest(u8 *ctl, - struct ioat_dma_descriptor *desc, unsigned long desc_phys) -{ - u64 chan_sts; - - memset(desc, 0, 2048); - memset((u8 *) desc + 2048, 0xab, 1024); - - desc[0].size = 1024; - desc[0].ctl = 0; - desc[0].src_addr = desc_phys + 2048; - desc[0].dst_addr = desc_phys + 1024; - desc[0].next = 0; - - writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET); - writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET); - - udelay(1000); - - chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_STATUS; - - if (chan_sts != IOAT_CHANSTS_DONE) { - /* Not complete, reset the channel */ - writeb(IOAT_CHANCMD_RESET, - ioat_chanbase + IOAT1_CHANCMD_OFFSET); - return -1; - } - - if (*(u32 *) ((u8 *) desc + 3068) != 0xabababab || - *(u32 *) ((u8 *) desc + 2044) != 0xabababab) { - dprintk("Data values src 0x%x, dest 0x%x, memset 0x%x\n", - *(u32 *) ((u8 *) desc + 2048), - *(u32 *) ((u8 *) desc + 1024), - *(u32 *) ((u8 *) desc + 3072)); - return -1; - } - return 0; -} - -static struct device dummy_dma_dev = { - .init_name = "fallback device", - .coherent_dma_mask = DMA_BIT_MASK(64), - .dma_mask = &dummy_dma_dev.coherent_dma_mask, -}; - -/* Setup and initialize I/O AT */ -/* This driver needs I/O AT as the throttling takes effect only when there is - * some memory activity. We use I/O AT to set up a dummy copy, while all CPUs - * go idle and memory is throttled. - */ -static int __init i7300_idle_ioat_init(void) -{ - u8 ver, chan_count, ioat_chan; - u16 chan_ctl; - - ioat_iomap = (u8 *) ioremap_nocache(pci_resource_start(ioat_dev, 0), - pci_resource_len(ioat_dev, 0)); - - if (!ioat_iomap) { - printk(KERN_ERR I7300_PRINT "failed to map I/O AT registers\n"); - goto err_ret; - } - - ver = readb(ioat_iomap + IOAT_VER_OFFSET); - if (ver != IOAT_VER_1_2) { - printk(KERN_ERR I7300_PRINT "unknown I/O AT version (%u.%u)\n", - ver >> 4, ver & 0xf); - goto err_unmap; - } - - chan_count = readb(ioat_iomap + IOAT_CHANCNT_OFFSET); - if (!chan_count) { - printk(KERN_ERR I7300_PRINT "unexpected # of I/O AT channels " - "(%u)\n", - chan_count); - goto err_unmap; - } - - ioat_chan = chan_count - 1; - ioat_chanbase = IOAT_CHANBASE(ioat_iomap, ioat_chan); - - chan_ctl = readw(ioat_chanbase + IOAT_CHANCTRL_OFFSET); - if (chan_ctl & IOAT_CHANCTRL_CHANNEL_IN_USE) { - printk(KERN_ERR I7300_PRINT "channel %d in use\n", ioat_chan); - goto err_unmap; - } - - writew(IOAT_CHANCTRL_CHANNEL_IN_USE, - ioat_chanbase + IOAT_CHANCTRL_OFFSET); - - ioat_desc = (struct ioat_dma_descriptor *)dma_alloc_coherent( - &dummy_dma_dev, 4096, - (dma_addr_t *)&ioat_desc_phys, GFP_KERNEL); - if (!ioat_desc) { - printk(KERN_ERR I7300_PRINT "failed to allocate I/O AT desc\n"); - goto err_mark_unused; - } - - writel(ioat_desc_phys & 0xffffffffUL, - ioat_chanbase + IOAT1_CHAINADDR_OFFSET_LOW); - writel(ioat_desc_phys >> 32, - ioat_chanbase + IOAT1_CHAINADDR_OFFSET_HIGH); - - if (i7300_idle_ioat_selftest(ioat_iomap, ioat_desc, ioat_desc_phys)) { - printk(KERN_ERR I7300_PRINT "I/O AT self-test failed\n"); - goto err_free; - } - - /* Setup circular I/O AT descriptor chain */ - ioat_desc[0].ctl = IOAT_DESC_SADDR_SNP_CTL | IOAT_DESC_DADDR_SNP_CTL; - ioat_desc[0].src_addr = ioat_desc_phys + 2048; - ioat_desc[0].dst_addr = ioat_desc_phys + 3072; - ioat_desc[0].size = 128; - ioat_desc[0].next = ioat_desc_phys + sizeof(struct ioat_dma_descriptor); - - ioat_desc[1].ctl = ioat_desc[0].ctl; - ioat_desc[1].src_addr = ioat_desc[0].src_addr; - ioat_desc[1].dst_addr = ioat_desc[0].dst_addr; - ioat_desc[1].size = ioat_desc[0].size; - ioat_desc[1].next = ioat_desc_phys; - - return 0; - -err_free: - dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0); -err_mark_unused: - writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); -err_unmap: - iounmap(ioat_iomap); -err_ret: - return -ENODEV; -} - -/* Cleanup I/O AT */ -static void __exit i7300_idle_ioat_exit(void) -{ - int i; - u64 chan_sts; - - i7300_idle_ioat_stop(); - - /* Wait for a while for the channel to halt before releasing */ - for (i = 0; i < MAX_STOP_RETRIES; i++) { - writeb(IOAT_CHANCMD_RESET, - ioat_chanbase + IOAT1_CHANCMD_OFFSET); - - chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_STATUS; - - if (chan_sts != IOAT_CHANSTS_ACTIVE) { - writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); - break; - } - udelay(1000); - } - - chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_STATUS; - - /* - * We tried to reset multiple times. If IO A/T channel is still active - * flag an error and return without cleanup. Memory leak is better - * than random corruption in that extreme error situation. - */ - if (chan_sts == IOAT_CHANSTS_ACTIVE) { - printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." - " Not freeing resources\n"); - return; - } - - dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0); - iounmap(ioat_iomap); -} - -/* End: I/O AT Helper routines */ - -#define DIMM_THRTLOW 0x64 -#define DIMM_THRTCTL 0x67 -#define DIMM_THRTCTL_THRMHUNT (1UL << 0) -#define DIMM_MC 0x40 -#define DIMM_GTW_MODE (1UL << 17) -#define DIMM_GBLACT 0x60 - -/* - * Keep track of an exponential-decaying average of recent idle durations. - * The latest duration gets DURATION_WEIGHT_PCT percentage weight - * in this average, with the old average getting the remaining weight. - * - * High weights emphasize recent history, low weights include long history. - */ -#define DURATION_WEIGHT_PCT 55 - -/* - * When the decaying average of recent durations or the predicted duration - * of the next timer interrupt is shorter than duration_threshold, the - * driver will decline to throttle. - */ -#define DURATION_THRESHOLD_US 100 - - -/* Store DIMM thermal throttle configuration */ -static int i7300_idle_thrt_save(void) -{ - u32 new_mc_val; - u8 gblactlm; - - pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &i7300_idle_thrtctl_saved); - pci_read_config_byte(fbd_dev, DIMM_THRTLOW, &i7300_idle_thrtlow_saved); - pci_read_config_dword(fbd_dev, DIMM_MC, &i7300_idle_mc_saved); - /* - * Make sure we have Global Throttling Window Mode set to have a - * "short" window. This (mostly) works around an issue where - * throttling persists until the end of the global throttling window - * size. On the tested system, this was resulting in a maximum of - * 64 ms to exit throttling (average 32 ms). The actual numbers - * depends on system frequencies. Setting the short window reduces - * this by a factor of 4096. - * - * We will only do this only if the system is set for - * unlimited-activations while in open-loop throttling (i.e., when - * Global Activation Throttle Limit is zero). - */ - pci_read_config_byte(fbd_dev, DIMM_GBLACT, &gblactlm); - dprintk("thrtctl_saved = 0x%02x, thrtlow_saved = 0x%02x\n", - i7300_idle_thrtctl_saved, - i7300_idle_thrtlow_saved); - dprintk("mc_saved = 0x%08x, gblactlm = 0x%02x\n", - i7300_idle_mc_saved, - gblactlm); - if (gblactlm == 0) { - new_mc_val = i7300_idle_mc_saved | DIMM_GTW_MODE; - pci_write_config_dword(fbd_dev, DIMM_MC, new_mc_val); - return 0; - } else { - dprintk("could not set GTW_MODE = 1 (OLTT enabled)\n"); - return -ENODEV; - } -} - -/* Restore DIMM thermal throttle configuration */ -static void i7300_idle_thrt_restore(void) -{ - pci_write_config_dword(fbd_dev, DIMM_MC, i7300_idle_mc_saved); - pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved); - pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved); -} - -/* Enable DIMM thermal throttling */ -static void i7300_idle_start(void) -{ - u8 new_ctl; - u8 limit; - - new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT; - pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); - - limit = throttle_low_limit; - if (unlikely(limit > MAX_THROTTLE_LOW_LIMIT)) - limit = MAX_THROTTLE_LOW_LIMIT; - - pci_write_config_byte(fbd_dev, DIMM_THRTLOW, limit); - - new_ctl = i7300_idle_thrtctl_saved | DIMM_THRTCTL_THRMHUNT; - pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); -} - -/* Disable DIMM thermal throttling */ -static void i7300_idle_stop(void) -{ - u8 new_ctl; - u8 got_ctl; - - new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT; - pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl); - - pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved); - pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved); - pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &got_ctl); - WARN_ON_ONCE(got_ctl != i7300_idle_thrtctl_saved); -} - - -/* - * i7300_avg_duration_check() - * return 0 if the decaying average of recent idle durations is - * more than DURATION_THRESHOLD_US - */ -static int i7300_avg_duration_check(void) -{ - if (avg_idle_us >= DURATION_THRESHOLD_US) - return 0; - -#ifdef DEBUG - past_skip++; -#endif - return 1; -} - -/* Idle notifier to look at idle CPUs */ -static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - unsigned long flags; - ktime_t now_ktime; - static ktime_t idle_begin_time; - static int time_init = 1; - - if (!throttle_low_limit) - return 0; - - if (unlikely(time_init)) { - time_init = 0; - idle_begin_time = ktime_get(); - } - - raw_spin_lock_irqsave(&i7300_idle_lock, flags); - if (val == IDLE_START) { - - cpumask_set_cpu(smp_processor_id(), idle_cpumask); - - if (cpumask_weight(idle_cpumask) != num_online_cpus()) - goto end; - - now_ktime = ktime_get(); - idle_begin_time = now_ktime; - - if (i7300_avg_duration_check()) - goto end; - - i7300_idle_active = 1; - total_starts++; - start_ktime = now_ktime; - - i7300_idle_start(); - i7300_idle_ioat_start(); - - } else if (val == IDLE_END) { - cpumask_clear_cpu(smp_processor_id(), idle_cpumask); - if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) { - /* First CPU coming out of idle */ - u64 idle_duration_us; - - now_ktime = ktime_get(); - - idle_duration_us = ktime_to_us(ktime_sub - (now_ktime, idle_begin_time)); - - avg_idle_us = - ((100 - DURATION_WEIGHT_PCT) * avg_idle_us + - DURATION_WEIGHT_PCT * idle_duration_us) / 100; - - if (i7300_idle_active) { - ktime_t idle_ktime; - - idle_ktime = ktime_sub(now_ktime, start_ktime); - total_us += ktime_to_us(idle_ktime); - - i7300_idle_ioat_stop(); - i7300_idle_stop(); - i7300_idle_active = 0; - } - } - } -end: - raw_spin_unlock_irqrestore(&i7300_idle_lock, flags); - return 0; -} - -static struct notifier_block i7300_idle_nb = { - .notifier_call = i7300_idle_notifier, -}; - -MODULE_DEVICE_TABLE(pci, pci_tbl); - -static ssize_t stats_read_ul(struct file *fp, char __user *ubuf, size_t count, - loff_t *off) -{ - unsigned long *p = fp->private_data; - char buf[32]; - int len; - - len = snprintf(buf, 32, "%lu\n", *p); - return simple_read_from_buffer(ubuf, count, off, buf, len); -} - -static const struct file_operations idle_fops = { - .open = simple_open, - .read = stats_read_ul, - .llseek = default_llseek, -}; - -struct debugfs_file_info { - void *ptr; - char name[32]; - struct dentry *file; -} debugfs_file_list[] = { - {&total_starts, "total_starts", NULL}, - {&total_us, "total_us", NULL}, -#ifdef DEBUG - {&past_skip, "past_skip", NULL}, -#endif - {NULL, "", NULL} - }; - -static int __init i7300_idle_init(void) -{ - raw_spin_lock_init(&i7300_idle_lock); - total_us = 0; - - if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) - return -ENODEV; - - if (i7300_idle_thrt_save()) - return -ENODEV; - - if (i7300_idle_ioat_init()) - return -ENODEV; - - if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL)) - return -ENOMEM; - - debugfs_dir = debugfs_create_dir("i7300_idle", NULL); - if (debugfs_dir) { - int i = 0; - - while (debugfs_file_list[i].ptr != NULL) { - debugfs_file_list[i].file = debugfs_create_file( - debugfs_file_list[i].name, - S_IRUSR, - debugfs_dir, - debugfs_file_list[i].ptr, - &idle_fops); - i++; - } - } - - idle_notifier_register(&i7300_idle_nb); - - printk(KERN_INFO "i7300_idle: loaded v%s\n", I7300_IDLE_DRIVER_VERSION); - return 0; -} - -static void __exit i7300_idle_exit(void) -{ - idle_notifier_unregister(&i7300_idle_nb); - free_cpumask_var(idle_cpumask); - - if (debugfs_dir) { - int i = 0; - - while (debugfs_file_list[i].file != NULL) { - debugfs_remove(debugfs_file_list[i].file); - i++; - } - - debugfs_remove(debugfs_dir); - } - i7300_idle_thrt_restore(); - i7300_idle_ioat_exit(); -} - -module_init(i7300_idle_init); -module_exit(i7300_idle_exit); - -MODULE_AUTHOR("Andy Henroid <andrew.d.henroid@intel.com>"); -MODULE_DESCRIPTION("Intel Chipset DIMM Idle Power Saving Driver v" - I7300_IDLE_DRIVER_VERSION); -MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index afada655f861..350cb5e22ff3 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -56,7 +56,6 @@ #include <asm/msr.h> #include <asm/mwait.h> #include <asm/cpu_device_id.h> -#include <asm/idle.h> #include <asm/hardirq.h> #define MAX_TARGET_RATIO (50U) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 9ecfcdcdd6d6..adc19ce3cc66 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -37,7 +37,6 @@ #include <asm/desc.h> #include <asm/ptrace.h> #include <asm/irq.h> -#include <asm/idle.h> #include <asm/io_apic.h> #include <asm/i8259.h> #include <asm/xen/pci.h> @@ -1256,7 +1255,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) irq_enter(); #ifdef CONFIG_X86 - exit_idle(); inc_irq_stat(irq_hv_callback_count); #endif diff --git a/init/main.c b/init/main.c index 2858be732f6d..1d7038c1ee19 100644 --- a/init/main.c +++ b/init/main.c @@ -448,6 +448,8 @@ void __init parse_early_param(void) done = 1; } +void __init __weak arch_post_acpi_subsys_init(void) { } + void __init __weak smp_setup_processor_id(void) { } @@ -649,6 +651,7 @@ asmlinkage __visible void __init start_kernel(void) check_bugs(); acpi_subsystem_init(); + arch_post_acpi_subsys_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { |