From f039b754714a422959027cb18bb33760eb8153f0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Don't use MWAIT on AMD Family 10 It doesn't put the CPU into deeper sleep states, so it's better to use the standard idle loop to save power. But allow to reenable it anyways for benchmarking. I also removed the obsolete idle=halt on i386 Cc: andreas.herrmann@amd.com Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/i386/kernel/process.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 393a67d5d943..7e8e129b3d7d 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -272,25 +272,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c) } } -static int __init idle_setup (char *str) +static int __init idle_setup(char *str) { - if (!strncmp(str, "poll", 4)) { + if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; #ifdef CONFIG_X86_SMP if (smp_num_siblings > 1) printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); #endif - } else if (!strncmp(str, "halt", 4)) { - printk("using halt in idle threads.\n"); - pm_idle = default_idle; - } + } else if (!strcmp(str, "mwait")) + force_mwait = 1; + else + return -1; boot_option_idle_override = 1; - return 1; + return 0; } - -__setup("idle=", idle_setup); +early_param("idle", idle_setup); void show_regs(struct pt_regs * regs) { -- cgit v1.2.1 From a75c54f933bd8db9f4a609bd128663c179b3e6a1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: i386 separate hardware-defined TSS from Linux additions On Thu, 2007-03-29 at 13:16 +0200, Andi Kleen wrote: > Please clean it up properly with two structs. Not sure about this, now I've done it. Running it here. If you like it, I can do x86-64 as well. == lguest defines its own TSS struct because the "struct tss_struct" contains linux-specific additions. Andi asked me to split the struct in processor.h. Unfortunately it makes usage a little awkward. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel/process.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 7e8e129b3d7d..5fb9524c6f4b 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -375,7 +375,7 @@ void exit_thread(void) t->io_bitmap_max = 0; tss->io_bitmap_owner = NULL; tss->io_bitmap_max = 0; - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } } @@ -554,7 +554,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, * Disable the bitmap via an invalid offset. We still cache * the previous bitmap owner and the IO bitmap contents: */ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; return; } @@ -564,7 +564,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, * matches the next task, we dont have to do anything but * to set a valid offset in the TSS: */ - tss->io_bitmap_base = IO_BITMAP_OFFSET; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; return; } /* @@ -576,7 +576,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, * redundant copies when the currently switched task does not * perform any I/O during its timeslice. */ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; + tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; } /* -- cgit v1.2.1 From 7c3576d261ce046789a7db14f43303f8120910c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert PDA into the percpu section Currently x86 (similar to x84-64) has a special per-cpu structure called "i386_pda" which can be easily and efficiently referenced via the %fs register. An ELF section is more flexible than a structure, allowing any piece of code to use this area. Indeed, such a section already exists: the per-cpu area. So this patch: (1) Removes the PDA and uses per-cpu variables for each current member. (2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU. (3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which can be used to calculate addresses for this CPU's variables. (4) Simplifies startup, because %fs doesn't need to be loaded with a special segment at early boot; it can be deferred until the first percpu area is allocated (or never for UP). The result is less code and one less x86-specific concept. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- arch/i386/kernel/process.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel/process.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5fb9524c6f4b..61999479b7a4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -57,7 +58,6 @@ #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -66,6 +66,12 @@ static int hlt_counter; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + /* * Return saved PC of a blocked thread. */ @@ -342,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; - regs.xfs = __KERNEL_PDA; + regs.xfs = __KERNEL_PERCPU; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -711,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas if (prev->gs | next->gs) loadsegment(gs, next->gs); - write_pda(pcurrent, next_p); + x86_write_percpu(current_task, next_p); return prev_p; } -- cgit v1.2.1