summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlec Berg <alecaberg@chromium.org>2014-04-15 13:53:57 -0700
committerchrome-internal-fetch <chrome-internal-fetch@google.com>2014-04-18 18:58:36 +0000
commitb610695b61db3b3784e2a516c91f429139616100 (patch)
treedd381367d143e3531080bd2dc7caf85a309f4fe2
parentd899fdaaeef175e97923b954342dad3a33c5c387 (diff)
downloadchrome-ec-b610695b61db3b3784e2a516c91f429139616100.tar.gz
cortex-m: store FPU regs on context switch
Added storing of FPU regs on context switches when CONFIG_FPU is defined. On context switches, EXC_RETURN[4] is checked in order to tell which tasks have used floating point and which have not. The FPU regs are only stored on task stacks for tasks that use the floating point. Tasks that use floating point will therefore require roughly an additional 128 bytes of stack space, and context switches will take about 32 clock cycles longer for each task involved in the switch that uses FP. For tasks that don't use floating point, the stack usage actually decreases by 64 bytes because previously we were reserving stack space for FPU regs S0-S15 on every context switch for every task, even though we weren't doing anything with them. If a task only uses the FPU for a brief window, it can call task_clear_fp_used() in order to clear the FP used bit so that context switches using that task will not backup FP regs anymore. BUG=chrome-os-partner:27971 BRANCH=none TEST=Tested on glimmer and peppy. Added the following code, which uses the FPU in both the hooks task and the console task. Note, I tested this for a handful of registers, notably registers in the group s0-s15 which are backed up by lazy stacking, and registers in the group s16-s31 which are backed up manually. float dummy = 2.0f; static void hook_fpu(void) { union { float f; int i; } tmp; /* do a dummy FP calculation to set CONTROL.FPCA high. */ dummy = 2.3f*7.8f; /* read and print FP reg. */ asm volatile("vmov %0, s29" : "=r"(tmp.f)); ccprintf("Hook float 0x%08x\n", tmp.i); /* write FP reg. */ tmp.i = 0x1234; asm volatile("vmov s29, %0" : : "r"(tmp.f)); } DECLARE_HOOK(HOOK_SECOND, hook_fpu, HOOK_PRIO_DEFAULT); static int command_fpu_test(int argc, char **argv) { union { float f; int i; } tmp; /* do a dummy FP calculation to set CONTROL.FPCA high. */ dummy = 2.7f*7.8f; /* read and print FP reg. 
*/ asm volatile("vmov %0, s29" : "=r"(tmp.f)); ccprintf("Console float 0x%08x\n", tmp.i); if (argc == 2) { char *e; tmp.i = strtoi(argv[1], &e, 0); if (*e) return EC_ERROR_PARAM1; /* write FP reg. */ asm volatile("vmov s29, %0" : : "r"(tmp.f)); } else { task_clear_fp_used(); } return EC_SUCCESS; } DECLARE_CONSOLE_COMMAND(fputest, command_fpu_test, "", "", NULL); When you call fputest 5 from EC console before this CL, then on the next HOOK_SECOND, the value of register s29 is 5, instead of 0x1234 because register s29 is not saved on context switches: Hook float 0x00001234 > fputest 5 Console float 0x00001234 Hook float 0x00000005 When this CL is in use, the register holds the correct value for each task: Hook float 0x00001234 > fputest 5 Console float 0x00001234 Hook float 0x00001234 > fputest Console float 0x00000005 Hook float 0x00001234 Change-Id: Ifb1b5cbf1c6fc9193f165f8d69c96443b35bf981 Signed-off-by: Alec Berg <alecaberg@chromium.org> Reviewed-on: https://chromium-review.googlesource.com/194949 Reviewed-by: Vincent Palatin <vpalatin@chromium.org>
-rw-r--r--core/cortex-m/switch.S38
-rw-r--r--core/cortex-m/task.c25
-rw-r--r--include/task.h9
3 files changed, 58 insertions, 14 deletions
diff --git a/core/cortex-m/switch.S b/core/cortex-m/switch.S
index 92c7e51247..d5d49a8827 100644
--- a/core/cortex-m/switch.S
+++ b/core/cortex-m/switch.S
@@ -29,6 +29,19 @@
* the structure of the saved context on the stack is :
* r0, r1, r2, r3, r12, lr, pc, psr, r4, r5, r6, r7, r8, r9, r10, r11
* exception frame <|> additional registers
+ *
+ * If using the FPU, the FP regs are added to the stack to store the FPU
+ * context. In this case the exception frame by default contains:
+ * r0, r1, r2, r3, r12, lr, pc, psr,
+ * s0 - s15, FPSCR, +1 word for 64-bit alignment
+ * Then, in addition, we store the following registers:
+ * r4, r5, r6, r7, r8, r9, r10, r11
+ * s16 - s31 (stored iff FP was used by the task (see EXC_RETURN[4]))
+ * Note that for the context switch to know whether the next task has the extra
+ * FP regs on its stack, we use the least significant bit of the saved stack
+ * pointer: the lsb is 1 if the task has FP regs on its stack, and
+ * 0 otherwise.
+ *
*/
.global __switchto
.thumb_func
@@ -36,6 +49,25 @@ __switchto:
mrs r3, psp @ get the task stack where the context has been saved
ldr r2, [r1] @ get the new scheduled task stack pointer
stmdb r3!, {r4-r11} @ save additional r4-r11 in the task stack
+
+#ifdef CONFIG_FPU
+ tst lr, #(1<<4) @ test EXC_RETURN[4] for old task
+ itt eq @ if EXC_RETURN[4] is zero, add FP regs to stack
+ vstmdbeq r3!, {s16-s31}@ save additional FP s16-s31 in the task stack.
+ @ if using lazy stacking, this will trigger saving
+ @ s0-s15 in the reserved stack space.
+ orreq r3, #1 @ set lsb of old stack pointer high to represent this
+ @ task uses FPU. note stack pointer should be 64-bit
+ @ aligned, so using this bit should be safe.
+
+ tst r2, #1 @ test lsb of next stack pointer
+ ittte ne @ if lsb is 1, then next task has FP regs on stack
+ bicne r2, #1 @ clear lsb of new stack pointer
+ bicne lr, #(1<<4) @ clear EXC_RETURN[4] for next task
+ vldmiane r2!, {s16-s31}@ restore s16-s31 for the next task context
+ orreq lr, #(1<<4) @ else if new stack doesn't use FP, set EXC_RETURN[4]
+#endif
+
ldmia r2!, {r4-r11} @ restore r4-r11 for the next task context
str r3, [r0] @ save the task stack pointer in its context
msr psp, r2 @ set the process stack pointer to exception context
@@ -49,11 +81,9 @@ __switchto:
.thumb_func
__task_start:
ldr r2,=scratchpad @ area used as dummy thread stack for the first switch
-#ifdef CONFIG_FPU
- mov r3, #6 @ use : priv. mode / thread stack / floating point on
-#else
mov r3, #2 @ use : priv. mode / thread stack / no floating point
-#endif
+ @ setting FP to unused here means first context switch
+ @ will not store FP regs
add r2, #17*4 @ put the pointer at the top of the stack
mov r1, #0 @ __Schedule parameter : re-schedule nothing
msr psp, r2 @ setup a thread stack up to the first context switch
diff --git a/core/cortex-m/task.c b/core/cortex-m/task.c
index 5d2f967693..980c4b38c5 100644
--- a/core/cortex-m/task.c
+++ b/core/cortex-m/task.c
@@ -118,11 +118,7 @@ uint8_t task_stacks[0
#undef TASK
/* Reserve space to discard context on first context switch. */
-#ifdef CONFIG_FPU
-uint32_t scratchpad[17+18];
-#else
uint32_t scratchpad[17];
-#endif
static task_ *current_task = (task_ *)scratchpad;
@@ -635,14 +631,10 @@ void task_pre_init(void)
/*
* Update stack used by first frame: 8 words for the normal
- * stack, plus 8 for R4-R11. With FP enabled, we need another
- * 18 words for S0-S15 and FPCSR and to align to 64-bit.
+ * stack, plus 8 for R4-R11. Even if using FPU, the first frame
+ * does not store FP regs.
*/
-#ifdef CONFIG_FPU
- sp = stack_next + ssize - 16 - 18;
-#else
sp = stack_next + ssize - 16;
-#endif
tasks[i].sp = (uint32_t)sp;
/* Initial context on stack (see __switchto()) */
@@ -671,6 +663,19 @@ void task_pre_init(void)
__nvic_init_irqs();
}
+void task_clear_fp_used(void)
+{
+ int ctrl;
+
+ /* Read-modify-write CONTROL to clear FPCA (mask 0x4), the FP-context-active bit. */
+ asm volatile("mrs %0, control" : "=r"(ctrl));
+ ctrl &= ~0x4;
+ asm volatile("msr control, %0" : : "r"(ctrl));
+
+ /* Issue an ISB to flush the pipeline after the CONTROL write, before returning. */
+ asm volatile("isb");
+}
+
int task_start(void)
{
#ifdef CONFIG_TASK_PROFILING
diff --git a/include/task.h b/include/task.h
index 00368fbc34..1d80a0b2f7 100644
--- a/include/task.h
+++ b/include/task.h
@@ -171,6 +171,15 @@ int task_start(void);
*/
int task_start_called(void);
+#ifdef CONFIG_FPU
+/**
+ * Clear the floating-point-used flag for the currently executing task. The FPU
+ * regs will then not be stored on context switches involving this task until
+ * the next time the task uses floating point.
+ */
+void task_clear_fp_used(void);
+#endif
+
/**
* Enable an interrupt.
*/