Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r--	arch/x86/kernel/fpu/core.c	| 106
-rw-r--r--	arch/x86/kernel/fpu/signal.c	|  49
2 files changed, 110 insertions, 45 deletions
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 739ca3ae2bdc..ce243f76bdb7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -102,23 +102,20 @@ static void __kernel_fpu_begin(void)
 	kernel_fpu_disable();
 
 	if (current->mm) {
-		/*
-		 * Ignore return value -- we don't care if reg state
-		 * is clobbered.
-		 */
-		copy_fpregs_to_fpstate(fpu);
-	} else {
-		__cpu_invalidate_fpregs_state();
+		if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+			set_thread_flag(TIF_NEED_FPU_LOAD);
+			/*
+			 * Ignore return value -- we don't care if reg state
+			 * is clobbered.
+			 */
+			copy_fpregs_to_fpstate(fpu);
+		}
 	}
+	__cpu_invalidate_fpregs_state();
 }
 
 static void __kernel_fpu_end(void)
 {
-	struct fpu *fpu = &current->thread.fpu;
-
-	if (current->mm)
-		copy_kernel_to_fpregs(&fpu->state);
-
 	kernel_fpu_enable();
 }
 
@@ -145,14 +142,17 @@ void fpu__save(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	preempt_disable();
+	fpregs_lock();
 	trace_x86_fpu_before_save(fpu);
 
-	if (!copy_fpregs_to_fpstate(fpu))
-		copy_kernel_to_fpregs(&fpu->state);
+	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		if (!copy_fpregs_to_fpstate(fpu)) {
+			copy_kernel_to_fpregs(&fpu->state);
+		}
+	}
 
 	trace_x86_fpu_after_save(fpu);
-	preempt_enable();
+	fpregs_unlock();
 }
 EXPORT_SYMBOL_GPL(fpu__save);
 
@@ -185,8 +185,11 @@ void fpstate_init(union fpregs_state *state)
 }
 EXPORT_SYMBOL_GPL(fpstate_init);
 
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
 {
+	struct fpu *dst_fpu = &dst->thread.fpu;
+	struct fpu *src_fpu = &src->thread.fpu;
+
 	dst_fpu->last_cpu = -1;
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
@@ -201,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
-	 * Save current FPU registers directly into the child
-	 * FPU context, without any memory-to-memory copying.
+	 * If the FPU registers are not current just memcpy() the state.
+	 * Otherwise save current FPU registers directly into the child's FPU
+	 * context, without any memory-to-memory copying.
 	 *
 	 * ( The function 'fails' in the FNSAVE case, which destroys
-	 *   register contents so we have to copy them back. )
+	 *   register contents so we have to load them back. )
 	 */
-	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
-		copy_kernel_to_fpregs(&src_fpu->state);
-	}
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+	else if (!copy_fpregs_to_fpstate(dst_fpu))
+		copy_kernel_to_fpregs(&dst_fpu->state);
+
+	fpregs_unlock();
+
+	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -226,10 +236,9 @@ static void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
+	set_thread_flag(TIF_NEED_FPU_LOAD);
 	fpstate_init(&fpu->state);
 	trace_x86_fpu_init_state(fpu);
-
-	trace_x86_fpu_activate_state(fpu);
 }
 
 /*
@@ -308,6 +317,8 @@ void fpu__drop(struct fpu *fpu)
  */
 static inline void copy_init_fpstate_to_fpregs(void)
 {
+	fpregs_lock();
+
 	if (use_xsave())
 		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
 	else if (static_cpu_has(X86_FEATURE_FXSR))
@@ -317,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void)
 	if (boot_cpu_has(X86_FEATURE_OSPKE))
 		copy_init_pkru_to_fpregs();
+
+	fpregs_mark_activate();
+	fpregs_unlock();
 }
 
 /*
@@ -340,6 +354,46 @@ void fpu__clear(struct fpu *fpu)
 }
 
 /*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	__fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU context on this
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
+ * loaded on return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		return;
+
+	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	fpregs_activate(fpu);
+	fpu->last_cpu = smp_processor_id();
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
+/*
  * x87 math exception handling:
  */
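To make the new bookkeeping concrete, the following user-space model distills what the core.c hunks above implement. This is an illustrative sketch only: every type, helper and name below (fpu_model, fpregs_owner, the model_*() functions) is a stand-in invented for this example. It mirrors the kernel's TIF_NEED_FPU_LOAD flag, fpu->last_cpu tracking and FPU-registers-owner pointer, but shares no code with the kernel.

#include <stdbool.h>
#include <stdio.h>

struct fpu_model {
	int  last_cpu;		/* CPU whose registers hold this context, or -1 */
	bool need_fpu_load;	/* models TIF_NEED_FPU_LOAD */
};

static struct fpu_model *fpregs_owner;	/* models the per-CPU owner pointer */
static const int this_cpu = 0;		/* single-CPU model */

/* Registers are valid iff this context owns them on this CPU. */
static bool state_valid(const struct fpu_model *fpu)
{
	return fpregs_owner == fpu && fpu->last_cpu == this_cpu;
}

/* Models __kernel_fpu_begin(): save user state once, then clobber freely. */
static void model_kernel_fpu_begin(struct fpu_model *fpu)
{
	if (!fpu->need_fpu_load) {
		fpu->need_fpu_load = true;
		printf("save user FPU state to fpu->state\n");
	}
	fpregs_owner = NULL;	/* registers now hold kernel scratch data */
}

/* Models __kernel_fpu_end(): nothing left to restore here any more. */
static void model_kernel_fpu_end(void)
{
}

/* Models switch_fpu_return(), run before returning to user space. */
static void model_switch_fpu_return(struct fpu_model *fpu)
{
	if (!state_valid(fpu)) {
		printf("load user FPU state from fpu->state\n");
		fpregs_owner = fpu;
		fpu->last_cpu = this_cpu;
	}
	fpu->need_fpu_load = false;	/* fpregs_mark_activate() */
}

int main(void)
{
	struct fpu_model task = { .last_cpu = -1, .need_fpu_load = false };

	model_kernel_fpu_begin(&task);	/* first use: one save */
	model_kernel_fpu_end();
	model_kernel_fpu_begin(&task);	/* second section: no second save */
	model_kernel_fpu_end();
	model_switch_fpu_return(&task);	/* one load, at the exit boundary */
	return 0;
}

The property the model demonstrates is the one the patch is after: kernel_fpu_end() no longer restores anything, so any number of kernel-FPU sections between two returns to user space cost at most one save (on first use) and one load (in switch_fpu_return()).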
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index b13e86b29426..6df1f15e0cd5 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -269,11 +269,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
 	struct user_i387_ia32_struct env;
-	union fpregs_state *state;
 	u64 xfeatures = 0;
 	int fx_only = 0;
 	int ret = 0;
-	void *tmp;
 
 	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
 			 IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -308,14 +306,18 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		}
 	}
 
-	tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-	state = PTR_ALIGN(tmp, 64);
+	/*
+	 * The current state of the FPU registers does not matter. By setting
+	 * TIF_NEED_FPU_LOAD unconditionally it is ensured that our xstate is
+	 * not modified on context switch and that the xstate is considered
+	 * to be loaded again on return to userland (overriding last_cpu avoids
+	 * the optimisation).
+	 */
+	set_thread_flag(TIF_NEED_FPU_LOAD);
+	__fpu_invalidate_fpregs_state(fpu);
 
 	if ((unsigned long)buf_fx % 64)
 		fx_only = 1;
-
 	/*
 	 * For 32-bit frames with fxstate, copy the fxstate so it can be
 	 * reconstructed later.
@@ -331,43 +333,52 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		u64 init_bv = xfeatures_mask & ~xfeatures;
 
 		if (using_compacted_format()) {
-			ret = copy_user_to_xstate(&state->xsave, buf_fx);
+			ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		} else {
-			ret = __copy_from_user(&state->xsave, buf_fx, state_size);
+			ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
 			if (!ret && state_size > offsetof(struct xregs_state, header))
-				ret = validate_xstate_header(&state->xsave.header);
+				ret = validate_xstate_header(&fpu->state.xsave.header);
 		}
 		if (ret)
 			goto err_out;
 
-		sanitize_restored_xstate(state, envp, xfeatures, fx_only);
+		sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
 
+		fpregs_lock();
 		if (unlikely(init_bv))
 			copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
-		ret = copy_kernel_to_xregs_err(&state->xsave, xfeatures);
+		ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
 
 	} else if (use_fxsr()) {
-		ret = __copy_from_user(&state->fxsave, buf_fx, state_size);
-		if (ret)
+		ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
+		if (ret) {
+			ret = -EFAULT;
 			goto err_out;
+		}
+
+		sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
 
-		sanitize_restored_xstate(state, envp, xfeatures, fx_only);
+		fpregs_lock();
 		if (use_xsave()) {
 			u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
 			copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
 		}
 
-		ret = copy_kernel_to_fxregs_err(&state->fxsave);
+		ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave);
 	} else {
-		ret = __copy_from_user(&state->fsave, buf_fx, state_size);
+		ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
 		if (ret)
 			goto err_out;
-		ret = copy_kernel_to_fregs_err(&state->fsave);
+
+		fpregs_lock();
+		ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
 	}
+	if (!ret)
+		fpregs_mark_activate();
+	fpregs_unlock();
 
 err_out:
-	kfree(tmp);
 	if (ret)
 		fpu__clear(fpu);
 	return ret;
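The signal.c side can be summarized the same way. Before this change, __fpu__restore_sig() needed a 64-byte-aligned kzalloc() bounce buffer because a context switch during the possibly faulting copy from user memory could have saved the live registers on top of a half-written fpu->state. Setting TIF_NEED_FPU_LOAD first makes the direct copy safe. Below is a minimal sketch of that ordering; task, xstate, copy_from_user_stub() and maybe_context_switch() are invented stand-ins for this example, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct xstate {
	unsigned char bytes[64];	/* stand-in for union fpregs_state */
};

struct task {
	bool need_fpu_load;		/* models TIF_NEED_FPU_LOAD */
	struct xstate fpstate;		/* models task->thread.fpu.state */
};

/* Stand-in for __copy_from_user(); a real copy may fault and sleep. */
static int copy_from_user_stub(struct xstate *dst, const struct xstate *src)
{
	memcpy(dst, src, sizeof(*dst));
	return 0;
}

/* Models a preemption point while the copy above sleeps on a page fault. */
static void maybe_context_switch(struct task *t)
{
	/*
	 * Without need_fpu_load, the switch-out path would save the live
	 * registers into t->fpstate and corrupt the half-written frame.
	 * With the flag set it leaves t->fpstate alone.
	 */
	if (!t->need_fpu_load)
		printf("XSAVE into t->fpstate -- would corrupt the copy!\n");
}

static int restore_sig(struct task *t, const struct xstate *user_frame)
{
	/* Step 1: declare the current register contents dead for this task. */
	t->need_fpu_load = true;

	/* Step 2: copy straight into the task's fpstate; no bounce buffer. */
	int ret = copy_from_user_stub(&t->fpstate, user_frame);

	maybe_context_switch(t);	/* harmless now */
	if (ret)
		return ret;

	/*
	 * Step 3: with preemption disabled, load the registers from
	 * t->fpstate and clear need_fpu_load again.
	 */
	t->need_fpu_load = false;
	return 0;
}

int main(void)
{
	struct task t = { .need_fpu_load = false };
	struct xstate frame = { .bytes = { 1, 2, 3 } };

	return restore_sig(&t, &frame);
}

Step 3 of the sketch corresponds to the fpregs_lock() / copy_kernel_to_*regs_err() / fpregs_mark_activate() / fpregs_unlock() sequence in the hunks above.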