diff options
Diffstat (limited to 'arch/x86')
28 files changed, 391 insertions, 258 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c2fb8a87dccb..b7d31ca55187 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -499,6 +499,7 @@ config X86_INTEL_QUARK depends on X86_IO_APIC select IOSF_MBI select INTEL_IMR + select COMMON_CLK ---help--- Select to include support for Quark X1000 SoC. Say Y here if you have a Quark based system such as the Arduino diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39cc385..30b28dc76411 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } -#define SELECTOR_TI_MASK (1 << 2) -#define SELECTOR_RPL_MASK 0x03 - -#define IOPL_SHIFT 12 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 @@ -933,6 +928,7 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_vcpu_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e62cf897f781..c1adf33fdd0d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void) static inline bool kvm_para_available(void) { - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e9fa28..25b1cc07d496 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; + u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 5fa9770035dc..c9a6d68b8d62 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -82,18 +82,15 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XSAVES"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XSAVE"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); - - asm volatile(xstate_fault - : "0" (0) - : "memory"); - return err; } @@ -112,18 +109,15 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XRSTORS"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XRSTOR"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); - - asm volatile(xstate_fault - : "0" (0) - : "memory"); - return err; } @@ -149,9 +143,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask) */ alternative_input_2( "1:"XSAVE, - "1:"XSAVEOPT, + XSAVEOPT, X86_FEATURE_XSAVEOPT, - "1:"XSAVES, + XSAVES, X86_FEATURE_XSAVES, [fx] "D" (fx), "a" (lmask), "d" (hmask) : "memory"); @@ -178,7 +172,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) */ alternative_input( "1: " XRSTOR, - "1: " XRSTORS, + XRSTORS, X86_FEATURE_XSAVES, "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1deb91a..1fe92181ee9e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b5c8ff5e9dfc..2346c95c6ab1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1396,6 +1396,12 @@ void cpu_init(void) wait_for_master_cpu(cpu); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); + show_ucode_info_early(); printk(KERN_INFO "Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 94d7dcb12145..50163fa9034f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -565,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" }, - { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" }, + { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, + { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 000d4199b03e..31e2d5bf3e38 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -982,6 +982,9 @@ ENTRY(xen_hypervisor_callback) ENTRY(xen_do_upcall) 1: mov %esp, %eax call xen_evtchn_do_upcall +#ifndef CONFIG_PREEMPT + call xen_maybe_preempt_hcall +#endif jmp ret_from_intr CFI_ENDPROC ENDPROC(xen_hypervisor_callback) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index db13655c3a2a..1d74d161687c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -269,11 +269,14 @@ ENTRY(ret_from_fork) testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? jz 1f - testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET - jnz int_ret_from_sys_call - - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call # go to the SYSRET fastpath + /* + * By the time we get here, we have no idea whether our pt_regs, + * ti flags, and ti status came from the 64-bit SYSCALL fast path, + * the slow path, or one of the ia32entry paths. + * Use int_ret_from_sys_call to return, since it can safely handle + * all of the above. + */ + jmp int_ret_from_sys_call 1: subq $REST_SKIP, %rsp # leave space for volatiles @@ -1208,6 +1211,9 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) popq %rsp CFI_DEF_CFA_REGISTER rsp decl PER_CPU_VAR(irq_count) +#ifndef CONFIG_PREEMPT + call xen_maybe_preempt_hcall +#endif jmp error_exit CFI_ENDPROC END(xen_do_hypervisor_callback) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6a1146ea4d4d..4e3d5a9621fe 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -223,27 +223,48 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; + unsigned long faddr; kp = get_kprobe((void *)addr); - /* There is no probe, return original address */ - if (!kp) + faddr = ftrace_location(addr); + /* + * Addresses inside the ftrace location are refused by + * arch_check_ftrace_location(). Something went terribly wrong + * if such an address is checked here. + */ + if (WARN_ON(faddr && faddr != addr)) + return 0UL; + /* + * Use the current code if it is not modified by Kprobe + * and it cannot be modified by ftrace. + */ + if (!kp && !faddr) return addr; /* - * Basically, kp->ainsn.insn has an original instruction. - * However, RIP-relative instruction can not do single-stepping - * at different place, __copy_instruction() tweaks the displacement of - * that instruction. In that case, we can't recover the instruction - * from the kp->ainsn.insn. + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, __copy_instruction() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. * - * On the other hand, kp->opcode has a copy of the first byte of - * the probed instruction, which is overwritten by int3. And - * the instruction at kp->addr is not modified by kprobes except - * for the first byte, we can recover the original instruction - * from it and kp->opcode. + * On the other hand, in case on normal Kprobe, kp->opcode has a copy + * of the first byte of the probed instruction, which is overwritten + * by int3. And the instruction at kp->addr is not modified by kprobes + * except for the first byte, we can recover the original instruction + * from it and kp->opcode. + * + * In case of Kprobes using ftrace, we do not have a copy of + * the original instruction. In fact, the ftrace location might + * be modified at anytime and even could be in an inconsistent state. + * Fortunately, we know that the original code is the ideal 5-byte + * long NOP. */ - memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - buf[0] = kp->opcode; + memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (faddr) + memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); + else + buf[0] = kp->opcode; return (unsigned long)buf; } @@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Recover the probed instruction at addr for further analysis. * Caller must lock kprobes by kprobe_mutex, or disable preemption * for preventing to release referencing kprobes. + * Returns zero if the instruction can not get recovered. */ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) { @@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr) * normally used, we just go through if there is no kprobe. */ __addr = recover_probed_instruction(buf, addr); + if (!__addr) + return 0; kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); insn_get_length(&insn); @@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src) unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); + if (!recovered_insn) + return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint, failed to recover */ diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 0dd8d089c315..7b3b9d15c47a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr) */ return 0; recovered_insn = recover_probed_instruction(buf, addr); + if (!recovered_insn) + return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint */ diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d229a58..e5ecd20e72dd 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } +static struct pvclock_vsyscall_time_info *pvclock_vdso_info; + +static struct pvclock_vsyscall_time_info * +pvclock_get_vsyscall_user_time_info(int cpu) +{ + if (!pvclock_vdso_info) { + BUG(); + return NULL; + } + + return &pvclock_vdso_info[cpu]; +} + +struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) +{ + return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; +} + #ifdef CONFIG_X86_64 +static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, + void *v) +{ + struct task_migration_notifier *mn = v; + struct pvclock_vsyscall_time_info *pvti; + + pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); + + /* this is NULL when pvclock vsyscall is not initialized */ + if (unlikely(pvti == NULL)) + return NOTIFY_DONE; + + pvti->migrate_count++; + + return NOTIFY_DONE; +} + +static struct notifier_block pvclock_migrate = { + .notifier_call = pvclock_task_migrate, +}; + /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); + pvclock_vdso_info = i; + for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } + + register_task_migration_notifier(&pvclock_migrate); + return 0; } #endif diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 4452eedfaedd..26228466f3f8 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -26,7 +26,7 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *best; if (!static_cpu_has(X86_FEATURE_XSAVE)) - return 0; + return false; best = kvm_find_cpuid_entry(vcpu, 1, 0); return best && (best->ecx & bit(X86_FEATURE_XSAVE)); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 106c01557f2b..b304728aabe3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -248,27 +248,7 @@ struct mode_dual { struct opcode mode64; }; -/* EFLAGS bit definitions. */ -#define EFLG_ID (1<<21) -#define EFLG_VIP (1<<20) -#define EFLG_VIF (1<<19) -#define EFLG_AC (1<<18) -#define EFLG_VM (1<<17) -#define EFLG_RF (1<<16) -#define EFLG_IOPL (3<<12) -#define EFLG_NT (1<<14) -#define EFLG_OF (1<<11) -#define EFLG_DF (1<<10) -#define EFLG_IF (1<<9) -#define EFLG_TF (1<<8) -#define EFLG_SF (1<<7) -#define EFLG_ZF (1<<6) -#define EFLG_AF (1<<4) -#define EFLG_PF (1<<2) -#define EFLG_CF (1<<0) - #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a -#define EFLG_RESERVED_ONE_MASK 2 enum x86_transfer_type { X86_TRANSFER_NONE, @@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * These EFLAGS bits are restored from saved value during emulation, and * any changes are written back to the saved value after emulation. */ -#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) +#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\ + X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 #define ON64(x) x @@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask) *dest = (*dest & ~mask) | (src & mask); } +static void assign_register(unsigned long *reg, u64 val, int bytes) +{ + /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ + switch (bytes) { + case 1: + *(u8 *)reg = (u8)val; + break; + case 2: + *(u16 *)reg = (u16)val; + break; + case 4: + *reg = (u32)val; + break; /* 64b: zero-extend */ + case 8: + *reg = val; + break; + } +} + static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) { return (1UL << (ctxt->ad_bytes << 3)) - 1; @@ -943,6 +943,22 @@ FASTOP2(xadd); FASTOP2R(cmp, cmp_r); +static int em_bsf_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsf); +} + +static int em_bsr_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsr); +} + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, unsigned int in_page, n; unsigned int count = ctxt->rep_prefix ? address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; - in_page = (ctxt->eflags & EFLG_DF) ? + in_page = (ctxt->eflags & X86_EFLAGS_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); @@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, } if (ctxt->rep_prefix && (ctxt->d & String) && - !(ctxt->eflags & EFLG_DF)) { + !(ctxt->eflags & X86_EFLAGS_DF)) { ctxt->dst.data = rc->data + rc->pos; ctxt->dst.type = OP_MEM_STR; ctxt->dst.count = (rc->end - rc->pos) / size; @@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, static void write_register_operand(struct operand *op) { - /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ - switch (op->bytes) { - case 1: - *(u8 *)op->addr.reg = (u8)op->val; - break; - case 2: - *(u16 *)op->addr.reg = (u16)op->val; - break; - case 4: - *op->addr.reg = (u32)op->val; - break; /* 64b: zero-extend */ - case 8: - *op->addr.reg = op->val; - break; - } + return assign_register(op->addr.reg, op->val, op->bytes); } static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) @@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, { int rc; unsigned long val, change_mask; - int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; - change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF - | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; + change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF | + X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT | + X86_EFLAGS_AC | X86_EFLAGS_ID; switch(ctxt->mode) { case X86EMUL_MODE_PROT64: case X86EMUL_MODE_PROT32: case X86EMUL_MODE_PROT16: if (cpl == 0) - change_mask |= EFLG_IOPL; + change_mask |= X86_EFLAGS_IOPL; if (cpl <= iopl) - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; case X86EMUL_MODE_VM86: if (iopl < 3) return emulate_gp(ctxt, 0); - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; default: /* real mode */ - change_mask |= (EFLG_IOPL | EFLG_IF); + change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF); break; } @@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; + ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM; return em_push(ctxt); } @@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; + u32 val; while (reg >= VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { @@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); + rc = emulate_pop(ctxt, &val, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) break; + assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes); --reg; } return rc; @@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); + ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC); ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); @@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) unsigned long temp_eip = 0; unsigned long temp_eflags = 0; unsigned long cs = 0; - unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF | - EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF | - EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */ - unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP; + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | + X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF | + X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF | + X86_EFLAGS_AC | X86_EFLAGS_ID | + X86_EFLAGS_FIXED_BIT; + unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF | + X86_EFLAGS_VIP; /* TODO: Add stack limit check */ @@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) ctxt->_eip = temp_eip; - if (ctxt->op_bytes == 4) ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); else if (ctxt->op_bytes == 2) { @@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) } ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED_BIT; ctxt->ops->set_nmi_mask(ctxt, false); return rc; @@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); - ctxt->eflags &= ~EFLG_ZF; + ctxt->eflags &= ~X86_EFLAGS_ZF; } else { ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | (u32) reg_read(ctxt, VCPU_REGS_RBX); - ctxt->eflags |= EFLG_ZF; + ctxt->eflags |= X86_EFLAGS_ZF; } return X86EMUL_CONTINUE; } @@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); - if (ctxt->eflags & EFLG_ZF) { + if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ ctxt->src.type = OP_NONE; ctxt->dst.val = ctxt->src.orig_val; @@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED_BIT; #endif } else { /* legacy mode */ ops->get_msr(ctxt, MSR_STAR, &msr_data); ctxt->_eip = (u32)msr_data; - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } return X86EMUL_CONTINUE; @@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); - cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); + cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; if (efer & EFER_LMA) { cs.d = 0; @@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); break; } - cs_sel |= SELECTOR_RPL_MASK; - ss_sel |= SELECTOR_RPL_MASK; + cs_sel |= SEGMENT_RPL_MASK; + ss_sel |= SEGMENT_RPL_MASK; ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); @@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) return false; if (ctxt->mode == X86EMUL_MODE_VM86) return true; - iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; return ctxt->ops->cpl(ctxt) > iopl; } @@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, return ret; ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, X86_TRANSFER_TASK_SWITCH, NULL); - if (ret != X86EMUL_CONTINUE) - return ret; - return X86EMUL_CONTINUE; + return ret; } static int task_switch_32(struct x86_emulate_ctxt *ctxt, @@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, struct operand *op) { - int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; + int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count; register_address_increment(ctxt, reg, df * op->bytes); op->addr.mem.ea = register_address(ctxt, reg); @@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_vmcall(struct x86_emulate_ctxt *ctxt) +static int em_hypercall(struct x86_emulate_ctxt *ctxt) { int rc = ctxt->ops->fix_hypercall(ctxt); @@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) return em_lgdt_lidt(ctxt, true); } -static int em_vmmcall(struct x86_emulate_ctxt *ctxt) -{ - int rc; - - rc = ctxt->ops->fix_hypercall(ctxt); - - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; - return rc; -} - static int em_lidt(struct x86_emulate_ctxt *ctxt) { return em_lgdt_lidt(ctxt, false); @@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt) { u32 flags; - flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; + flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | + X86_EFLAGS_SF; flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; ctxt->eflags &= ~0xffUL; @@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) static const struct opcode group7_rm0[] = { N, - I(SrcNone | Priv | EmulateOnUD, em_vmcall), + I(SrcNone | Priv | EmulateOnUD, em_hypercall), N, N, N, N, N, N, }; @@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = { static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), - II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), + II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | Prot | Priv, stgi, check_svme), @@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = { N, N, G(BitOp, group8), F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), - F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), + I(DstReg | SrcMem | ModRM, em_bsf_c), + I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), @@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || (ctxt->b == 0xae) || (ctxt->b == 0xaf)) && (((ctxt->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) + ((ctxt->eflags & X86_EFLAGS_ZF) == 0)) || ((ctxt->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) + ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF)))) return true; return false; @@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* All REP prefixes have the same first termination condition */ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; goto done; } } @@ -4963,9 +4961,9 @@ special_insn: } if (ctxt->rep_prefix && (ctxt->d & String)) - ctxt->eflags |= EFLG_RF; + ctxt->eflags |= X86_EFLAGS_RF; else - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { if (ctxt->d & Fastop) { @@ -5014,7 +5012,7 @@ special_insn: rc = emulate_int(ctxt, ctxt->src.val); break; case 0xce: /* into */ - if (ctxt->eflags & EFLG_OF) + if (ctxt->eflags & X86_EFLAGS_OF) rc = emulate_int(ctxt, 4); break; case 0xe9: /* jmp rel */ @@ -5027,19 +5025,19 @@ special_insn: break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ - ctxt->eflags ^= EFLG_CF; + ctxt->eflags ^= X86_EFLAGS_CF; break; case 0xf8: /* clc */ - ctxt->eflags &= ~EFLG_CF; + ctxt->eflags &= ~X86_EFLAGS_CF; break; case 0xf9: /* stc */ - ctxt->eflags |= EFLG_CF; + ctxt->eflags |= X86_EFLAGS_CF; break; case 0xfc: /* cld */ - ctxt->eflags &= ~EFLG_DF; + ctxt->eflags &= ~X86_EFLAGS_DF; break; case 0xfd: /* std */ - ctxt->eflags |= EFLG_DF; + ctxt->eflags |= X86_EFLAGS_DF; break; default: goto cannot_emulate; @@ -5100,7 +5098,7 @@ writeback: } goto done; /* skip rip writeback */ } - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; } ctxt->eip = ctxt->_eip; @@ -5137,8 +5135,7 @@ twobyte_insn: case 0x40 ... 0x4f: /* cmov */ if (test_cc(ctxt->b, ctxt->eflags)) ctxt->dst.val = ctxt->src.val; - else if (ctxt->mode != X86EMUL_MODE_PROT64 || - ctxt->op_bytes != 4) + else if (ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8ff4eaa2bf19..fef922ff2635 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s, return -EOPNOTSUPP; if (len != 1) { + memset(val, 0, len); pr_pic_unimpl("non byte read\n"); return 0; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 8bf2e49708e3..51889ec847b0 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; + if (edge) + ioapic->irr_delivered &= ~mask; if ((edge && old_irr == ioapic->irr) || (!edge && entry.fields.remote_irr)) { ret = 0; @@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.shorthand = 0; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) - ioapic->irr &= ~(1 << irq); + ioapic->irr_delivered |= 1 << irq; if (irq == RTC_GSI && line_status) { /* @@ -471,13 +473,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, } } -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - return test_bit(vector, ioapic->handled_vectors); -} - void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; @@ -597,6 +592,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; ioapic->ioregsel = 0; ioapic->irr = 0; + ioapic->irr_delivered = 0; ioapic->id = 0; memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); rtc_irq_eoi_tracking_reset(ioapic); @@ -654,6 +650,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + state->irr &= ~ioapic->irr_delivered; spin_unlock(&ioapic->lock); return 0; } @@ -667,6 +664,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); ioapic->irr = 0; + ioapic->irr_delivered = 0; update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); kvm_ioapic_inject_all(ioapic, state->irr); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index d9e02cab7b96..ca0b0b4e6256 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -77,6 +77,7 @@ struct kvm_ioapic { struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; + u32 irr_delivered; }; #ifdef DEBUG @@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ba57bb79e795..44f7b9afbedb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1572,7 +1572,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1782,7 +1782,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; if (kvm_x86_ops->hwapic_irr_update) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 8e6b7d869d2f..29fbf9dfdc54 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping { }; /* mapping between fixed pmc index and arch_events array */ -int fixed_pmc_events[] = {1, 0, 7}; +static int fixed_pmc_events[] = {1, 0, 7}; static bool pmc_is_gp(struct kvm_pmc *pmc) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c24758..155534c0f5e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; - skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); kvm_emulate_hypercall(&svm->vcpu); return 1; } @@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], - vcpu->arch.regs[VCPU_REGS_RAX]); + trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), + kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ - kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); + kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(&svm->vcpu); + return 1; +} + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(&svm->vcpu); @@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm) return 1; } -bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, + unsigned long val) { unsigned long cr0 = svm->vcpu.arch.cr0; bool ret = false; @@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; - cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; + if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) + cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; + else + cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr >= 16) { /* mov to cr */ @@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); u64 data; if (svm_get_msr(&svm->vcpu, ecx, &data)) { @@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, data); - svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; - svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); + kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; skip_emulated_instruction(&svm->vcpu); } @@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; - u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) - | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u64 data = kvm_read_edx_eax(&svm->vcpu); msr.data = data; msr.index = ecx; @@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, @@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, @@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); + WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -3649,11 +3656,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_hwapic_isr_update(struct kvm *kvm, int isr) -{ - return; -} - static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) { return; @@ -4403,7 +4405,6 @@ static struct kvm_x86_ops svm_x86_ops = { .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, - .hwapic_isr_update = svm_hwapic_isr_update, .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 317da9bde728..b5a6425d8d97 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -3262,8 +3263,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, * default value. */ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) - save->selector &= ~SELECTOR_RPL_MASK; - save->dpl = save->selector & SELECTOR_RPL_MASK; + save->selector &= ~SEGMENT_RPL_MASK; + save->dpl = save->selector & SEGMENT_RPL_MASK; save->s = 1; } vmx_set_segment(vcpu, save, seg); @@ -3836,7 +3837,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) unsigned int cs_rpl; vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - cs_rpl = cs.selector & SELECTOR_RPL_MASK; + cs_rpl = cs.selector & SEGMENT_RPL_MASK; if (cs.unusable) return false; @@ -3864,7 +3865,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) unsigned int ss_rpl; vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - ss_rpl = ss.selector & SELECTOR_RPL_MASK; + ss_rpl = ss.selector & SEGMENT_RPL_MASK; if (ss.unusable) return true; @@ -3886,7 +3887,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) unsigned int rpl; vmx_get_segment(vcpu, &var, seg); - rpl = var.selector & SELECTOR_RPL_MASK; + rpl = var.selector & SEGMENT_RPL_MASK; if (var.unusable) return true; @@ -3913,7 +3914,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu) if (tr.unusable) return false; - if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; @@ -3931,7 +3932,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) if (ldtr.unusable) return true; - if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) return false; @@ -3948,8 +3949,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - return ((cs.selector & SELECTOR_RPL_MASK) == - (ss.selector & SELECTOR_RPL_MASK)); + return ((cs.selector & SEGMENT_RPL_MASK) == + (ss.selector & SEGMENT_RPL_MASK)); } /* @@ -5000,7 +5001,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); } return 1; } @@ -5065,6 +5066,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_invalid_opcode(intr_info)) { + if (is_guest_mode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); @@ -5527,13 +5532,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) static int handle_halt(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } @@ -5564,7 +5567,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) static int handle_wbinvd(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_wbinvd(vcpu); return 1; } @@ -5903,7 +5905,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_emulate_halt(vcpu); + ret = kvm_vcpu_halt(vcpu); goto out; } @@ -7312,21 +7314,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, else if (port < 0x10000) bitmap = vmcs12->io_bitmap_b; else - return 1; + return true; bitmap += (port & 0x7fff) / 8; if (last_bitmap != bitmap) if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) - return 1; + return true; if (b & (1 << (port & 7))) - return 1; + return true; port++; size--; last_bitmap = bitmap; } - return 0; + return false; } /* @@ -7342,7 +7344,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) - return 1; + return true; /* * The MSR_BITMAP page is divided into four 1024-byte bitmaps, @@ -7361,10 +7363,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, if (msr_index < 1024*8) { unsigned char b; if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) - return 1; + return true; return 1 & (b >> (msr_index & 7)); } else - return 1; /* let L1 handle the wrong parameter */ + return true; /* let L1 handle the wrong parameter */ } /* @@ -7386,7 +7388,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, case 0: if (vmcs12->cr0_guest_host_mask & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; break; case 3: if ((vmcs12->cr3_target_count >= 1 && @@ -7397,37 +7399,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, vmcs12->cr3_target_value2 == val) || (vmcs12->cr3_target_count >= 4 && vmcs12->cr3_target_value3 == val)) - return 0; + return false; if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) - return 1; + return true; break; case 4: if (vmcs12->cr4_guest_host_mask & (vmcs12->cr4_read_shadow ^ val)) - return 1; + return true; break; case 8: if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) - return 1; + return true; break; } break; case 2: /* clts */ if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && (vmcs12->cr0_read_shadow & X86_CR0_TS)) - return 1; + return true; break; case 1: /* mov from cr */ switch (cr) { case 3: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR3_STORE_EXITING) - return 1; + return true; break; case 8: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR8_STORE_EXITING) - return 1; + return true; break; } break; @@ -7438,14 +7440,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; if ((vmcs12->cr0_guest_host_mask & 0x1) && !(vmcs12->cr0_read_shadow & 0x1) && (val & 0x1)) - return 1; + return true; break; } - return 0; + return false; } /* @@ -7468,48 +7470,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) KVM_ISA_VMX); if (vmx->nested.nested_run_pending) - return 0; + return false; if (unlikely(vmx->fail)) { pr_info_ratelimited("%s failed vm entry %x\n", __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); - return 1; + return true; } switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (!is_exception(intr_info)) - return 0; + return false; else if (is_page_fault(intr_info)) return enable_ept; else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) - return 0; + return false; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: - return 0; + return false; case EXIT_REASON_TRIPLE_FAULT: - return 1; + return true; case EXIT_REASON_PENDING_INTERRUPT: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: - return 1; + return true; case EXIT_REASON_CPUID: if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) - return 0; - return 1; + return false; + return true; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); case EXIT_REASON_INVD: - return 1; + return true; case EXIT_REASON_INVLPG: return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -7521,7 +7523,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! */ - return 1; + return true; case EXIT_REASON_CR_ACCESS: return nested_vmx_exit_handled_cr(vcpu, vmcs12); case EXIT_REASON_DR_ACCESS: @@ -7532,7 +7534,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); case EXIT_REASON_INVALID_STATE: - return 1; + return true; case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_INSTRUCTION: @@ -7542,7 +7544,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) nested_cpu_has2(vmcs12, SECONDARY_EXEC_PAUSE_LOOP_EXITING); case EXIT_REASON_MCE_DURING_VMENTRY: - return 0; + return false; case EXIT_REASON_TPR_BELOW_THRESHOLD: return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: @@ -7551,7 +7553,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_APIC_WRITE: case EXIT_REASON_EOI_INDUCED: /* apic_write and eoi_induced should exit unconditionally. */ - return 1; + return true; case EXIT_REASON_EPT_VIOLATION: /* * L0 always deals with the EPT violation. If nested EPT is @@ -7559,7 +7561,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * missing in the guest EPT table (EPT12), the EPT violation * will be injected with nested_ept_inject_page_fault() */ - return 0; + return false; case EXIT_REASON_EPT_MISCONFIG: /* * L2 never uses directly L1's EPT, but rather L0's own EPT @@ -7567,11 +7569,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * (EPT on EPT). So any problems with the structure of the * table is L0's fault. */ - return 0; + return false; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: - return 1; + return true; case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: /* * This should never happen, since it is not possible to @@ -7581,7 +7583,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: - return 1; + return true; } } @@ -8516,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (nested && !vmx->rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ @@ -9145,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; @@ -9518,7 +9524,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12->launch_state = 1; if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5573d633144c..a284c927551e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4477,7 +4477,8 @@ mmio: return X86EMUL_CONTINUE; } -int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, +static int emulator_read_write(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, const struct read_write_emulator_ops *ops) @@ -4540,7 +4541,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, exception, &read_emultor); } -int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, +static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, const void *val, unsigned int bytes, @@ -4707,7 +4708,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } -int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) { if (!need_emulate_wbinvd(vcpu)) return X86EMUL_CONTINUE; @@ -4724,19 +4725,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) wbinvd(); return X86EMUL_CONTINUE; } + +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_emulate_wbinvd_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); + + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); + kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) +static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long *dest) { return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) +static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value) { return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); @@ -5818,7 +5829,7 @@ void kvm_arch_exit(void) free_percpu(shared_msrs); } -int kvm_emulate_halt(struct kvm_vcpu *vcpu) +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; if (irqchip_in_kernel(vcpu->kvm)) { @@ -5829,6 +5840,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) return 0; } } +EXPORT_SYMBOL_GPL(kvm_vcpu_halt); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_vcpu_halt(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_halt); int kvm_hv_hypercall(struct kvm_vcpu *vcpu) @@ -5905,7 +5923,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.dest_id = apicid; lapic_irq.delivery_mode = APIC_DM_REMRD; - kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); + kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) @@ -5913,6 +5931,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit, r = 1; + kvm_x86_ops->skip_emulated_instruction(vcpu); + if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -7430,7 +7450,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { - kvm_kvfree(free->arch.rmap[i]); + kvfree(free->arch.rmap[i]); free->arch.rmap[i] = NULL; } if (i == 0) @@ -7438,7 +7458,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, if (!dont || free->arch.lpage_info[i - 1] != dont->arch.lpage_info[i - 1]) { - kvm_kvfree(free->arch.lpage_info[i - 1]); + kvfree(free->arch.lpage_info[i - 1]); free->arch.lpage_info[i - 1] = NULL; } } @@ -7492,12 +7512,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, out_free: for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { - kvm_kvfree(slot->arch.rmap[i]); + kvfree(slot->arch.rmap[i]); slot->arch.rmap[i] = NULL; if (i == 0) continue; - kvm_kvfree(slot->arch.lpage_info[i - 1]); + kvfree(slot->arch.lpage_info[i - 1]); slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 4a0890f815c4..08f41caada45 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST_GUEST bool "Lguest guest support" - depends on X86_32 && PARAVIRT + depends on X86_32 && PARAVIRT && PCI select TTY select VIRTUALIZATION select VIRTIO @@ -8,7 +8,7 @@ config LGUEST_GUEST help Lguest is a tiny in-kernel hypervisor. Selecting this will allow your kernel to boot under lguest. This option will increase - your kernel size by about 6k. If in doubt, say N. + your kernel size by about 10k. If in doubt, say N. If you say Y here, make sure you say Y (or M) to the virtio block and net drivers which lguest needs. diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 6ac273832f28..e4695985f9de 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -331,7 +331,7 @@ static void probe_pci_root_info(struct pci_root_info *info, struct list_head *list) { int ret; - struct resource_entry *entry; + struct resource_entry *entry, *tmp; sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); info->bridge = device; @@ -345,8 +345,13 @@ static void probe_pci_root_info(struct pci_root_info *info, dev_dbg(&device->dev, "no IO and memory resources present in _CRS\n"); else - resource_list_for_each_entry(entry, list) - entry->res->name = info->name; + resource_list_for_each_entry_safe(entry, tmp, list) { + if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || + (entry->res->flags & IORESOURCE_DISABLED)) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } } struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 1bbedc4b0f88..3005f0c89f2e 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -130,7 +130,7 @@ static void intel_mid_arch_setup(void) intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); else { intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); - pr_info("ARCH: Uknown SoC, assuming PENWELL!\n"); + pr_info("ARCH: Unknown SoC, assuming PENWELL!\n"); } out: diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 9793322751e0..30933760ee5f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; + u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * Note: hypervisor must guarantee that: - * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. - * 2. that per-CPU pvclock time info is updated if the - * underlying CPU changes. - * 3. that version is increased whenever underlying CPU - * changes. - * + * When looping to get a consistent (time-info, tsc) pair, we + * also need to deal with the possibility we can switch vcpus, + * so make sure we always re-fetch time-info for the current vcpu. */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode) pvti = get_pvti(cpu); + migrate_count = pvti->migrate_count; + version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* @@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode) cpu1 = __getcpu() & VGETCPU_CPU_MASK; } while (unlikely(cpu != cpu1 || (pvti->pvti.version & 1) || - pvti->pvti.version != version)); + pvti->pvti.version != version || + pvti->migrate_count != migrate_count)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bd8b8459c3d0..5240f563076d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1070,6 +1070,23 @@ static inline void xen_write_cr8(unsigned long val) BUG_ON(val); } #endif + +static u64 xen_read_msr_safe(unsigned int msr, int *err) +{ + u64 val; + + val = native_read_msr_safe(msr, err); + switch (msr) { + case MSR_IA32_APICBASE: +#ifdef CONFIG_X86_X2APIC + if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31)))) +#endif + val &= ~X2APIC_ENABLE; + break; + } + return val; +} + static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -1240,7 +1257,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, + .read_msr = xen_read_msr_safe, .write_msr = xen_write_msr_safe, .read_tsc = native_read_tsc, @@ -1741,6 +1758,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #ifdef CONFIG_X86_32 i386_start_kernel(); #else + cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ x86_64_start_reservations((char *)__pa_symbol(&boot_params)); #endif } |